From b2e2b4cce7f913957317b5df3eddf930ac8407f8 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 18 Apr 2022 19:58:57 +0800 Subject: [PATCH 001/230] fix attach table dictionaries function name normalizer --- src/Databases/DatabaseOrdinary.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index b5557d9a08d..baf93182a57 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +107,7 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; + FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (create_query.is_dictionary) @@ -128,6 +130,7 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; + FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (!create_query.is_dictionary) @@ -167,6 +170,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { + FunctionNameNormalizer().visit(ast.get()); auto * create_query = ast->as(); create_query->setDatabase(database_name); @@ -220,6 +224,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) { assert(name.database == database_name); + FunctionNameNormalizer().visit(ast.get()); const auto & 
create_query = ast->as(); tryAttachTable( From f091c8d1d8ff0577e60bf1aed0d3f97d30cdb35f Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 17 Jun 2022 16:42:05 +0800 Subject: [PATCH 002/230] fix: attach table normalizer, add test --- src/Databases/DatabaseOrdinary.cpp | 3 -- src/Interpreters/InterpreterCreateQuery.cpp | 1 + .../test_attach_table_normalizer/__init__.py | 0 .../configs/config.xml | 4 ++ .../test_attach_table_normalizer/test.py | 43 +++++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_attach_table_normalizer/__init__.py create mode 100644 tests/integration/test_attach_table_normalizer/configs/config.xml create mode 100644 tests/integration/test_attach_table_normalizer/test.py diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index baf93182a57..5708ff50323 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -107,7 +107,6 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; - FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (create_query.is_dictionary) @@ -170,7 +169,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { - FunctionNameNormalizer().visit(ast.get()); auto * create_query = ast->as(); create_query->setDatabase(database_name); @@ -224,7 +222,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) { assert(name.database == database_name); - 
FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); tryAttachTable( diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ed996430996..7eb293b1813 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -953,6 +953,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) // Table SQL definition is available even if the table is detached (even permanently) auto query = database->getCreateTableQuery(create.getTable(), getContext()); + FunctionNameNormalizer().visit(query.get()); auto create_query = query->as(); if (!create.is_dictionary && create_query.is_dictionary) diff --git a/tests/integration/test_attach_table_normalizer/__init__.py b/tests/integration/test_attach_table_normalizer/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_table_normalizer/configs/config.xml b/tests/integration/test_attach_table_normalizer/configs/config.xml new file mode 100644 index 00000000000..0500e2ad554 --- /dev/null +++ b/tests/integration/test_attach_table_normalizer/configs/config.xml @@ -0,0 +1,4 @@ + + 1 + 1 + diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py new file mode 100644 index 00000000000..3e86d567c5b --- /dev/null +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -0,0 +1,43 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def replace_substring_to_substr(node): + node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], 
user="root") + +@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) +def test_attach_substr(started_cluster, engine): + # Initialize + node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + + # Detach table file + node.query("DETACH TABLE file") + + # Replace subtring to substr + replace_substring_to_substr(node) + + # Attach table file + node.query("ATTACH TABLE file") + +@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) +def test_attach_substr(started_cluster, engine): + # Initialize + node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + + # Replace subtring to substr + replace_substring_to_substr(node) + + # Restart clickhouse + node.restart_clickhouse(kill=True) From c7a85d565cb17c068528bdbf38a74d0ab29a1450 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 17 Jun 2022 17:51:33 +0800 Subject: [PATCH 003/230] fix: rename restart test --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 3e86d567c5b..5a31801b99c 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -32,7 +32,7 @@ def test_attach_substr(started_cluster, engine): node.query("ATTACH TABLE file") @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr(started_cluster, engine): +def test_attach_substr_restart(started_cluster, engine): # Initialize node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") From 701c687e7933f77ad51e91fa8bf1ef6ff2282f8d Mon Sep 17 00:00:00 2001 From: liyang830 Date: Sat, 18 Jun 2022 17:13:50 +0800 Subject: [PATCH 004/230] fix : test error --- 
src/Databases/DatabaseOrdinary.cpp | 2 +- tests/integration/test_attach_table_normalizer/test.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 5708ff50323..1477014a869 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -129,7 +129,6 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; - FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (!create_query.is_dictionary) @@ -169,6 +168,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { + FunctionNameNormalizer().visit(ast.get()); auto * create_query = ast->as(); create_query->setDatabase(database_name); diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 5a31801b99c..80c4b99dfcc 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -20,12 +20,13 @@ def replace_substring_to_substr(node): @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) def test_attach_substr(started_cluster, engine): # Initialize + node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") # Detach table file node.query("DETACH TABLE file") - # Replace subtring to substr + # Replace substring to substr replace_substring_to_substr(node) # Attach table file @@ -34,9 +35,10 @@ def test_attach_substr(started_cluster, engine): @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) 
def test_attach_substr_restart(started_cluster, engine): # Initialize + node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") - # Replace subtring to substr + # Replace substring to substr replace_substring_to_substr(node) # Restart clickhouse From 252e750fd79090dc4fdb8bfb1317d8f8b1f3136c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 8 Jul 2022 17:57:24 +0200 Subject: [PATCH 005/230] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 80c4b99dfcc..f2d99588b94 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) +node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True) @pytest.fixture(scope="module") From 2de309c34f366967b50aed8e504a6748b7543057 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Nov 2021 00:56:52 +0300 Subject: [PATCH 006/230] Add Linux RISC-V 64 build to CI --- cmake/target.cmake | 11 +++++++++++ docker/packager/packager | 18 ++++++++++++++++++ docs/en/development/build-cross-riscv.md | 2 +- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cmake/target.cmake b/cmake/target.cmake index 0fb5e8a20de..6b78a9253b2 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -33,6 +33,17 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") + elseif (ARCH_RISCV64 OFF CACHE INTERNAL "") + # RISC-V 
support is preliminary + set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") + set (ENABLE_LDAP OFF CACHE INTERNAL "") + set (OPENSSL_NO_ASM ON CACHE INTERNAL "") + set (ENABLE_JEMALLOC ON CACHE INTERNAL "") + set (ENABLE_PARQUET OFF CACHE INTERNAL "") + set (USE_UNWIND OFF CACHE INTERNAL "") + set (ENABLE_GRPC OFF CACHE INTERNAL "") + set (ENABLE_HDFS OFF CACHE INTERNAL "") + set (ENABLE_MYSQL OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/docker/packager/packager b/docker/packager/packager index 66eb568d460..98b864edbc6 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -130,6 +130,7 @@ def parse_env_variables( ARM_SUFFIX = "-aarch64" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" + RISCV_SUFFIX = "-riscv64" result = [] result.append("OUTPUT_DIR=/output") @@ -140,6 +141,7 @@ def parse_env_variables( is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) is_cross_arm = compiler.endswith(ARM_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) + is_cross_riscv = compiler.endswith(RISCV_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) if is_cross_darwin: @@ -186,6 +188,11 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) + elif is_cross_riscv: + cc = compiler[: -len(RISCV_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" + ) else: cc = compiler result.append("DEB_ARCH=amd64") @@ -329,6 +336,7 @@ if __name__ == "__main__": ) parser.add_argument("--output-dir", type=dir_name, required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") +<<<<<<< HEAD parser.add_argument( "--compiler", @@ -352,6 +360,16 @@ if __name__ == "__main__": ) parser.add_argument("--shared-libraries", action="store_true") +======= + parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", + "clang-12", 
"clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64", + "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", + "clang-13-ppc64le", "clang-13-riscv64", + "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") + parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") + parser.add_argument("--unbundled", action="store_true") + parser.add_argument("--split-binary", action="store_true") +>>>>>>> 8bc7bf3d87 (Add Linux RISC-V 64 build to CI) parser.add_argument("--clang-tidy", action="store_true") parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") parser.add_argument( diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index a0b31ff131a..b94b1072f28 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. 
-## Install Clang-13 +## Install Clang-14 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` From 1bd3b8825c8bf72b0d32ff6a0287f853eebbdcaf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Nov 2021 02:48:38 +0300 Subject: [PATCH 007/230] Fix typo --- cmake/target.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/target.cmake b/cmake/target.cmake index 6b78a9253b2..86b060f53e1 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -33,7 +33,7 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") - elseif (ARCH_RISCV64 OFF CACHE INTERNAL "") + elseif (ARCH_RISCV64) # RISC-V support is preliminary set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") set (ENABLE_LDAP OFF CACHE INTERNAL "") From 1021b756ac33806bec7525bac9a1b45a76d9c507 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Aug 2022 06:12:38 +0200 Subject: [PATCH 008/230] Fix conflict --- docker/packager/packager | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docker/packager/packager b/docker/packager/packager index 98b864edbc6..3769e321ccc 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -336,7 +336,6 @@ if __name__ == "__main__": ) parser.add_argument("--output-dir", type=dir_name, required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") -<<<<<<< HEAD parser.add_argument( "--compiler", @@ -347,6 +346,7 @@ if __name__ == "__main__": "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", + "clang-14-riscv64", "clang-14-freebsd", "gcc-11", ), @@ -360,16 +360,6 @@ if __name__ == "__main__": ) parser.add_argument("--shared-libraries", action="store_true") -======= - parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", - "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", 
"clang-12-aarch64", - "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", - "clang-13-ppc64le", "clang-13-riscv64", - "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") - parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") - parser.add_argument("--unbundled", action="store_true") - parser.add_argument("--split-binary", action="store_true") ->>>>>>> 8bc7bf3d87 (Add Linux RISC-V 64 build to CI) parser.add_argument("--clang-tidy", action="store_true") parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") parser.add_argument( From edc99648ade4ef39e633da31b97995f6b5d3cd5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Aug 2022 06:14:43 +0200 Subject: [PATCH 009/230] Add build to CI --- tests/ci/ci_config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5b8f3b4227e..8dd4843cb88 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -177,6 +177,17 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, + "binary_riscv64": { + "compiler": "clang-14-riscv64", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "static_binary_name": "riscv64", + "bundled": "bundled", + "libraries": "static", + "tidy": "disable", + "with_coverage": False, + }, }, "builds_report_config": { "ClickHouse build check": [ @@ -198,6 +209,7 @@ CI_CONFIG = { "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", + "binary_riscv64", ], }, "tests_config": { From ec334a3a0866a773b2bb34f1d08be789831df33d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Aug 2022 09:45:38 +0200 Subject: [PATCH 010/230] Programming in YAML with copy-paste --- .github/workflows/master.yml | 49 +++++++++++++++++++++++++++++- .github/workflows/pull_request.yml | 47 +++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 2 deletions(-) diff 
--git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2acc1468328..69a28350945 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -923,6 +923,53 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinRISCV64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_riscv64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### 
############################################################################################ @@ -1009,8 +1056,8 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 + - BuilderBinRISCV64 - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 513df8487c4..f17c25cd164 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -935,6 +935,51 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinRISCV64: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_riscv64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck 
disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1021,8 +1066,8 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 + - BuilderBinRISCV64 - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] From a517d9d65fcd6646944d45d295284edf3c87cf99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Aug 2022 04:23:56 +0200 Subject: [PATCH 011/230] Maybe make it possible --- docker/packager/binary/Dockerfile | 9 +++++++++ docker/packager/packager | 4 ++-- tests/ci/ci_config.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 74919bb2100..67e61f2036b 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -44,6 +44,15 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): RUN apt-get install binutils-riscv64-linux-gnu +# Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. +# It can be also used to check it for regular builds. 
+RUN git clone git@github.com:llvm/llvm-project.git \ + && mkdir llvm-build \ + && cd llvm-build \ + && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ + && ninja \ + && ninja install + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.16.0 diff --git a/docker/packager/packager b/docker/packager/packager index 3769e321ccc..03f3bd80c96 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -189,7 +189,7 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) elif is_cross_riscv: - cc = compiler[: -len(RISCV_SUFFIX)] + cc = "clang" cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" ) @@ -346,7 +346,7 @@ if __name__ == "__main__": "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", - "clang-14-riscv64", + "clang-trunk-riscv64", "clang-14-freebsd", "gcc-11", ), diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8dd4843cb88..f66eba7d966 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -178,7 +178,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_riscv64": { - "compiler": "clang-14-riscv64", + "compiler": "clang-trunk-riscv64", "build_type": "", "sanitizer": "", "package_type": "binary", From e702adf0c967cafdc03d80b23d83a477c52eab09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Aug 2022 06:23:00 +0200 Subject: [PATCH 012/230] Fix error --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 67e61f2036b..fdb2b324e06 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu # Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. 
# It can be also used to check it for regular builds. -RUN git clone git@github.com:llvm/llvm-project.git \ +RUN git clone https://github.com/llvm/llvm-project.git \ && mkdir llvm-build \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ From 7b99581e5c2a9d9e4cde387ae7cd6928cb9918bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Aug 2022 07:47:42 +0200 Subject: [PATCH 013/230] Better machines to build Docker images --- .github/workflows/backport_branches.yml | 4 ++-- .github/workflows/docs_check.yml | 4 ++-- .github/workflows/docs_release.yml | 4 ++-- .github/workflows/master.yml | 4 ++-- .github/workflows/nightly.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- .github/workflows/release_branches.yml | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index e1b2b1fad01..bd399e48100 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . 
-p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index b50584a2c01..850f690f44d 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -31,7 +31,7 @@ jobs: python3 run_check.py DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -49,7 +49,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index e0fdb0c2f7b..cb2f2b8453d 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -20,7 +20,7 @@ concurrency: workflow_dispatch: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -37,7 +37,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 69a28350945..552272b38e5 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -22,7 +22,7 @@ jobs: cd 
"$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index e712ada1551..bff937b832f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -11,7 +11,7 @@ env: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -28,7 +28,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f17c25cd164..c10767c55e6 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -47,7 +47,7 @@ jobs: python3 -m unittest discover -s . 
-p '*_test.py' DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -65,7 +65,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index fdfedc56f5d..1f082f0ab64 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy jobs: DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -30,7 +30,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | From 2b82916175f1eda4d0456fcbcb7784b5d6ba377a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Aug 2022 09:13:04 +0200 Subject: [PATCH 014/230] Do not put garbage in the Docker image --- docker/packager/binary/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index fdb2b324e06..cbab3d501d6 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -51,7 +51,8 @@ RUN git clone https://github.com/llvm/llvm-project.git \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ && ninja \ - && ninja install + && ninja install \ + && cd .. 
&& rm -rf llvm-build llvm-project # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH From 0eca4d9560ec20290aa35ae9765bf293dbfe01ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Aug 2022 18:50:27 +0200 Subject: [PATCH 015/230] Maybe fix error --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index e9fb375d0a1..38c88421249 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 +Subproject commit 38c8842124940a26e7e851c083cd61c651a83ee3 From c08766aa3476e7faea38187061993eeb1b76454e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:51:59 +0200 Subject: [PATCH 016/230] Revert "Do not put garbage in the Docker image" This reverts commit 2b82916175f1eda4d0456fcbcb7784b5d6ba377a. --- docker/packager/binary/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index cbab3d501d6..fdb2b324e06 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -51,8 +51,7 @@ RUN git clone https://github.com/llvm/llvm-project.git \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ && ninja \ - && ninja install \ - && cd .. && rm -rf llvm-build llvm-project + && ninja install # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH From 47ccb28ad387642bfc1549642dd43bfea5c06f4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:52:01 +0200 Subject: [PATCH 017/230] Revert "Better machines to build Docker images" This reverts commit 7b99581e5c2a9d9e4cde387ae7cd6928cb9918bc. 
--- .github/workflows/backport_branches.yml | 4 ++-- .github/workflows/docs_check.yml | 4 ++-- .github/workflows/docs_release.yml | 4 ++-- .github/workflows/master.yml | 4 ++-- .github/workflows/nightly.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- .github/workflows/release_branches.yml | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index bd399e48100..e1b2b1fad01 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 850f690f44d..b50584a2c01 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -31,7 +31,7 @@ jobs: python3 run_check.py DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -49,7 +49,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index cb2f2b8453d..e0fdb0c2f7b 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -20,7 +20,7 @@ 
concurrency: workflow_dispatch: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -37,7 +37,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 552272b38e5..69a28350945 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bff937b832f..e712ada1551 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -11,7 +11,7 @@ env: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -28,7 +28,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c10767c55e6..f17c25cd164 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -47,7 +47,7 
@@ jobs: python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -65,7 +65,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 1f082f0ab64..fdfedc56f5d 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -30,7 +30,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | From dd9085346af943c9dca0ab18fe7f4c16fda38ae5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:52:03 +0200 Subject: [PATCH 018/230] Revert "Fix error" This reverts commit e702adf0c967cafdc03d80b23d83a477c52eab09. --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index fdb2b324e06..67e61f2036b 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu # Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. # It can be also used to check it for regular builds. 
-RUN git clone https://github.com/llvm/llvm-project.git \ +RUN git clone git@github.com:llvm/llvm-project.git \ && mkdir llvm-build \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ From cb2e5f316c442ac0528980f5204d0afa45d030ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:52:04 +0200 Subject: [PATCH 019/230] Revert "Maybe make it possible" This reverts commit a517d9d65fcd6646944d45d295284edf3c87cf99. --- docker/packager/binary/Dockerfile | 9 --------- docker/packager/packager | 4 ++-- tests/ci/ci_config.py | 2 +- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 67e61f2036b..74919bb2100 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -44,15 +44,6 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): RUN apt-get install binutils-riscv64-linux-gnu -# Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. -# It can be also used to check it for regular builds. 
-RUN git clone git@github.com:llvm/llvm-project.git \ - && mkdir llvm-build \ - && cd llvm-build \ - && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ - && ninja \ - && ninja install - # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.16.0 diff --git a/docker/packager/packager b/docker/packager/packager index 03f3bd80c96..3769e321ccc 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -189,7 +189,7 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) elif is_cross_riscv: - cc = "clang" + cc = compiler[: -len(RISCV_SUFFIX)] cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" ) @@ -346,7 +346,7 @@ if __name__ == "__main__": "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", - "clang-trunk-riscv64", + "clang-14-riscv64", "clang-14-freebsd", "gcc-11", ), diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index aa5762d7536..7110ff628ad 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -178,7 +178,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_riscv64": { - "compiler": "clang-trunk-riscv64", + "compiler": "clang-14-riscv64", "build_type": "", "sanitizer": "", "package_type": "binary", From 935bc723299056c816646fb9067638a60ddfb085 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Feb 2023 18:51:10 +0100 Subject: [PATCH 020/230] [TEST] Tune allocator Processing of the default max_block_size can be faster then running mmap()/munmap() plus memory dependencies. 
Here is an example: SELECT count() FROM zeros(10_000_000) WHERE NOT ignore(randomString(1000)) SETTINGS function_implementation='avx2' - Before this patch it takes: ~6sec - After: 1.3sec And even though 128MiB should be enough, since for this query size of allocation for string will be 65409*(1000+1)=65474409 bytes, due to rounding to power of two it will not, so let's try simply use 256MiB (another option is to use strict comparison for MMAP_THRESHOLD) and see the perf tests. But also note, that this has other allocator side effects (performance, fragmentation), so unlikely this is for upstream. I've found this while I was playing with PODArray [1]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/45654/2101b66570cbb9eb9a492afa8ab82d562c34336b/performance_comparison_[1/4]/report.html Signed-off-by: Azat Khuzhin --- src/Common/Allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index 5a66ddb63a2..c02210f2ece 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -8,7 +8,7 @@ * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html */ #ifdef NDEBUG - __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20); + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20); #else /** * In debug build, use small mmap threshold to reproduce more memory From 5781eb67cba3e827ecf47b7929c47777a6e48094 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 01:28:13 +0300 Subject: [PATCH 021/230] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index f2d99588b94..526da39935a 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ 
b/tests/integration/test_attach_table_normalizer/test.py @@ -17,8 +17,7 @@ def started_cluster(): def replace_substring_to_substr(node): node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root") -@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr(started_cluster, engine): +def test_attach_substr(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") @@ -32,8 +31,7 @@ def test_attach_substr(started_cluster, engine): # Attach table file node.query("ATTACH TABLE file") -@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr_restart(started_cluster, engine): +def test_attach_substr_restart(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") From 63982a20936bb384a4c4f88f9e4ed2282680e33b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 01:29:29 +0300 Subject: [PATCH 022/230] Delete config.xml --- .../test_attach_table_normalizer/configs/config.xml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 tests/integration/test_attach_table_normalizer/configs/config.xml diff --git a/tests/integration/test_attach_table_normalizer/configs/config.xml b/tests/integration/test_attach_table_normalizer/configs/config.xml deleted file mode 100644 index 0500e2ad554..00000000000 --- a/tests/integration/test_attach_table_normalizer/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 1 - From 0e01991eb7b1331d2fca09c94b3e41fdd5c32bb3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 04:33:56 +0300 Subject: [PATCH 023/230] Update test.py --- .../test_attach_table_normalizer/test.py | 24 +++++++++++++++---- 1 
file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 526da39935a..ddbb02bf4ef 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,7 +3,9 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True) +node = cluster.add_instance( + 'node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True +) @pytest.fixture(scope="module") @@ -14,13 +16,24 @@ def started_cluster(): finally: cluster.shutdown() + def replace_substring_to_substr(node): - node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root") + node.exec_in_container(( + [ + "bash", + "-c", + "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", + ], + user="root", + ) + def test_attach_substr(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") - node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + node.query( + "CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n " + ) # Detach table file node.query("DETACH TABLE file") @@ -31,10 +44,13 @@ def test_attach_substr(started_cluster): # Attach table file node.query("ATTACH TABLE file") + def test_attach_substr_restart(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") - node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + node.query( + "CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER 
BY n " + ) # Replace substring to substr replace_substring_to_substr(node) From e997b1393ce12ba639049147afdedb13e338af38 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 19 Apr 2023 22:40:13 +0200 Subject: [PATCH 024/230] Play with MMAP_THRESHOLD (set it to 128MiB) Signed-off-by: Azat Khuzhin --- src/Common/Allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index c02210f2ece..0fb90e5a47e 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -8,7 +8,7 @@ * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html */ #ifdef NDEBUG - __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20); + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20); #else /** * In debug build, use small mmap threshold to reproduce more memory From 491c26fb0aa08dd75adf46699225658fd9a45d5d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 18:55:19 +0200 Subject: [PATCH 025/230] Slight improvement in Disks interface --- src/Disks/DiskEncrypted.cpp | 2 +- src/Disks/DiskEncrypted.h | 6 +-- src/Disks/DiskLocal.cpp | 44 +++++++++++++------ src/Disks/DiskLocal.h | 8 ++-- src/Disks/IDisk.h | 8 ++-- src/Disks/IVolume.cpp | 4 +- src/Disks/IVolume.h | 2 +- .../ObjectStorages/DiskObjectStorage.cpp | 17 ++++--- src/Disks/ObjectStorages/DiskObjectStorage.h | 12 +++-- src/Disks/StoragePolicy.cpp | 27 ++++++++++-- src/Disks/VolumeJBOD.cpp | 22 +++++++--- src/Disks/VolumeJBOD.h | 4 +- src/Functions/filesystem.cpp | 6 +-- .../ServerAsynchronousMetrics.cpp | 23 ++++++---- .../MergeTree/MergeTreePartsMover.cpp | 12 +++-- src/Storages/System/StorageSystemDisks.cpp | 6 +-- 16 files changed, 130 insertions(+), 73 deletions(-) diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index db18e9652e7..1f8d75dbeb8 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -184,7 +184,7 @@ 
public: } UInt64 getSize() const override { return reservation->getSize(); } - UInt64 getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } + std::optional getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } DiskPtr getDisk(size_t i) const override { diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 8e824a1f7e5..5d04558792e 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -256,17 +256,17 @@ public: return std::make_shared(*this); } - UInt64 getTotalSpace() const override + std::optional getTotalSpace() const override { return delegate->getTotalSpace(); } - UInt64 getAvailableSpace() const override + std::optional getAvailableSpace() const override { return delegate->getAvailableSpace(); } - UInt64 getUnreservedSpace() const override + std::optional getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 49f28a19b31..af9d4ffd19c 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -97,7 +97,8 @@ static void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + auto total_space_of_local_disk = DiskLocal("tmp", tmp_path, 0).getTotalSpace(); + keep_free_space_bytes = total_space_of_local_disk ? 
static_cast(*total_space_of_local_disk * ratio) : 0; } } @@ -128,7 +129,7 @@ public: {} UInt64 getSize() const override { return size; } - UInt64 getUnreservedSpace() const override { return unreserved_space; } + std::optional getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override { @@ -225,8 +226,11 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) { std::lock_guard lock(DiskLocal::reservation_mutex); - UInt64 available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + auto available_space = getAvailableSpace(); + + UInt64 unreserved_space = available_space + ? *available_space - std::min(*available_space, reserved_bytes) + : std::numeric_limits::max(); if (bytes == 0) { @@ -237,12 +241,24 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) if (unreserved_space >= bytes) { - LOG_TRACE( - logger, - "Reserved {} on local disk {}, having unreserved {}.", - ReadableSize(bytes), - backQuote(name), - ReadableSize(unreserved_space)); + if (available_space) + { + LOG_TRACE( + logger, + "Reserved {} on local disk {}, having unreserved {}.", + ReadableSize(bytes), + backQuote(name), + ReadableSize(unreserved_space)); + } + else + { + LOG_TRACE( + logger, + "Reserved {} on local disk {}.", + ReadableSize(bytes), + backQuote(name)); + } + ++reservation_count; reserved_bytes += bytes; return {unreserved_space - bytes}; @@ -268,14 +284,14 @@ static UInt64 getTotalSpaceByName(const String & name, const String & disk_path, return total_size - keep_free_space_bytes; } -UInt64 DiskLocal::getTotalSpace() const +std::optional DiskLocal::getTotalSpace() const { if (broken || readonly) return 0; return getTotalSpaceByName(name, disk_path, keep_free_space_bytes); } -UInt64 DiskLocal::getAvailableSpace() const +std::optional DiskLocal::getAvailableSpace() const { if (broken || readonly) return 0; @@ -292,10 +308,10 @@ UInt64 DiskLocal::getAvailableSpace() const 
return total_size - keep_free_space_bytes; } -UInt64 DiskLocal::getUnreservedSpace() const +std::optional DiskLocal::getUnreservedSpace() const { std::lock_guard lock(DiskLocal::reservation_mutex); - auto available_space = getAvailableSpace(); + auto available_space = *getAvailableSpace(); available_space -= std::min(available_space, reserved_bytes); return available_space; } diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 7ea2c04704c..6da62332726 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -31,11 +31,9 @@ public: ReservationPtr reserve(UInt64 bytes) override; - UInt64 getTotalSpace() const override; - - UInt64 getAvailableSpace() const override; - - UInt64 getUnreservedSpace() const override; + std::optional getTotalSpace() const override; + std::optional getAvailableSpace() const override; + std::optional getUnreservedSpace() const override; UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 68798047cfd..7202d1f5cfc 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -127,13 +127,13 @@ public: const String & getName() const override { return name; } /// Total available space on the disk. - virtual UInt64 getTotalSpace() const = 0; + virtual std::optional getTotalSpace() const = 0; /// Space currently available on the disk. - virtual UInt64 getAvailableSpace() const = 0; + virtual std::optional getAvailableSpace() const = 0; /// Space available for reservation (available space minus reserved space). - virtual UInt64 getUnreservedSpace() const = 0; + virtual std::optional getUnreservedSpace() const = 0; /// Amount of bytes which should be kept free on the disk. virtual UInt64 getKeepingFreeSpace() const { return 0; } @@ -463,7 +463,7 @@ public: /// Space available for reservation /// (with this reservation already take into account). 
- virtual UInt64 getUnreservedSpace() const = 0; + virtual std::optional getUnreservedSpace() const = 0; /// Get i-th disk where reservation take place. virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index eb474f12ad2..15b52acb422 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -49,9 +49,9 @@ IVolume::IVolume( throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Volume must contain at least one disk"); } -UInt64 IVolume::getMaxUnreservedFreeSpace() const +std::optional IVolume::getMaxUnreservedFreeSpace() const { - UInt64 res = 0; + std::optional res = 0; for (const auto & disk : disks) res = std::max(res, disk->getUnreservedSpace()); return res; diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index ada28caa960..f40d4dcba60 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -74,7 +74,7 @@ public: virtual VolumeType getType() const = 0; /// Return biggest unreserved space across all disks - UInt64 getMaxUnreservedFreeSpace() const; + std::optional getMaxUnreservedFreeSpace() const; DiskPtr getDisk() const { return getDisk(0); } virtual DiskPtr getDisk(size_t i) const { return disks[i]; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index bf5d0ab829d..2f4e0db070f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -469,18 +469,25 @@ void DiskObjectStorage::removeSharedRecursive( transaction->commit(); } -std::optional DiskObjectStorage::tryReserve(UInt64 bytes) +bool DiskObjectStorage::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (!available_space) + { + ++reservation_count; + reserved_bytes += bytes; + return true; + } + + UInt64 unreserved_space = *available_space - 
std::min(*available_space, reserved_bytes); if (bytes == 0) { LOG_TRACE(log, "Reserved 0 bytes on remote disk {}", backQuote(name)); ++reservation_count; - return {unreserved_space}; + return true; } if (unreserved_space >= bytes) @@ -493,14 +500,14 @@ std::optional DiskObjectStorage::tryReserve(UInt64 bytes) ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return {unreserved_space - bytes}; + return true; } else { LOG_TRACE(log, "Could not reserve {} on remote disk {}. Not enough unreserved space", ReadableSize(bytes), backQuote(name)); } - return {}; + return false; } bool DiskObjectStorage::supportsCache() const diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 4372bc75950..2c544e01ca9 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -53,11 +53,9 @@ public: const std::string & getCacheName() const override { return object_storage->getCacheName(); } - UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } - - UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } + std::optional getTotalSpace() const override { return {}; } + std::optional getAvailableSpace() const override { return {}; } + std::optional getUnreservedSpace() const override { return {}; } UInt64 getKeepingFreeSpace() const override { return 0; } @@ -223,7 +221,7 @@ private: UInt64 reservation_count = 0; std::mutex reservation_mutex; - std::optional tryReserve(UInt64 bytes); + bool tryReserve(UInt64 bytes); const bool send_metadata; size_t threadpool_size; @@ -244,7 +242,7 @@ public: UInt64 getSize() const override { return size; } - UInt64 getUnreservedSpace() const override { return unreserved_space; } + std::optional getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) 
const override; diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..92cca23ca76 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -211,7 +211,11 @@ UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const { UInt64 res = 0; for (const auto & volume : volumes) - res = std::max(res, volume->getMaxUnreservedFreeSpace()); + { + auto max_unreserved_for_volume = volume->getMaxUnreservedFreeSpace(); + if (max_unreserved_for_volume) + res = std::max(res, *max_unreserved_for_volume); + } return res; } @@ -248,22 +252,37 @@ ReservationPtr StoragePolicy::reserveAndCheck(UInt64 bytes) const ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const { UInt64 max_space = 0; + bool found_bottomless_disk = false; DiskPtr max_disk; + for (const auto & volume : volumes) { for (const auto & disk : volume->getDisks()) { - auto avail_space = disk->getAvailableSpace(); - if (avail_space > max_space) + auto available_space = disk->getAvailableSpace(); + + if (!available_space) { - max_space = avail_space; + max_disk = disk; + found_bottomless_disk = true; + break; + } + + if (*available_space > max_space) + { + max_space = *available_space; max_disk = disk; } } + + if (found_bottomless_disk) + break; } + if (!max_disk) throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "There is no space on any disk in storage policy: {}. 
" "It's likely all disks are broken", name); + auto reservation = max_disk->reserve(0); if (!reservation) { diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 64bd2619665..885b1d56b0d 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -40,20 +40,28 @@ VolumeJBOD::VolumeJBOD( auto ratio = config.getDouble(config_prefix + ".max_data_part_size_ratio"); if (ratio < 0) throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "'max_data_part_size_ratio' have to be not less then 0."); + UInt64 sum_size = 0; std::vector sizes; for (const auto & disk : disks) { - sizes.push_back(disk->getTotalSpace()); - sum_size += sizes.back(); + auto size = disk->getTotalSpace(); + sizes.push_back(*size); + if (size) + sum_size += *size; + else + break; } - max_data_part_size = static_cast(sum_size * ratio / disks.size()); - for (size_t i = 0; i < disks.size(); ++i) + if (sizes.size() == disks.size()) { - if (sizes[i] < max_data_part_size) + max_data_part_size = static_cast(sum_size * ratio / disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { - LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", - backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + if (sizes[i] < max_data_part_size) + { + LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", + backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + } } } } diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index ef6f215bf18..8d270a6c71c 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -68,7 +68,7 @@ private: struct DiskWithSize { DiskPtr disk; - uint64_t free_size = 0; + std::optional free_size = 0; DiskWithSize(DiskPtr disk_) : disk(disk_) @@ -80,7 +80,7 @@ private: return free_size < 
rhs.free_size; } - ReservationPtr reserve(uint64_t bytes) + ReservationPtr reserve(UInt64 bytes) { ReservationPtr reservation = disk->reserve(bytes); if (!reservation) diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 1eb1c27211c..9fbf9b0cbe7 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -22,19 +22,19 @@ namespace struct FilesystemAvailable { static constexpr auto name = "filesystemAvailable"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getAvailableSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getAvailableSpace().value_or(std::numeric_limits::max()); } }; struct FilesystemUnreserved { static constexpr auto name = "filesystemUnreserved"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getUnreservedSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getUnreservedSpace().value_or(std::numeric_limits::max()); } }; struct FilesystemCapacity { static constexpr auto name = "filesystemCapacity"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getTotalSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getTotalSpace().value_or(std::numeric_limits::max()); } }; template diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index e6e1a03f11c..0fbcfc9e6a1 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -191,14 +191,21 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values auto available = disk->getAvailableSpace(); auto unreserved = disk->getUnreservedSpace(); - new_values[fmt::format("DiskTotal_{}", name)] = { total, - "The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; - new_values[fmt::format("DiskUsed_{}", name)] = { total - available, - "Used bytes on the disk (virtual filesystem). 
Remote filesystems not always provide this information." }; - new_values[fmt::format("DiskAvailable_{}", name)] = { available, - "Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; - new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved, - "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." }; + new_values[fmt::format("DiskTotal_{}", name)] = { *total, + "The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information." }; + + if (available) + { + new_values[fmt::format("DiskUsed_{}", name)] = { *total - *available, + "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; + + new_values[fmt::format("DiskAvailable_{}", name)] = { *available, + "Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information." }; + } + + if (unreserved) + new_values[fmt::format("DiskUnreserved_{}", name)] = { *unreserved, + "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems may not provide this information." 
}; } } diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index e1da57744b3..391b04573d7 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -111,11 +111,15 @@ bool MergeTreePartsMover::selectPartsForMove( { for (const auto & disk : volumes[i]->getDisks()) { - UInt64 required_maximum_available_space = static_cast(disk->getTotalSpace() * policy->getMoveFactor()); - UInt64 unreserved_space = disk->getUnreservedSpace(); + auto total_space = disk->getTotalSpace(); + auto unreserved_space = disk->getUnreservedSpace(); + if (total_space && unreserved_space) + { + UInt64 required_maximum_available_space = static_cast(*total_space * policy->getMoveFactor()); - if (unreserved_space < required_maximum_available_space && !disk->isBroken()) - need_to_move.emplace(disk, required_maximum_available_space - unreserved_space); + if (*unreserved_space < required_maximum_available_space && !disk->isBroken()) + need_to_move.emplace(disk, required_maximum_available_space - *unreserved_space); + } } } } diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 002da7abd14..23a00cc7ae5 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -64,9 +64,9 @@ Pipe StorageSystemDisks::read( { col_name->insert(disk_name); col_path->insert(disk_ptr->getPath()); - col_free->insert(disk_ptr->getAvailableSpace()); - col_total->insert(disk_ptr->getTotalSpace()); - col_unreserved->insert(disk_ptr->getUnreservedSpace()); + col_free->insert(disk_ptr->getAvailableSpace().value_or(std::numeric_limits::max())); + col_total->insert(disk_ptr->getTotalSpace().value_or(std::numeric_limits::max())); + col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = 
disk_ptr->getDataSourceDescription(); col_type->insert(toString(data_source_description.type)); From e1bf96a786be0883993d2d9e8a5d2c1fcd89095c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 14:29:15 +0800 Subject: [PATCH 026/230] finish dev --- src/Functions/geohashEncode.cpp | 76 ++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index bc0c8b8fc5f..a05fa7fc8d6 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -37,7 +37,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + // ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } @@ -59,7 +59,50 @@ public: } template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const + bool tryVectorVector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + 
const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + const UInt64 precision_value = std::min(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH); + + const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); + + result = std::move(col_str); + + return true; + + } + + template + bool tryVectorConstant(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const { const ColumnVector * longitude = checkAndGetColumn>(lon_column); const ColumnVector * latitude = checkAndGetColumn>(lat_column); @@ -105,16 +148,29 @@ public: const IColumn * longitude = arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + if (arguments.size() < 3 || isColumnConst(*arguments[3].column)) + { + const UInt64 precision_value = std::min( + GEOHASH_MAX_TEXT_LENGTH, arguments.size() == 3 ? 
arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - ColumnPtr res_column; + ColumnPtr res_column; + if (tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column)) + return res_column; + } + else + { + const IColumn * precision = arguments[2].column.get(); + ColumnPtr res_column; + if (tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column)) + return res_column; - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return res_column; + } std::string arguments_description; for (size_t i = 0; i < arguments.size(); ++i) From 1f91a75b5472f3f1321aac9a76c3078880ba5dc9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 15:52:58 +0800 Subject: [PATCH 027/230] fix bugs of geoHashEncode --- src/Functions/geohashEncode.cpp | 114 ++++++------------------------- src/Storages/HDFS/HDFSCommon.cpp | 4 +- 2 files changed, 22 insertions(+), 96 deletions(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index a05fa7fc8d6..5f225a96c2b 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -58,14 +59,25 @@ public: return std::make_shared(); } - template - bool tryVectorVector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + ColumnPtr 
executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; + const IColumn * longitude = arguments[0].column.get(); + const IColumn * latitude = arguments[1].column.get(); + ColumnPtr precision; + if (arguments.size() < 3) + precision = DataTypeUInt8().createColumnConst(longitude->size(), GEOHASH_MAX_TEXT_LENGTH); + else + precision = arguments[2].column; + + ColumnPtr res_column; + vector(longitude, latitude, precision.get(), res_column); + return res_column; + } + +private: + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); @@ -80,8 +92,8 @@ public: for (size_t i = 0; i < size; ++i) { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); + const Float64 longitude_value = lon_column->getFloat64(i); + const Float64 latitude_value = lat_column->getFloat64(i); const UInt64 precision_value = std::min(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH); const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); @@ -96,92 +108,6 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); result = std::move(col_str); - - return true; - - } - - template - bool tryVectorConstant(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return 
false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); - - result = std::move(col_str); - - return true; - - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * longitude = arguments[0].column.get(); - const IColumn * latitude = arguments[1].column.get(); - - if (arguments.size() < 3 || isColumnConst(*arguments[3].column)) - { - const UInt64 precision_value = std::min( - GEOHASH_MAX_TEXT_LENGTH, arguments.size() == 3 ? 
arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr res_column; - if (tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column)) - return res_column; - } - else - { - const IColumn * precision = arguments[2].column.get(); - ColumnPtr res_column; - if (tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column)) - return res_column; - - } - - std::string arguments_description; - for (size_t i = 0; i < arguments.size(); ++i) - { - if (i != 0) - arguments_description += ", "; - arguments_description += arguments[i].column->getName(); - } - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument types: {} for function {}", - arguments_description, getName()); } }; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 932e80831fe..7b149518c0a 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -38,8 +38,8 @@ HDFSFileInfo::~HDFSFileInfo() } -void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, - const String & prefix, bool isUser) +void HDFSBuilderWrapper::loadFromConfig( + const Poco::Util::AbstractConfiguration & config, const String & prefix, [[maybe_unused]] bool isUser) { Poco::Util::AbstractConfiguration::Keys keys; From 39806657711f933c7e0d0fa04e8cc0e8cd769eaa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 15:58:28 +0800 Subject: [PATCH 028/230] fix uts --- tests/queries/0_stateless/00932_geohash_support.reference | 4 ++++ tests/queries/0_stateless/00932_geohash_support.sql | 
5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00932_geohash_support.reference b/tests/queries/0_stateless/00932_geohash_support.reference index ffc290681c7..0dcb084eb6d 100644 --- a/tests/queries/0_stateless/00932_geohash_support.reference +++ b/tests/queries/0_stateless/00932_geohash_support.reference @@ -9,6 +9,10 @@ default precision: ezs42d000000 mixing const and non-const-columns: ezs42d000000 +ezs42d000000 +ezs42d000000 +ezs42d000000 +ezs42d000000 from table (with const precision): 1 6 Ok 1 6 Ok diff --git a/tests/queries/0_stateless/00932_geohash_support.sql b/tests/queries/0_stateless/00932_geohash_support.sql index aeed72176b9..89f8eba9ca2 100644 --- a/tests/queries/0_stateless/00932_geohash_support.sql +++ b/tests/queries/0_stateless/00932_geohash_support.sql @@ -24,7 +24,10 @@ select geohashEncode(-5.60302734375, 42.593994140625); select 'mixing const and non-const-columns:'; select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), 0); -select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); -- { serverError 44 } +select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); +select geohashEncode(-5.60302734375, materialize(42.593994140625), 0); +select geohashEncode(materialize(-5.60302734375), 42.593994140625, 0); +select geohashEncode(-5.60302734375, 42.593994140625, 0); select 'from table (with const precision):'; From 056e5824b57a78314b7ae565585ef0afea1bd836 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 16:02:42 +0800 Subject: [PATCH 029/230] remove useless code --- src/Functions/geohashEncode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 5f225a96c2b..ff61bf7d27c 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -38,7 +38,6 @@ public: bool isVariadic() 
const override { return true; } size_t getNumberOfArguments() const override { return 0; } - // ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } From 87907dafa7a8179382c98cb1718b58a002617e08 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 May 2023 14:27:37 +0800 Subject: [PATCH 030/230] fix code style --- src/Functions/geohashEncode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index ff61bf7d27c..7c353b822aa 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int ILLEGAL_COLUMN; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } From adfedb4df01bd0dcd2870df5f6b28b82017650a0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 6 Jun 2023 14:46:34 +0200 Subject: [PATCH 031/230] Add USE NAMED COLLECTION access --- src/Access/Common/AccessRightsElement.cpp | 2 +- src/Access/Common/AccessType.h | 1 + .../ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 29 +++++++++++------ src/Storages/NamedCollectionsHelpers.h | 2 +- .../helpers/0_common_instance_users.xml | 6 +++- .../test_storage_s3/configs/access.xml | 19 +++++++++++ tests/integration/test_storage_s3/test.py | 32 +++++++++++++++---- 9 files changed, 74 insertions(+), 21 deletions(-) create mode 100644 tests/integration/test_storage_s3/configs/access.xml diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index e11d43634ec..835f414df37 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ 
b/src/Access/Common/AccessRightsElement.cpp @@ -155,7 +155,7 @@ namespace AccessRightsElement::AccessRightsElement(AccessFlags access_flags_, std::string_view database_) - : access_flags(access_flags_), database(database_), any_database(false) + : access_flags(access_flags_), database(database_), parameter(database_), any_database(false), any_parameter(false) { } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 6394c0279a7..6625ccb652b 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -142,6 +142,7 @@ enum class AccessType M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ + M(USE_NAMED_COLLECTION, "USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 65147ee664e..2dc7f6145b3 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -217,7 +217,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) std::optional configuration; std::string settings_config_prefix = config_prefix + ".clickhouse"; - auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; + auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 730217f96b7..e61409e2b54 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -71,7 +71,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) MySQLSettings mysql_settings; std::optional dictionary_configuration; - auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { auto allowed_arguments{dictionary_allowed_keys}; diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 83128ab025a..efd5af29f48 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -1,4 +1,5 @@ #include "NamedCollectionsHelpers.h" +#include #include #include #include @@ -15,19 +16,16 @@ namespace ErrorCodes namespace { - NamedCollectionPtr tryGetNamedCollectionFromASTs(ASTs asts, bool throw_unknown_collection) + std::optional getCollectionName(ASTs asts) { if (asts.empty()) - return nullptr; + return std::nullopt; const auto * identifier = asts[0]->as(); if (!identifier) - return nullptr; + return std::nullopt; - const auto & collection_name = identifier->name(); - if (throw_unknown_collection) - return NamedCollectionFactory::instance().get(collection_name); - return NamedCollectionFactory::instance().tryGet(collection_name); + return identifier->name(); } std::optional>> getKeyValueFromAST(ASTPtr ast, bool fallback_to_ast_value, ContextPtr context) @@ -74,10 +72,21 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( NamedCollectionUtils::loadIfNot(); - auto collection 
= tryGetNamedCollectionFromASTs(asts, throw_unknown_collection); + auto collection_name = getCollectionName(asts); + if (!collection_name.has_value()) + return nullptr; + + NamedCollectionPtr collection; + if (throw_unknown_collection) + collection = NamedCollectionFactory::instance().get(*collection_name); + else + collection = NamedCollectionFactory::instance().tryGet(*collection_name); + if (!collection) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + auto collection_copy = collection->duplicate(); if (asts.size() == 1) @@ -106,12 +115,14 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( } MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto collection_name = config.getString(config_prefix + ".name", ""); if (collection_name.empty()) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name); + const auto & collection = NamedCollectionFactory::instance().get(collection_name); auto collection_copy = collection->duplicate(); diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index 1473a3fbe48..15ed7c9e19b 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -22,7 +22,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( ASTs asts, ContextPtr context, bool throw_unknown_collection = true, std::vector> * complex_args = nullptr); /// Helper function to get named collection for dictionary source. /// Dictionaries have collection name as name argument of dict configuration and other arguments are overrides. 
-MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); +MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); HTTPHeaderEntries getHeadersFromNamedCollection(const NamedCollection & collection); diff --git a/tests/integration/helpers/0_common_instance_users.xml b/tests/integration/helpers/0_common_instance_users.xml index 3399ef5915a..6aae12400fd 100644 --- a/tests/integration/helpers/0_common_instance_users.xml +++ b/tests/integration/helpers/0_common_instance_users.xml @@ -1,7 +1,11 @@ - 1 + + GRANT ACCESS MANAGEMENT ON *.* WITH GRANT OPTION + GRANT ALL ON *.* WITH GRANT OPTION + GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION + diff --git a/tests/integration/test_storage_s3/configs/access.xml b/tests/integration/test_storage_s3/configs/access.xml new file mode 100644 index 00000000000..8bded9104f6 --- /dev/null +++ b/tests/integration/test_storage_s3/configs/access.xml @@ -0,0 +1,19 @@ + + + + + default + default + + GRANT admin_role + + + + + + + GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION + + + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f983bd618e3..01dd4fd7856 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -55,6 +55,7 @@ def started_cluster(): "configs/named_collections.xml", "configs/schema_cache.xml", ], + user_configs=["configs/access.xml"], ) cluster.add_instance( "s3_max_redirects", @@ -921,22 +922,39 @@ def test_predefined_connection_configuration(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance name = "test_table" - instance.query("drop table if exists {}".format(name)) + instance.query("CREATE USER user") + instance.query("GRANT CREATE ON *.* TO user") + instance.query("GRANT 
SOURCES ON *.* TO user") + instance.query("GRANT SELECT ON *.* TO user") + + instance.query(f"drop table if exists {name}", user="user") + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" + ) + assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + ) + assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error + + instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( - "CREATE TABLE {} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')".format(name) + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" ) - instance.query("INSERT INTO {} SELECT number FROM numbers(10)".format(name)) - result = instance.query("SELECT * FROM {}".format(name)) + instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") + result = instance.query(f"SELECT * FROM {name}") assert result == instance.query("SELECT number FROM numbers(10)") result = instance.query( - "SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')" + "SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')", user="user" ) assert result == instance.query("SELECT number FROM numbers(10)") - result = instance.query_and_get_error("SELECT * FROM s3(no_collection)") - assert "There is no named collection `no_collection`" in result + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") + assert "There is no named collection `no_collection`" in error + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") + assert "There is no named collection `no_collection`" in error result = "" From c6acdd7008e625907e2207a5b4cff554b3490a9d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 6 Jun 2023 18:53:31 +0200 
Subject: [PATCH 032/230] Fix fast test, fix black check --- tests/integration/test_storage_s3/test.py | 18 +++++++++++++----- .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 +++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 01dd4fd7856..f1cbd3366b4 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -931,15 +931,23 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error( f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) - assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error - error = instance.query_and_get_error( - f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + in error + ) + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", + user="user", + ) + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + in error ) - assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( - f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", + user="user", ) instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index ec245d8b9e0..c32ac39a1f5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ 
b/tests/queries/0_stateless/01271_show_privileges.reference @@ -94,6 +94,7 @@ SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL +USE NAMED COLLECTION ['USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION CONTROL NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..9630767a552 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 
'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 
'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 
'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 
107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW 
POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 
'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' 
= 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 
'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' 
= 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE 
REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE 
VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION 
QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 6c48aba69392b68c08a4105d6c5ebd9dbf9392c1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 8 Jun 2023 20:30:34 +0200 Subject: [PATCH 033/230] Fix tests --- tests/integration/helpers/0_common_instance_users.xml | 6 +----- .../test_dictionaries_mysql/configs/users.xml | 1 + tests/integration/test_dictionaries_mysql/test.py | 3 ++- .../test_dictionaries_postgresql/configs/users.xml | 10 ++++++++++ .../test_mysql_database_engine/configs/users.xml | 9 +++++++++ tests/integration/test_mysql_database_engine/test.py | 1 + .../configs/users.d/0a_users_no_default_access.xml | 9 +++++++++ .../test_postgresql_database_engine/configs/users.xml | 9 +++++++++ .../test_postgresql_database_engine/test.py | 2 +- .../configs/users.xml | 7 +++++++ .../test_redirect_url_storage/configs/users.xml | 9 +++++++++ tests/integration/test_redirect_url_storage/test.py | 1 + .../integration/test_storage_mongodb/configs/users.xml | 9 +++++++++ tests/integration/test_storage_mongodb/test.py | 1 + 
tests/integration/test_storage_mysql/configs/users.xml | 1 + tests/integration/test_storage_mysql/test.py | 1 + .../test_storage_postgresql/configs/users.xml | 9 +++++++++ tests/integration/test_storage_postgresql/test.py | 2 +- tests/integration/test_storage_s3/configs/users.xml | 9 +++++++++ tests/integration/test_storage_s3/test.py | 2 +- .../test_table_function_mongodb/configs/users.xml | 9 +++++++++ tests/integration/test_table_function_mongodb/test.py | 1 + 22 files changed, 102 insertions(+), 9 deletions(-) create mode 100644 tests/integration/test_dictionaries_postgresql/configs/users.xml create mode 100644 tests/integration/test_mysql_database_engine/configs/users.xml create mode 100644 tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml create mode 100644 tests/integration/test_postgresql_database_engine/configs/users.xml create mode 100644 tests/integration/test_redirect_url_storage/configs/users.xml create mode 100644 tests/integration/test_storage_mongodb/configs/users.xml create mode 100644 tests/integration/test_storage_postgresql/configs/users.xml create mode 100644 tests/integration/test_storage_s3/configs/users.xml create mode 100644 tests/integration/test_table_function_mongodb/configs/users.xml diff --git a/tests/integration/helpers/0_common_instance_users.xml b/tests/integration/helpers/0_common_instance_users.xml index 6aae12400fd..3399ef5915a 100644 --- a/tests/integration/helpers/0_common_instance_users.xml +++ b/tests/integration/helpers/0_common_instance_users.xml @@ -1,11 +1,7 @@ - - GRANT ACCESS MANAGEMENT ON *.* WITH GRANT OPTION - GRANT ALL ON *.* WITH GRANT OPTION - GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION - + 1 diff --git a/tests/integration/test_dictionaries_mysql/configs/users.xml b/tests/integration/test_dictionaries_mysql/configs/users.xml index 4555a2ed494..70c7d3bc2c1 100644 --- a/tests/integration/test_dictionaries_mysql/configs/users.xml +++ 
b/tests/integration/test_dictionaries_mysql/configs/users.xml @@ -12,6 +12,7 @@ default default + 1 diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index a12139a0bea..8252a2fd514 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -8,9 +8,10 @@ import logging DICTS = ["configs/dictionaries/mysql_dict1.xml", "configs/dictionaries/mysql_dict2.xml"] CONFIG_FILES = ["configs/remote_servers.xml", "configs/named_collections.xml"] +USER_CONFIGS = ["configs/users.xml"] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", main_configs=CONFIG_FILES, with_mysql=True, dictionaries=DICTS + "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, with_mysql=True, dictionaries=DICTS ) create_table_mysql_template = """ diff --git a/tests/integration/test_dictionaries_postgresql/configs/users.xml b/tests/integration/test_dictionaries_postgresql/configs/users.xml new file mode 100644 index 00000000000..beb08eb6ed4 --- /dev/null +++ b/tests/integration/test_dictionaries_postgresql/configs/users.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_mysql_database_engine/configs/users.xml b/tests/integration/test_mysql_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 52a7b319551..18dde5307fd 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -12,6 +12,7 @@ cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", 
"configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_mysql=True, stay_alive=True, ) diff --git a/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml b/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml new file mode 100644 index 00000000000..b8f38f04ca9 --- /dev/null +++ b/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml @@ -0,0 +1,9 @@ + + + + + default + default + + + diff --git a/tests/integration/test_postgresql_database_engine/configs/users.xml b/tests/integration/test_postgresql_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_postgresql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index d9f06f0295b..68e6f444f73 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -8,7 +8,7 @@ from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], with_postgres=True + "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True ) postgres_table_template = """ diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml index 26ea20e012f..e0c51962193 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml +++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml @@ -4,4 +4,11 @@ 1 + + + + default + 1 + + diff --git 
a/tests/integration/test_redirect_url_storage/configs/users.xml b/tests/integration/test_redirect_url_storage/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_redirect_url_storage/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index b2178655444..225a34c9109 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -9,6 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], + user_configs=["configs/user.xml"], with_zookeeper=False, with_hdfs=True, ) diff --git a/tests/integration/test_storage_mongodb/configs/users.xml b/tests/integration/test_storage_mongodb/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_mongodb/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 6ba5520704d..174ad908d60 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -17,6 +17,7 @@ def started_cluster(request): "configs_secure/config.d/ssl_conf.xml", "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], with_mongo=True, with_mongo_secure=request.param, ) diff --git a/tests/integration/test_storage_mysql/configs/users.xml b/tests/integration/test_storage_mysql/configs/users.xml index d030ccb0e72..a11985dd113 100644 --- a/tests/integration/test_storage_mysql/configs/users.xml +++ b/tests/integration/test_storage_mysql/configs/users.xml @@ -12,6 +12,7 @@ ::/0 default + 1 diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 
49629575ec7..3e3132949e7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -13,6 +13,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_mysql=True, ) node2 = cluster.add_instance( diff --git a/tests/integration/test_storage_postgresql/configs/users.xml b/tests/integration/test_storage_postgresql/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_postgresql/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index d60a90ed7ce..2ce1bac3cff 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -7,7 +7,7 @@ from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], with_postgres=True + "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True ) node2 = cluster.add_instance( "node2", diff --git a/tests/integration/test_storage_s3/configs/users.xml b/tests/integration/test_storage_s3/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_s3/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f1cbd3366b4..75473f3c406 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -55,7 +55,7 @@ def started_cluster(): "configs/named_collections.xml", "configs/schema_cache.xml", ], - 
user_configs=["configs/access.xml"], + user_configs=["configs/access.xml", "configs/users.xml"], ) cluster.add_instance( "s3_max_redirects", diff --git a/tests/integration/test_table_function_mongodb/configs/users.xml b/tests/integration/test_table_function_mongodb/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_table_function_mongodb/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_table_function_mongodb/test.py b/tests/integration/test_table_function_mongodb/test.py index e0ad71b0079..3b6ace9d11b 100644 --- a/tests/integration/test_table_function_mongodb/test.py +++ b/tests/integration/test_table_function_mongodb/test.py @@ -16,6 +16,7 @@ def started_cluster(request): main_configs=[ "configs_secure/config.d/ssl_conf.xml", ], + user_configs=["configs/users.xml"], with_mongo_secure=request.param, ) cluster.start() From 5cf29fbf762e0efc51142afb3396a16414c121fc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 9 Jun 2023 13:13:33 +0200 Subject: [PATCH 034/230] Fix black check --- tests/integration/test_dictionaries_mysql/test.py | 6 +++++- tests/integration/test_postgresql_database_engine/test.py | 5 ++++- tests/integration/test_storage_postgresql/test.py | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index 8252a2fd514..ee0d957b8a9 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -11,7 +11,11 @@ CONFIG_FILES = ["configs/remote_servers.xml", "configs/named_collections.xml"] USER_CONFIGS = ["configs/users.xml"] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, with_mysql=True, dictionaries=DICTS + "instance", + main_configs=CONFIG_FILES, + user_configs=USER_CONFIGS, + 
with_mysql=True, + dictionaries=DICTS, ) create_table_mysql_template = """ diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index 68e6f444f73..59a464f9020 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -8,7 +8,10 @@ from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True + "node1", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, ) postgres_table_template = """ diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 2ce1bac3cff..0c8fc597b5c 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -7,7 +7,10 @@ from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True + "node1", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, ) node2 = cluster.add_instance( "node2", From 7bd1c183ebe535ec3f8799e82d73f9b064c967c8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Jun 2023 13:16:02 +0300 Subject: [PATCH 035/230] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index ddbb02bf4ef..ba0068e9c59 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ 
b/tests/integration/test_attach_table_normalizer/test.py @@ -24,7 +24,7 @@ def replace_substring_to_substr(node): "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", ], - user="root", + user="root" ) From 6c776f4483382afa395bb5929e1b1351468795ec Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 12:40:53 +0200 Subject: [PATCH 036/230] Better --- src/Access/Common/AccessType.h | 14 ++++++------- src/Access/UsersConfigAccessStorage.cpp | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 4 ++-- tests/integration/test_storage_s3/test.py | 25 ++++++++++++++++++++--- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 3a94a5037b2..16ee5177d66 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -70,7 +70,7 @@ enum class AccessType M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \ \ M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", DATABASE, ALTER_DATABASE) /* allows to execute ALTER MODIFY SETTING */\ - M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute ALTER NAMED COLLECTION */\ + M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute ALTER NAMED COLLECTION */\ \ M(ALTER_TABLE, "", GROUP, ALTER) \ M(ALTER_DATABASE, "", GROUP, ALTER) \ @@ -92,7 +92,7 @@ enum class AccessType M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables with arbitrary table engine */\ M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \ - M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute CREATE NAMED COLLECTION */ \ + M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \ M(CREATE, "", 
GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\ @@ -101,7 +101,7 @@ enum class AccessType implicitly enabled by the grant DROP_TABLE */\ M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\ M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ - M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute DROP NAMED COLLECTION */\ + M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute DROP NAMED COLLECTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ M(UNDROP_TABLE, "", TABLE, ALL) /* allows to execute {UNDROP} TABLE */\ @@ -140,10 +140,10 @@ enum class AccessType M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ - M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(USE_NAMED_COLLECTION, "USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \ + M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(USE_NAMED_COLLECTION, "NAMED COLLECTION USAGE, NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git 
a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 187258d0fcd..15765045c97 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -328,7 +328,7 @@ namespace if (!named_collection_control) { - user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL); + user->access.revoke(AccessType::NAMED_COLLECTION_ADMIN); } if (!show_named_collections_secrets) diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index efd5af29f48..29d47e131a6 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -76,6 +76,8 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection_name.has_value()) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + NamedCollectionPtr collection; if (throw_unknown_collection) collection = NamedCollectionFactory::instance().get(*collection_name); @@ -85,8 +87,6 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); - auto collection_copy = collection->duplicate(); if (asts.size() == 1) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 75473f3c406..28117d694d6 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -57,6 +57,16 @@ def started_cluster(): ], user_configs=["configs/access.xml", "configs/users.xml"], ) + cluster.add_instance( + "dummy_without_named_collections", + with_minio=True, + main_configs=[ + "configs/defaultS3.xml", + "configs/named_collections.xml", + "configs/schema_cache.xml", + ], + user_configs=["configs/access.xml"], + ) cluster.add_instance( "s3_max_redirects", with_minio=True, @@ -919,7 +929,7 @@ def test_truncate_table(started_cluster): def 
test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + instance = started_cluster.instances["dummy_without_named_collections"] # type: ClickHouseInstance name = "test_table" instance.query("CREATE USER user") @@ -944,7 +954,7 @@ def test_predefined_connection_configuration(started_cluster): in error ) - instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") + instance.query("GRANT NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user", @@ -960,8 +970,17 @@ def test_predefined_connection_configuration(started_cluster): assert result == instance.query("SELECT number FROM numbers(10)") error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") - assert "There is no named collection `no_collection`" in error + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + in error + ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + in error + ) + instance = started_cluster.instances["dummy"] # has named collection access + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error From 62b94073a2c79f3f336f62ad359e2789541dbdd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 13:32:56 +0200 Subject: [PATCH 037/230] Fix black check --- tests/integration/test_storage_s3/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 28117d694d6..cec92222d4c 100644 --- a/tests/integration/test_storage_s3/test.py +++ 
b/tests/integration/test_storage_s3/test.py @@ -929,7 +929,9 @@ def test_truncate_table(started_cluster): def test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy_without_named_collections"] # type: ClickHouseInstance + instance = started_cluster.instances[ + "dummy_without_named_collections" + ] # type: ClickHouseInstance name = "test_table" instance.query("CREATE USER user") @@ -979,7 +981,7 @@ def test_predefined_connection_configuration(started_cluster): "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" in error ) - instance = started_cluster.instances["dummy"] # has named collection access + instance = started_cluster.instances["dummy"] # has named collection access error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error From 188c613c655a918d618ade00ef7f763b1601d4e5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 14:30:36 +0200 Subject: [PATCH 038/230] Update tests --- .../0_stateless/01271_show_privileges.reference | 14 +++++++------- .../02117_show_create_table_system.reference | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c78c1a540f2..13113aeb194 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -39,7 +39,7 @@ ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTE ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE -ALTER NAMED COLLECTION [] 
NAMED_COLLECTION NAMED COLLECTION CONTROL +ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN ALTER TABLE [] \N ALTER ALTER DATABASE [] \N ALTER ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW @@ -53,14 +53,14 @@ CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE ARBITRARY TEMPORARY TABLE CREATE ARBITRARY TEMPORARY TABLE [] GLOBAL CREATE CREATE FUNCTION [] GLOBAL CREATE -CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP DROP FUNCTION [] GLOBAL DROP -DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN DROP [] \N ALL UNDROP TABLE [] TABLE ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL @@ -92,10 +92,10 @@ SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL -SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL -SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL -USE NAMED COLLECTION ['USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION CONTROL -NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL +SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION ADMIN +SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN +USE NAMED COLLECTION ['NAMED COLLECTION USAGE','NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN +NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP 
DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index a6db15d6bbf..0e71a5ed024 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY 
TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART 
REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER 
VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 
'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW 
POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 
150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 
70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 
'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 
53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 
'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW 
MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM 
DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
From 2b40734900f121f60ad50e37c2c6fa2f9376e3d5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 14:29:16 +0200 Subject: [PATCH 039/230] use const-size tasks in prefetch pool --- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 107 +++++++----------- 1 file changed, 43 insertions(+), 64 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 63a205a1a61..f0dd2123ca4 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -1,18 +1,18 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include namespace ProfileEvents @@ -296,31 +296,12 @@ MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::getTask(size_t thread) return task; } -size_t MergeTreePrefetchedReadPool::getApproxSizeOfGranule(const IMergeTreeDataPart & part) const +size_t getApproximateSizeOfGranule(const IMergeTreeDataPart & part, const Names & columns_to_read) { - const auto & columns = part.getColumns(); - auto all_columns_are_fixed_size = columns.end() == std::find_if( - columns.begin(), columns.end(), - [](const auto & col){ return col.type->haveMaximumSizeOfValue() == false; }); - - if (all_columns_are_fixed_size) - { - size_t approx_size = 0; - for (const auto & col : columns) - approx_size += col.type->getMaximumSizeOfValueInMemory() * fixed_index_granularity; - - if (!index_granularity_bytes) - return approx_size; - - return std::min(index_granularity_bytes, approx_size); - } - - const size_t approx_size = static_cast(std::round(static_cast(part.getBytesOnDisk()) / part.getMarksCount())); - - if (!index_granularity_bytes) - return approx_size; - - return std::min(index_granularity_bytes, approx_size); + ColumnSize 
columns_size{}; + for (const auto & col_name : columns_to_read) + columns_size.add(part.getColumnSize(col_name)); + return columns_size.data_compressed / part.getMarksCount(); } MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInfos( @@ -347,7 +328,7 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf for (const auto & range : part.ranges) part_info->sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproxSizeOfGranule(*part_info->data_part); + part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); const auto task_columns = getReadTaskColumns( part_reader_info, @@ -357,7 +338,7 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf prewhere_info, actions_settings, reader_settings, - /*with_subcolumns=*/ true); + /* with_subcolumns */ true); part_info->size_predictor = !predict_block_size_bytes ? nullptr @@ -421,10 +402,6 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr } size_t min_prefetch_step_marks = 0; - if (settings.filesystem_prefetches_limit && settings.filesystem_prefetches_limit < sum_marks) - { - min_prefetch_step_marks = static_cast(std::round(static_cast(sum_marks) / settings.filesystem_prefetches_limit)); - } for (const auto & part : parts_infos) { @@ -437,12 +414,6 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr part->prefetch_step_marks = std::max( 1, static_cast(std::round(static_cast(settings.filesystem_prefetch_step_bytes) / part->approx_size_of_mark))); } - else - { - /// Experimentally derived ratio. - part->prefetch_step_marks = static_cast( - std::round(std::pow(std::max(1, static_cast(std::round(sum_marks / 1000))), double(1.5)))); - } /// This limit is important to avoid spikes of slow aws getObject requests when parallelizing within one file. 
/// (The default is taken from here https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/use-byte-range-fetches.html). @@ -450,13 +421,13 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr && settings.filesystem_prefetch_min_bytes_for_single_read_task && part->approx_size_of_mark < settings.filesystem_prefetch_min_bytes_for_single_read_task) { - - const size_t new_min_prefetch_step_marks = static_cast( + const size_t min_prefetch_step_marks_by_total_cols = static_cast( std::ceil(static_cast(settings.filesystem_prefetch_min_bytes_for_single_read_task) / part->approx_size_of_mark)); + /// At least one task to start working on it right now and another one to prefetch in the meantime. + const size_t new_min_prefetch_step_marks = std::min(min_prefetch_step_marks_by_total_cols, sum_marks / threads / 2); if (min_prefetch_step_marks < new_min_prefetch_step_marks) { - LOG_TEST( - log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); + LOG_DEBUG(log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); min_prefetch_step_marks = new_min_prefetch_step_marks; } @@ -464,25 +435,33 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr if (part->prefetch_step_marks < min_prefetch_step_marks) { - LOG_TEST( - log, "Increasing prefetch step from {} to {} because of the prefetches limit {}", - part->prefetch_step_marks, min_prefetch_step_marks, settings.filesystem_prefetches_limit); + LOG_DEBUG(log, "Increasing prefetch step from {} to {}", part->prefetch_step_marks, min_prefetch_step_marks); part->prefetch_step_marks = min_prefetch_step_marks; } - LOG_TEST(log, - "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", - part->data_part->name, part->sum_marks, part->approx_size_of_mark, - settings.filesystem_prefetch_step_bytes, 
part->prefetch_step_marks, toString(part->ranges)); + LOG_DEBUG( + log, + "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", + part->data_part->name, + part->sum_marks, + part->approx_size_of_mark, + settings.filesystem_prefetch_step_bytes, + part->prefetch_step_marks, + toString(part->ranges)); } const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; LOG_DEBUG( log, - "Sum marks: {}, threads: {}, min_marks_per_thread: {}, result prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", - sum_marks, threads, min_marks_per_thread, settings.filesystem_prefetch_step_bytes, settings.filesystem_prefetches_limit, total_size_approx); + "Sum marks: {}, threads: {}, min_marks_per_thread: {}, min prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", + sum_marks, + threads, + min_marks_per_thread, + min_prefetch_step_marks, + settings.filesystem_prefetches_limit, + total_size_approx); size_t allowed_memory_usage = settings.filesystem_prefetch_max_memory_usage; if (!allowed_memory_usage) @@ -492,6 +471,7 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr : std::nullopt; ThreadsTasks result_threads_tasks; + size_t total_tasks = 0; for (size_t i = 0, part_idx = 0; i < threads && part_idx < parts_infos.size(); ++i) { auto need_marks = min_marks_per_thread; @@ -606,12 +586,11 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr ++priority.value; result_threads_tasks[i].push_back(std::move(read_task)); + ++total_tasks; } } - LOG_TEST( - log, "Result tasks {} for {} threads: {}", - result_threads_tasks.size(), threads, dumpTasks(result_threads_tasks)); + LOG_TEST(log, "Result tasks {} for {} threads: {}", total_tasks, threads, dumpTasks(result_threads_tasks)); return result_threads_tasks; } From e88fc3989534986e78561a967a9263eda7548d3f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 14:32:09 +0200 
Subject: [PATCH 040/230] cosmetics --- .../IO/AsynchronousBoundedReadBuffer.cpp | 23 +++++++++++-------- .../IO/CachedOnDiskReadBufferFromFile.cpp | 4 ++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index f9bd68222ae..6651658e156 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -33,6 +33,15 @@ namespace ProfileEvents extern const Event RemoteFSBuffers; } +namespace +{ +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) +{ + /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. + return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} +} + namespace DB { @@ -42,23 +51,17 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } -static size_t chooseBufferSize(const ReadSettings & settings, size_t file_size) -{ - /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. 
- return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); -} - AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer( ImplPtr impl_, IAsynchronousReader & reader_, const ReadSettings & settings_, AsyncReadCountersPtr async_read_counters_, FilesystemReadPrefetchesLogPtr prefetches_log_) - : ReadBufferFromFileBase(chooseBufferSize(settings_, impl_->getFileSize()), nullptr, 0) + : ReadBufferFromFileBase(chooseBufferSizeForRemoteReading(settings_, impl_->getFileSize()), nullptr, 0) , impl(std::move(impl_)) , read_settings(settings_) , reader(reader_) - , prefetch_buffer(chooseBufferSize(settings_, impl->getFileSize())) + , prefetch_buffer(chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())) , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "") , current_reader_id(getRandomASCIIString(8)) , log(&Poco::Logger::get("AsynchronousBoundedReadBuffer")) @@ -111,7 +114,7 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority) last_prefetch_info.submit_time = std::chrono::system_clock::now(); last_prefetch_info.priority = priority; - chassert(prefetch_buffer.size() == chooseBufferSize(read_settings, impl->getFileSize())); + chassert(prefetch_buffer.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } @@ -190,7 +193,7 @@ bool AsynchronousBoundedReadBuffer::nextImpl() { ProfileEventTimeIncrement watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds); - chassert(memory.size() == chooseBufferSize(read_settings, impl->getFileSize())); + chassert(memory.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore); 
ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 6317aba20e9..bfde6d0984c 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1085,6 +1085,10 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() first_offset, file_segments->toString()); + /// Release buffer a little bit earlier. + if (read_until_position == file_offset_of_buffer_end) + implementation_buffer.reset(); + return result; } From 1d33043fe673d5ebc86b68fbbdb563c1cbcdbb0f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:18:47 +0200 Subject: [PATCH 041/230] changes around buffer sizes --- .../IO/AsynchronousBoundedReadBuffer.cpp | 9 ---- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 41 ++++++++++++++----- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 1 + 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 6651658e156..86ee541dcbd 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -33,15 +33,6 @@ namespace ProfileEvents extern const Event RemoteFSBuffers; } -namespace -{ -size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) -{ - /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. 
- return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); -} -} - namespace DB { diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index eb9c509e459..537c0cf1be7 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -2,14 +2,27 @@ #include +#include #include #include -#include +#include #include -#include -#include #include +#include +#include +using namespace DB; + + +namespace +{ +bool withCache(const ReadSettings & settings) +{ + return settings.remote_fs_cache && settings.enable_filesystem_cache + && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache + || !settings.avoid_readthrough_cache_outside_query_context); +} +} namespace DB { @@ -18,29 +31,35 @@ namespace ErrorCodes extern const int CANNOT_SEEK_THROUGH_FILE; } +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) +{ + /// Only when cache is used we could download bigger portions of FileSegments than what we actually gonna read within particular task. + if (!withCache(settings)) + return settings.remote_fs_buffer_size; + + /// Buffers used for prefetch and pre-download better to have enough size, but not bigger than the whole file. + return std::min(std::max(settings.remote_fs_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} + ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_) - : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0) + : ReadBufferFromFileBase( + use_external_buffer_ ? 
0 : chooseBufferSizeForRemoteReading(settings_, getTotalSize(blobs_to_read_)), nullptr, 0) , settings(settings_) , blobs_to_read(blobs_to_read_) , read_buffer_creator(std::move(read_buffer_creator_)) , cache_log(settings.enable_filesystem_cache_log ? cache_log_ : nullptr) - , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "") + , query_id(CurrentThread::getQueryId()) , use_external_buffer(use_external_buffer_) + , with_cache(withCache(settings)) , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) { if (!blobs_to_read.empty()) current_object = blobs_to_read.front(); - - with_cache = settings.remote_fs_cache - && settings.enable_filesystem_cache - && (!query_id.empty() - || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache - || !settings.avoid_readthrough_cache_outside_query_context); } SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 272ed2b3ac1..9bf55ab69ce 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -86,4 +86,5 @@ private: Poco::Logger * log; }; +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size); } From 1dddcc94726bfca062da2af1b9880df5fa5e4268 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:19:05 +0200 Subject: [PATCH 042/230] use connection pool --- src/Common/PoolBase.h | 88 +++++++++++++------- src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 + src/IO/HTTPCommon.cpp | 59 ++++++++++--- src/IO/HTTPCommon.h | 16 +++- src/IO/ReadBufferFromS3.cpp | 50 ++++++++++- src/IO/ReadBufferFromS3.h | 6 +- src/IO/S3/PocoHTTPClient.cpp | 40 +++++++-- src/IO/S3/PocoHTTPClient.h | 25 ++++++ src/IO/S3/SessionAwareIOStream.h | 4 + 9 files changed, 239 insertions(+), 52 deletions(-) 
diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index 8cabb472d8f..5575b56f299 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -1,9 +1,11 @@ #pragma once -#include #include -#include +#include +#include +#include #include +#include #include #include @@ -15,14 +17,6 @@ namespace ProfileEvents extern const Event ConnectionPoolIsFullMicroseconds; } -namespace DB -{ - namespace ErrorCodes - { - extern const int LOGICAL_ERROR; - } -} - /** A class from which you can inherit and get a pool of something. Used for database connection pools. * Descendant class must provide a method for creating a new object to place in the pool. */ @@ -35,6 +29,22 @@ public: using ObjectPtr = std::shared_ptr; using Ptr = std::shared_ptr>; + enum class BehaviourOnLimit + { + /** + * Default behaviour - when limit on pool size is reached, callers will wait until object will be returned back in pool. + */ + Wait, + + /** + * If no free objects in pool - allocate a new object, but not store it in pool. + * This behaviour is needed when we simply don't want to waste time waiting or if we cannot guarantee that query could be processed using fixed amount of connections. + * For example, when we read from table on s3, one GetObject request corresponds to the whole FileSystemCache segment. This segments are shared between different + * reading tasks, so in general case connection could be taken from pool by one task and returned back by another one. And these tasks are processed completely independently. + */ + AllocateNewBypassingPool, + }; + private: /** The object with the flag, whether it is currently used. 
*/ @@ -89,37 +99,53 @@ public: Object & operator*() && = delete; const Object & operator*() const && = delete; - Object * operator->() & { return &*data->data.object; } - const Object * operator->() const & { return &*data->data.object; } - Object & operator*() & { return *data->data.object; } - const Object & operator*() const & { return *data->data.object; } + Object * operator->() & { return castToObjectPtr(); } + const Object * operator->() const & { return castToObjectPtr(); } + Object & operator*() & { return *castToObjectPtr(); } + const Object & operator*() const & { return *castToObjectPtr(); } /** * Expire an object to make it reallocated later. */ void expire() { - data->data.is_expired = true; + if (data.index() == 1) + std::get<1>(data)->data.is_expired = true; } - bool isNull() const { return data == nullptr; } - - PoolBase * getPool() const - { - if (!data) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry"); - return &data->data.pool; - } + bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); } private: - std::shared_ptr data; + /** + * Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool). + */ + std::variant> data; - explicit Entry(PooledObject & object) : data(std::make_shared(object)) {} + explicit Entry(ObjectPtr && object) : data(std::move(object)) { } + + explicit Entry(PooledObject & object) : data(std::make_shared(object)) { } + + auto castToObjectPtr() const + { + return std::visit( + [](const auto & ptr) + { + using T = std::decay_t; + if constexpr (std::is_same_v) + return ptr.get(); + else + return ptr->data.object.get(); + }, + data); + } }; virtual ~PoolBase() = default; - /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */ + /** Allocates the object. 
+ * If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. + * If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool. + */ Entry get(Poco::Timespan::TimeDiff timeout) { std::unique_lock lock(mutex); @@ -150,6 +176,9 @@ public: return Entry(*items.back()); } + if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool) + return Entry(allocObject()); + Stopwatch blocked; if (timeout < 0) { @@ -184,6 +213,8 @@ private: /** The maximum size of the pool. */ unsigned max_items; + BehaviourOnLimit behaviour_on_limit; + /** Pool. */ Objects items; @@ -192,11 +223,10 @@ private: std::condition_variable available; protected: - Poco::Logger * log; - PoolBase(unsigned max_items_, Poco::Logger * log_) - : max_items(max_items_), log(log_) + PoolBase(unsigned max_items_, Poco::Logger * log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait) + : max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_) { items.reserve(max_items); } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 409eb2a3dc3..fe57fb24bbd 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -132,6 +132,9 @@ std::unique_ptr getClient( client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; + client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", 10000); + client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); + client_configuration.wait_on_pool_size_limit = false; auto proxy_config = 
getProxyConfiguration(config_prefix, config); if (proxy_config) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 3ec9b3d0a83..f3e2064c8bf 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -131,8 +131,12 @@ namespace UInt16 proxy_port_, bool proxy_https_, size_t max_pool_size_, - bool resolve_host_ = true) - : Base(static_cast(max_pool_size_), &Poco::Logger::get("HTTPSessionPool")) + bool resolve_host_, + bool wait_on_pool_size_limit) + : Base( + static_cast(max_pool_size_), + &Poco::Logger::get("HTTPSessionPool"), + wait_on_pool_size_limit ? BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool) , host(host_) , port(port_) , https(https_) @@ -155,11 +159,12 @@ namespace String proxy_host; UInt16 proxy_port; bool is_proxy_https; + bool wait_on_pool_size_limit; bool operator ==(const Key & rhs) const { - return std::tie(target_host, target_port, is_target_https, proxy_host, proxy_port, is_proxy_https) - == std::tie(rhs.target_host, rhs.target_port, rhs.is_target_https, rhs.proxy_host, rhs.proxy_port, rhs.is_proxy_https); + return std::tie(target_host, target_port, is_target_https, proxy_host, proxy_port, is_proxy_https, wait_on_pool_size_limit) + == std::tie(rhs.target_host, rhs.target_port, rhs.is_target_https, rhs.proxy_host, rhs.proxy_port, rhs.is_proxy_https, rhs.wait_on_pool_size_limit); } }; @@ -178,6 +183,7 @@ namespace s.update(k.proxy_host); s.update(k.proxy_port); s.update(k.is_proxy_https); + s.update(k.wait_on_pool_size_limit); return s.get64(); } }; @@ -218,14 +224,14 @@ namespace const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t max_connections_per_endpoint, - bool resolve_host = true) + bool resolve_host, + bool wait_on_pool_size_limit) { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); const std::string & host = uri.getHost(); UInt16 port = uri.getPort(); bool https = isHTTPS(uri); - String proxy_host; UInt16 proxy_port = 0; bool proxy_https = false; @@ -236,11 +242,27 
@@ namespace proxy_https = isHTTPS(proxy_uri); } - HTTPSessionPool::Key key{host, port, https, proxy_host, proxy_port, proxy_https}; + HTTPSessionPool::Key key{host, port, https, proxy_host, proxy_port, proxy_https, wait_on_pool_size_limit}; auto pool_ptr = endpoints_pool.find(key); if (pool_ptr == endpoints_pool.end()) std::tie(pool_ptr, std::ignore) = endpoints_pool.emplace( - key, std::make_shared(host, port, https, proxy_host, proxy_port, proxy_https, max_connections_per_endpoint, resolve_host)); + key, + std::make_shared( + host, + port, + https, + proxy_host, + proxy_port, + proxy_https, + max_connections_per_endpoint, + resolve_host, + wait_on_pool_size_limit)); + + /// Some routines held session objects until the end of its lifetime. Also this routines may create another sessions in this time frame. + /// If some other session holds `lock` because it waits on another lock inside `pool_ptr->second->get` it isn't possible to create any + /// new session and thus finish routine, return session to the pool and unlock the thread waiting inside `pool_ptr->second->get`. + /// To avoid such a deadlock we unlock `lock` before entering `pool_ptr->second->get`. 
+ lock.unlock(); auto retry_timeout = timeouts.connection_timeout.totalMicroseconds(); auto session = pool_ptr->second->get(retry_timeout); @@ -295,14 +317,25 @@ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & } -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host) +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host, + bool wait_on_pool_size_limit) { - return makePooledHTTPSession(uri, {}, timeouts, per_endpoint_pool_size, resolve_host); + return makePooledHTTPSession(uri, {}, timeouts, per_endpoint_pool_size, resolve_host, wait_on_pool_size_limit); } -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host) +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const Poco::URI & proxy_uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host, + bool wait_on_pool_size_limit) { - return HTTPSessionPool::instance().getSession(uri, proxy_uri, timeouts, per_endpoint_pool_size, resolve_host); + return HTTPSessionPool::instance().getSession(uri, proxy_uri, timeouts, per_endpoint_pool_size, resolve_host, wait_on_pool_size_limit); } bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status) { return status == Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY || status == Poco::Net::HTTPResponse::HTTP_FOUND || status == Poco::Net::HTTPResponse::HTTP_SEE_OTHER || status == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT; } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 3616a33c1c7..db8fc2a2a40 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -61,8 +61,20 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t 
keep_alive_ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true); /// As previous method creates session, but tooks it from pool, without and with proxy uri. -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true); -PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true); +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host = true, + bool wait_on_pool_size_limit = true); + +PooledHTTPSessionPtr makePooledHTTPSession( + const Poco::URI & uri, + const Poco::URI & proxy_uri, + const ConnectionTimeouts & timeouts, + size_t per_endpoint_pool_size, + bool resolve_host = true, + bool wait_on_pool_size_limit = true); bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status); diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index d1cb1ec9ab0..364253ba746 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -31,6 +31,23 @@ namespace ProfileEvents extern const Event RemoteReadThrottlerSleepMicroseconds; } +namespace +{ +void resetSession(Aws::S3::Model::GetObjectResult & read_result) +{ + if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) + { + auto & session + = static_cast(*static_cast(session_aware_stream->getSession())); + session.reset(); + } + else if (!dynamic_cast *>(&read_result.GetBody())) + { + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); + } +} +} + namespace DB { namespace ErrorCodes @@ -74,7 +91,10 @@ bool ReadBufferFromS3::nextImpl() if (read_until_position) { if (read_until_position == offset) + { + read_all_range_successfully = true; 
return false; + } if (read_until_position < offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); @@ -154,7 +174,10 @@ bool ReadBufferFromS3::nextImpl() } if (!next_result) + { + read_all_range_successfully = true; return false; + } BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); @@ -240,6 +263,8 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence) if (offset_ == getPosition() && whence == SEEK_SET) return offset_; + read_all_range_successfully = false; + if (impl && restricted_seek) { throw Exception( @@ -312,6 +337,8 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position) { if (position != static_cast(read_until_position)) { + read_all_range_successfully = false; + if (impl) { if (!atEndOfRequestedRangeGuess()) @@ -328,6 +355,8 @@ void ReadBufferFromS3::setReadUntilEnd() { if (read_until_position) { + read_all_range_successfully = false; + read_until_position = 0; if (impl) { @@ -351,8 +380,27 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess() return false; } +ReadBufferFromS3::~ReadBufferFromS3() +{ + try + { + if (!read_all_range_successfully && read_result) + /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete + /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. + resetSession(*read_result); + } + catch (...) + { + tryLogCurrentException(log); + } +} + std::unique_ptr ReadBufferFromS3::initialize() { + if (!read_all_range_successfully && read_result) + resetSession(*read_result); + read_all_range_successfully = false; + /** * If remote_filesystem_read_method = 'threadpool', then for MergeTree family tables * exact byte ranges to read are always passed here. 
@@ -363,7 +411,7 @@ std::unique_ptr ReadBufferFromS3::initialize() read_result = sendRequest(offset, read_until_position ? std::make_optional(read_until_position - 1) : std::nullopt); size_t buffer_size = use_external_buffer ? 0 : read_settings.remote_fs_buffer_size; - return std::make_unique(read_result.GetBody(), buffer_size); + return std::make_unique(read_result->GetBody(), buffer_size); } Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin, std::optional range_end_incl) const diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 0f665861a1e..11299aa2c2a 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -41,7 +41,7 @@ private: std::atomic offset = 0; std::atomic read_until_position = 0; - Aws::S3::Model::GetObjectResult read_result; + std::optional read_result; std::unique_ptr impl; Poco::Logger * log = &Poco::Logger::get("ReadBufferFromS3"); @@ -60,6 +60,8 @@ public: bool restricted_seek_ = false, std::optional file_size = std::nullopt); + ~ReadBufferFromS3() override; + bool nextImpl() override; off_t seek(off_t off, int whence) override; @@ -100,6 +102,8 @@ private: /// There is different seek policy for disk seek and for non-disk seek /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). bool restricted_seek; + + bool read_all_range_successfully = false; }; } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index bfda7149343..754b1bfd5b8 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,3 +1,4 @@ +#include #include "Common/DNSResolver.h" #include "config.h" @@ -138,8 +139,9 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , timeouts(ConnectionTimeouts( Poco::Timespan(client_configuration.connectTimeoutMs * 1000), /// connection timeout. Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// send timeout. 
- Poco::Timespan(client_configuration.requestTimeoutMs * 1000) /// receive timeout. - )) + Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// receive timeout. + Poco::Timespan(client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0), + Poco::Timespan(client_configuration.http_keep_alive_timeout_ms * 1000))) /// flag indicating whether keep-alive is enabled is set to each session upon creation , remote_host_filter(client_configuration.remote_host_filter) , s3_max_redirects(client_configuration.s3_max_redirects) , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging) @@ -147,6 +149,8 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , get_request_throttler(client_configuration.get_request_throttler) , put_request_throttler(client_configuration.put_request_throttler) , extra_headers(client_configuration.extra_headers) + , http_connection_pool_size(client_configuration.http_connection_pool_size) + , wait_on_pool_size_limit(client_configuration.wait_on_pool_size_limit) { } @@ -254,9 +258,26 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT void PocoHTTPClient::makeRequestInternal( Aws::Http::HttpRequest & request, std::shared_ptr & response, + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , + Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const +{ + const auto request_configuration = per_request_configuration(request); + if (http_connection_pool_size && request_configuration.proxy_host.empty()) + makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); + else + makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); +} + +template +void PocoHTTPClient::makeRequestInternalImpl( + Aws::Http::HttpRequest & request, + const ClientConfigurationPerRequest & request_configuration, + std::shared_ptr & response, 
Aws::Utils::RateLimits::RateLimiterInterface *, Aws::Utils::RateLimits::RateLimiterInterface *) const { + using SessionPtr = std::conditional_t; + Poco::Logger * log = &Poco::Logger::get("AWSClient"); auto uri = request.GetUri().GetURIString(); @@ -303,8 +324,7 @@ void PocoHTTPClient::makeRequestInternal( for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt) { Poco::URI target_uri(uri); - HTTPSessionPtr session; - auto request_configuration = per_request_configuration(request); + SessionPtr session; if (!request_configuration.proxy_host.empty()) { @@ -313,7 +333,11 @@ void PocoHTTPClient::makeRequestInternal( /// Reverse proxy can replace host header with resolved ip address instead of host name. /// This can lead to request signature difference on S3 side. - session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); + if constexpr (pooled) + session = makePooledHTTPSession( + target_uri, timeouts, http_connection_pool_size, /* resolve_host = */ true, wait_on_pool_size_limit); + else + session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); bool use_tunnel = request_configuration.proxy_scheme == Aws::Http::Scheme::HTTP && target_uri.getScheme() == "https"; session->setProxy( @@ -325,7 +349,11 @@ void PocoHTTPClient::makeRequestInternal( } else { - session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ true); + if constexpr (pooled) + session = makePooledHTTPSession( + target_uri, timeouts, http_connection_pool_size, /* resolve_host = */ true, wait_on_pool_size_limit); + else + session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); } /// In case of error this address will be written to logs diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 762178a9365..92d3d5c5747 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -53,6 +53,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration ThrottlerPtr 
put_request_throttler; HTTPHeaderEntries extra_headers; + /// Not a client parameter in terms of HTTP and we won't send it to the server. Used internally to determine when connection have to be re-established. + uint32_t http_keep_alive_timeout_ms = 0; + /// Zero means pooling will not be used. + size_t http_connection_pool_size = 0; + /// See PoolBase::BehaviourOnLimit + bool wait_on_pool_size_limit = true; + void updateSchemeAndRegion(); std::function error_report; @@ -90,6 +97,12 @@ public: ); } + void SetResponseBody(Aws::IStream & incoming_stream, PooledHTTPSessionPtr & session_) /// NOLINT + { + body_stream = Aws::Utils::Stream::ResponseStream( + Aws::New>("http result streambuf", session_, incoming_stream.rdbuf())); + } + void SetResponseBody(std::string & response_body) /// NOLINT { auto stream = Aws::New("http result buf", response_body); // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -149,6 +162,15 @@ private: EnumSize, }; + template + void makeRequestInternalImpl( + Aws::Http::HttpRequest & request, + const ClientConfigurationPerRequest & per_request_configuration, + std::shared_ptr & response, + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, + Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; + +protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; @@ -170,6 +192,9 @@ private: ThrottlerPtr put_request_throttler; const HTTPHeaderEntries extra_headers; + + size_t http_connection_pool_size = 0; + bool wait_on_pool_size_limit = true; }; } diff --git a/src/IO/S3/SessionAwareIOStream.h b/src/IO/S3/SessionAwareIOStream.h index 1640accb6fa..f7e42f99f51 100644 --- a/src/IO/S3/SessionAwareIOStream.h +++ b/src/IO/S3/SessionAwareIOStream.h @@ -18,6 +18,10 @@ public: { } + Session & getSession() { return session; } + + const Session & getSession() const { return session; } + private: /// Poco HTTP 
session is holder of response stream. Session session; From c8cbc9f8ce36fa49a0785c7f9792c6cf154e06da Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:19:14 +0200 Subject: [PATCH 043/230] fix test --- tests/integration/test_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 2ccd517923a..22805eb6e94 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -923,7 +923,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): @pytest.mark.parametrize("node_name", ["node"]) @pytest.mark.parametrize( - "in_flight_memory", [(10, 245918115), (5, 156786752), (1, 106426187)] + "in_flight_memory", [(10, 288044299), (5, 193557290), (1, 128348733)] ) def test_s3_engine_heavy_write_check_mem( cluster, broken_s3, node_name, in_flight_memory From 8073e0bad1600746f4682f3ca41076bf15e71f50 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 13:45:53 +0200 Subject: [PATCH 044/230] Fix tests --- .../test_mysql_database_engine/configs/user.xml | 10 ++++++++++ tests/integration/test_s3_cluster/configs/users.xml | 9 +++++++++ tests/integration/test_s3_cluster/test.py | 1 + .../test_storage_delta/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_storage_delta/test.py | 1 + tests/integration/test_storage_hudi/test.py | 1 + .../test_storage_iceberg/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_storage_iceberg/test.py | 1 + tests/integration/test_storage_kafka/configs/users.xml | 7 +++++++ tests/integration/test_storage_postgresql/test.py | 2 +- .../test_storage_rabbitmq/configs/users.xml | 7 +++++++ 11 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_mysql_database_engine/configs/user.xml create mode 100644 tests/integration/test_s3_cluster/configs/users.xml create mode 100644 
tests/integration/test_storage_delta/configs/users.d/users.xml create mode 100644 tests/integration/test_storage_iceberg/configs/users.d/users.xml diff --git a/tests/integration/test_mysql_database_engine/configs/user.xml b/tests/integration/test_mysql_database_engine/configs/user.xml new file mode 100644 index 00000000000..775c63350b0 --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/user.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_s3_cluster/configs/users.xml b/tests/integration/test_s3_cluster/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_s3_cluster/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 41f19cdd12d..3b8fd80060f 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -68,6 +68,7 @@ def started_cluster(): cluster.add_instance( "s0_0_0", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "node1", "shard": "shard1"}, with_minio=True, with_zookeeper=True, diff --git a/tests/integration/test_storage_delta/configs/users.d/users.xml b/tests/integration/test_storage_delta/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_delta/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 9477b66dab8..0cd1208edfa 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -53,6 +53,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + 
user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index de9cde43609..3dbbcb7a06e 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -51,6 +51,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_iceberg/configs/users.d/users.xml b/tests/integration/test_storage_iceberg/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_iceberg/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index b3b2f160740..c22b8cda9b5 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -53,6 +53,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_kafka/configs/users.xml b/tests/integration/test_storage_kafka/configs/users.xml index 992464a0ac2..3168de649f8 100644 --- a/tests/integration/test_storage_kafka/configs/users.xml +++ b/tests/integration/test_storage_kafka/configs/users.xml @@ -6,4 +6,11 @@ 0 + + + + default + 1 + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 0c8fc597b5c..49bec6cbe5e 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -15,7 +15,7 @@ node1 = cluster.add_instance( node2 = cluster.add_instance( "node2", main_configs=["configs/named_collections.xml"], 
- user_configs=["configs/settings.xml"], + user_configs=["configs/settings.xml", "configs/users.xml"], with_postgres_cluster=True, ) diff --git a/tests/integration/test_storage_rabbitmq/configs/users.xml b/tests/integration/test_storage_rabbitmq/configs/users.xml index 2cef0a6de3c..e42fefa905b 100644 --- a/tests/integration/test_storage_rabbitmq/configs/users.xml +++ b/tests/integration/test_storage_rabbitmq/configs/users.xml @@ -4,4 +4,11 @@ 1 + + + + default + 1 + + From 3b4dba3d681cb2ef75e31740e801d8813ce4586f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:33:24 +0200 Subject: [PATCH 045/230] Fix tests --- .../test_mask_sensitive_info/configs/users.xml | 9 +++++++++ tests/integration/test_mask_sensitive_info/test.py | 1 + tests/integration/test_redirect_url_storage/test.py | 2 +- tests/integration/test_s3_cluster/test.py | 2 ++ .../test_storage_azure_blob_storage/configs/users.xml | 9 +++++++++ .../integration/test_storage_azure_blob_storage/test.py | 2 +- tests/integration/test_storage_dict/configs/users.xml | 9 +++++++++ .../test_storage_hudi/configs/users.d/users.xml | 9 +++++++++ .../test_storage_meilisearch/configs/users.xml | 9 +++++++++ tests/integration/test_storage_meilisearch/test.py | 2 +- tests/integration/test_storage_url/configs/users.xml | 9 +++++++++ tests/integration/test_storage_url/test.py | 1 + 12 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_mask_sensitive_info/configs/users.xml create mode 100644 tests/integration/test_storage_azure_blob_storage/configs/users.xml create mode 100644 tests/integration/test_storage_dict/configs/users.xml create mode 100644 tests/integration/test_storage_hudi/configs/users.d/users.xml create mode 100644 tests/integration/test_storage_meilisearch/configs/users.xml create mode 100644 tests/integration/test_storage_url/configs/users.xml diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml 
b/tests/integration/test_mask_sensitive_info/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 2131a76b5be..004491af4ac 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -9,6 +9,7 @@ node = cluster.add_instance( main_configs=[ "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], with_zookeeper=True, ) diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index 225a34c9109..17a9a03008e 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -9,7 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], - user_configs=["configs/user.xml"], + user_configs=["configs/users.xml"], with_zookeeper=False, with_hdfs=True, ) diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 3b8fd80060f..673ca318c92 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -76,12 +76,14 @@ def started_cluster(): cluster.add_instance( "s0_0_1", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "replica2", "shard": "shard1"}, with_zookeeper=True, ) cluster.add_instance( "s0_1_0", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "replica1", "shard": "shard2"}, with_zookeeper=True, ) diff --git a/tests/integration/test_storage_azure_blob_storage/configs/users.xml 
b/tests/integration/test_storage_azure_blob_storage/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f9d337b6d86..54b3de8cd9b 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -25,7 +25,7 @@ def cluster(): cluster.add_instance( "node", main_configs=["configs/named_collections.xml"], - user_configs=["configs/disable_profilers.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], with_azurite=True, ) cluster.start() diff --git a/tests/integration/test_storage_dict/configs/users.xml b/tests/integration/test_storage_dict/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_dict/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_hudi/configs/users.d/users.xml b/tests/integration/test_storage_hudi/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_hudi/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_meilisearch/configs/users.xml b/tests/integration/test_storage_meilisearch/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_meilisearch/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_meilisearch/test.py b/tests/integration/test_storage_meilisearch/test.py index ddcd7154154..3724bb18d34 100644 --- a/tests/integration/test_storage_meilisearch/test.py +++ 
b/tests/integration/test_storage_meilisearch/test.py @@ -16,7 +16,7 @@ def started_cluster(request): try: cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "meili", main_configs=["configs/named_collection.xml"], with_meili=True + "meili", main_configs=["configs/named_collection.xml"], user_configs=["configs/users.xml"], with_meili=True ) cluster.start() yield cluster diff --git a/tests/integration/test_storage_url/configs/users.xml b/tests/integration/test_storage_url/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_url/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index f360ec105ec..7f359078967 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -6,6 +6,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/conf.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_nginx=True, ) From 05811d3dd8dda58defa9e6a7360ee17fdcc5c085 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:35:13 +0200 Subject: [PATCH 046/230] Rename --- src/Access/Common/AccessType.h | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 709a519e712..0b66a1b9578 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -142,7 +142,7 @@ enum class AccessType M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ - M(USE_NAMED_COLLECTION, "NAMED COLLECTION USAGE, NAMED COLLECTION", NAMED_COLLECTION, 
NAMED_COLLECTION_ADMIN) \ + M(NAMED_COLLECTION, "NAMED COLLECTION USAGE, USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 29d47e131a6..f301cca92a1 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -76,7 +76,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection_name.has_value()) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + context->checkAccess(AccessType::NAMED_COLLECTION, *collection_name); NamedCollectionPtr collection; if (throw_unknown_collection) @@ -121,7 +121,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (collection_name.empty()) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name); + context->checkAccess(AccessType::NAMED_COLLECTION, collection_name); const auto & collection = NamedCollectionFactory::instance().get(collection_name); auto collection_copy = collection->duplicate(); From 5cceae1e1fa97126a7b1223927354d9b535e184b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 14:25:10 +0200 Subject: [PATCH 047/230] Fix --- tests/integration/test_storage_meilisearch/test.py | 5 ++++- tests/queries/0_stateless/01271_show_privileges.reference | 2 +- .../0_stateless/02117_show_create_table_system.reference | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_meilisearch/test.py b/tests/integration/test_storage_meilisearch/test.py index 3724bb18d34..b6acee18981 100644 --- a/tests/integration/test_storage_meilisearch/test.py +++ b/tests/integration/test_storage_meilisearch/test.py @@ -16,7 +16,10 @@ def started_cluster(request): try: cluster = 
ClickHouseCluster(__file__) node = cluster.add_instance( - "meili", main_configs=["configs/named_collection.xml"], user_configs=["configs/users.xml"], with_meili=True + "meili", + main_configs=["configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_meili=True, ) cluster.start() yield cluster diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 925e0921759..b1ce5ab71d5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -94,7 +94,7 @@ SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION ADMIN SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN -USE NAMED COLLECTION ['NAMED COLLECTION USAGE','NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN +NAMED COLLECTION ['NAMED COLLECTION USAGE','USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 143fb24a637..72c1027e7b1 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER 
ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW 
SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + 
`access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 
'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 
'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 
'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM 
FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 
'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 
'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 
'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 
114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR 
PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 
CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
From b546d8e665b86429ac44770db7d73dd32b0a7156 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 16 Jun 2023 15:30:56 +0200 Subject: [PATCH 048/230] review fixes + test --- src/Common/ProfileEvents.cpp | 2 ++ src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- src/IO/ReadBufferFromS3.cpp | 34 +++++++++++++------ src/IO/ReadBufferFromS3.h | 2 ++ src/IO/S3/PocoHTTPClient.cpp | 1 + ...ing_from_s3_with_connection_pool.reference | 1 + ...89_reading_from_s3_with_connection_pool.sh | 29 ++++++++++++++++ 7 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference create mode 100755 tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f66f7bc6465..c9030070bf2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -368,6 +368,8 @@ The server successfully detected this situation and will download merged part fr M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \ M(ReadBufferFromS3Bytes, "Bytes read from S3.") \ M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \ + M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \ + M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \ \ M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 9bf55ab69ce..6488d532829 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -73,7 +73,7 @@ private: const std::shared_ptr cache_log; const String query_id; const bool use_external_buffer; - bool with_cache; + const bool with_cache; size_t 
read_until_position = 0; size_t file_offset_of_buffer_end = 0; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 364253ba746..0b320ed86ff 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,5 +1,7 @@ -#include "config.h" #include +#include +#include +#include "config.h" #if USE_AWS_S3 @@ -24,6 +26,8 @@ namespace ProfileEvents extern const Event ReadBufferFromS3InitMicroseconds; extern const Event ReadBufferFromS3Bytes; extern const Event ReadBufferFromS3RequestsErrors; + extern const Event ReadBufferFromS3ResetSessions; + extern const Event ReadBufferFromS3PreservedSessions; extern const Event ReadBufferSeekCancelConnection; extern const Event S3GetObject; extern const Event DiskS3GetObject; @@ -46,6 +50,19 @@ void resetSession(Aws::S3::Model::GetObjectResult & read_result) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); } } + +void resetSessionIfNeeded(bool read_all_range_successfully, std::optional & read_result) +{ + if (!read_all_range_successfully && read_result) + { + /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete + /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. 
+ resetSession(*read_result); + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); + } + else + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); +} } namespace DB @@ -91,10 +108,7 @@ bool ReadBufferFromS3::nextImpl() if (read_until_position) { if (read_until_position == offset) - { - read_all_range_successfully = true; return false; - } if (read_until_position < offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); @@ -384,10 +398,7 @@ ReadBufferFromS3::~ReadBufferFromS3() { try { - if (!read_all_range_successfully && read_result) - /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete - /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. - resetSession(*read_result); + resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); } catch (...) { @@ -397,8 +408,7 @@ ReadBufferFromS3::~ReadBufferFromS3() std::unique_ptr ReadBufferFromS3::initialize() { - if (!read_all_range_successfully && read_result) - resetSession(*read_result); + resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); read_all_range_successfully = false; /** @@ -463,6 +473,10 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin } } +bool ReadBufferFromS3::readAllRangeSuccessfully() const +{ + return read_until_position ? 
offset == read_until_position : read_all_range_successfully; +} } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 11299aa2c2a..d58971bea5b 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -95,6 +95,8 @@ private: Aws::S3::Model::GetObjectResult sendRequest(size_t range_begin, std::optional range_end_incl) const; + bool readAllRangeSuccessfully() const; + ReadSettings read_settings; bool use_external_buffer; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 754b1bfd5b8..d64ddf0ec38 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -261,6 +261,7 @@ void PocoHTTPClient::makeRequestInternal( Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { + /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. const auto request_configuration = per_request_configuration(request); if (http_connection_pool_size && request_configuration.proxy_host.empty()) makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh new file mode 100755 index 00000000000..7a8b94a10a8 --- /dev/null +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck 
source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE IF EXISTS test_s3; + +CREATE TABLE test_s3 (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY a +SETTINGS disk = 's3_disk', min_bytes_for_wide_part = 0; + +INSERT INTO test_s3 SELECT number, number FROM numbers_mt(1e7); +" +query="SELECT a, b FROM test_s3" +query_id=$(${CLICKHOUSE_CLIENT} --query "select queryID() from ($query) limit 1" 2>&1) +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -nm --query " +WITH + ProfileEvents['ReadBufferFromS3ResetSessions'] AS reset, + ProfileEvents['ReadBufferFromS3PreservedSessions'] AS preserved +SELECT preserved > reset +FROM system.query_log +WHERE type = 'QueryFinish' + AND current_database = currentDatabase() + AND query_id='$query_id'; +" From 76faacd23ca6137feb52741a7217432ab961aea3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 18 Jun 2023 14:59:10 +0200 Subject: [PATCH 049/230] try to fix some trash in Disks --- programs/disks/CommandCopy.cpp | 2 +- src/Disks/DiskEncrypted.cpp | 31 +------------------ src/Disks/DiskEncrypted.h | 2 -- src/Disks/DiskLocal.cpp | 19 +----------- src/Disks/DiskLocal.h | 2 -- src/Disks/IDisk.cpp | 18 ++++++----- src/Disks/IDisk.h | 3 -- .../ObjectStorages/DiskObjectStorage.cpp | 16 ++++++---- src/Disks/ObjectStorages/DiskObjectStorage.h | 6 +++- .../MergeTree/DataPartStorageOnDiskBase.cpp | 10 +++--- 10 files changed, 33 insertions(+), 76 deletions(-) diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 1cfce7fc022..5228b582d25 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -59,7 +59,7 @@ public: String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); - disk_from->copy(relative_path_from, disk_to, relative_path_to); + disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to); } }; } diff 
--git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 6b515b100c9..f24e06fdef0 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -310,32 +310,6 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes) return std::make_unique(std::static_pointer_cast(shared_from_this()), std::move(reservation)); } -void DiskEncrypted::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - /// Check if we can copy the file without deciphering. - if (isSameDiskType(*this, *to_disk)) - { - /// Disk type is the same, check if the key is the same too. - if (auto * to_disk_enc = typeid_cast(to_disk.get())) - { - auto from_settings = current_settings.get(); - auto to_settings = to_disk_enc->current_settings.get(); - if (from_settings->all_keys == to_settings->all_keys) - { - /// Keys are the same so we can simply copy the encrypted file. - auto wrapped_from_path = wrappedPath(from_path); - auto to_delegate = to_disk_enc->delegate; - auto wrapped_to_path = to_disk_enc->wrappedPath(to_path); - delegate->copy(wrapped_from_path, to_delegate, wrapped_to_path); - return; - } - } - } - - /// Copy the file through buffers with deciphering. - copyThroughBuffers(from_path, to_disk, to_path); -} - void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { @@ -359,11 +333,8 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha } } - if (!to_disk->exists(to_dir)) - to_disk->createDirectories(to_dir); - /// Copy the file through buffers with deciphering. 
- copyThroughBuffers(from_dir, to_disk, to_dir); + IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); } std::unique_ptr DiskEncrypted::readFile( diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 69d051a9537..e085409cedf 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -110,8 +110,6 @@ public: delegate->listFiles(wrapped_path, file_names); } - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; std::unique_ptr readFile( diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index c76ea289101..72c9ccafc8d 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -417,29 +417,12 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another) return typeid(one) == typeid(another); } -void DiskLocal::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - if (isSameDiskType(*this, *to_disk)) - { - fs::path to = fs::path(to_disk->getPath()) / to_path; - fs::path from = fs::path(disk_path) / from_path; - if (from_path.ends_with('/')) - from = from.parent_path(); - if (fs::is_directory(from)) - to /= from.filename(); - - fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. - } - else - copyThroughBuffers(from_path, to_disk, to_path, /* copy_root_dir */ true); /// Base implementation. -} - void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { if (isSameDiskType(*this, *to_disk)) fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else - copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); /// Base implementation. 
+ IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); } SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 3d340ae40b7..1eee58900bf 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -63,8 +63,6 @@ public: void replaceFile(const String & from_path, const String & to_path) override; - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; void listFiles(const String & path, std::vector & file_names) const override; diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index bca867fec76..46a2c5b30c6 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -85,9 +85,16 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p if (from_disk.isFile(from_path)) { auto result = exec.execute( - [&from_disk, from_path, &to_disk, to_path, &settings]() + [&from_disk, from_path, &to_disk, to_path, &settings, thread_group = CurrentThread::getGroup()]() { - setThreadName("DiskCopier"); + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + + if (thread_group) + CurrentThread::attachToGroup(thread_group); + from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); }); @@ -126,18 +133,13 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - copyThroughBuffers(from_path, to_disk, to_path, true); -} - void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { if (!to_disk->exists(to_dir)) to_disk->createDirectories(to_dir); - copyThroughBuffers(from_dir, to_disk, to_dir, false); + copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); } void IDisk::truncateFile(const 
String &, size_t) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 5d75f3b70e5..8a4a29c36fd 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -181,9 +181,6 @@ public: /// If a file with `to_path` path already exists, it will be replaced. virtual void replaceFile(const String & from_path, const String & to_path) = 0; - /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`. - virtual void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path); - /// Recursively copy files from from_dir to to_dir. Create to_dir if not exists. virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 005d115a277..3fae67e2e9d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -235,19 +235,23 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat transaction->commit(); } - -void DiskObjectStorage::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +void DiskObjectStorage::copyFile( + const String & from_file_path, + IDisk & to_disk, + const String & to_file_path, + const WriteSettings & settings) { - /// It's the same object storage disk - if (this == to_disk.get()) + if (this == &to_disk) { + /// It may use s3-server-side copy auto transaction = createObjectStorageTransaction(); - transaction->copyFile(from_path, to_path); + transaction->copyFile(from_file_path, to_file_path); transaction->commit(); } else { - IDisk::copy(from_path, to_disk, to_path); + /// Copy through buffers + IDisk::copyFile(from_file_path, to_disk, to_file_path, settings); } } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index b7dfaf67cf2..b6e4252749a 100644 --- 
a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -152,7 +152,11 @@ public: Strings getBlobPath(const String & path) const override; void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override; - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; + void copyFile( /// NOLINT + const String & from_file_path, + IDisk & to_disk, + const String & to_file_path, + const WriteSettings & settings = {}) override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 30776a8bc50..545cb062fb7 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -456,18 +456,18 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, const DiskPtr & disk, - Poco::Logger * log) const + Poco::Logger *) const { String path_to_clone = fs::path(to) / dir_path / ""; if (disk->exists(path_to_clone)) { - LOG_WARNING(log, "Path {} already exists. 
Will remove it and clone again.", fullPath(disk, path_to_clone)); - disk->removeRecursive(path_to_clone); + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, + "Cannot clone part {} from '{}' to '{}': path '{}' already exists", + dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); } - disk->createDirectories(to); - volume->getDisk()->copy(getRelativePath(), disk, to); + volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); From a2833b206032a613b23ed503ebd983efc8d1dc53 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 19 Jun 2023 19:41:17 +0200 Subject: [PATCH 050/230] remove AsyncTrashPoolExecutor --- src/Common/CurrentMetrics.cpp | 2 + src/Disks/DiskEncrypted.cpp | 16 ++-- src/Disks/DiskEncrypted.h | 5 +- src/Disks/DiskLocal.cpp | 17 ++-- src/Disks/DiskLocal.h | 6 +- src/Disks/DiskSelector.cpp | 2 +- src/Disks/Executor.h | 42 ---------- src/Disks/IDisk.cpp | 25 +++--- src/Disks/IDisk.h | 19 +++-- .../registerDiskAzureBlobStorage.cpp | 7 +- .../ObjectStorages/DiskObjectStorage.cpp | 78 +++---------------- src/Disks/ObjectStorages/DiskObjectStorage.h | 7 +- .../DiskObjectStorageCommon.cpp | 2 +- ...jectStorageRemoteMetadataRestoreHelper.cpp | 53 ++++++------- ...ObjectStorageRemoteMetadataRestoreHelper.h | 2 +- .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 5 +- .../Local/registerLocalObjectStorage.cpp | 2 +- .../ObjectStorages/S3/registerDiskS3.cpp | 7 +- .../Web/registerDiskWebServer.cpp | 4 +- src/Disks/loadLocalDiskConfig.cpp | 2 +- src/Interpreters/Context.cpp | 8 +- src/Storages/HDFS/StorageHDFS.cpp | 4 +- .../MergeTree/DataPartStorageOnDiskBase.cpp | 1 + 23 files changed, 112 insertions(+), 204 deletions(-) delete mode 100644 src/Disks/Executor.h diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 
f2ddb7a84c0..c54541d6785 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -141,6 +141,8 @@ M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \ + M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \ + M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \ M(SystemReplicasThreads, "Number of threads in the system.replicas thread pool.") \ M(SystemReplicasThreadsActive, "Number of threads in the system.replicas thread pool running a task.") \ M(RestartReplicaThreads, "Number of threads in the RESTART REPLICA thread pool.") \ diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index f24e06fdef0..bdc66ace7b3 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -285,19 +285,20 @@ private: }; DiskEncrypted::DiskEncrypted( - const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_) - : DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), use_fake_transaction_) + const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_) + : DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), config_, config_prefix_) { } -DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_, bool use_fake_transaction_) - : IDisk(name_) +DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_, + const Poco::Util::AbstractConfiguration & config_, const String & 
config_prefix_) + : IDisk(name_, config_, config_prefix_) , delegate(settings_->wrapped_disk) , encrypted_name(name_) , disk_path(settings_->disk_path) , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path) , current_settings(std::move(settings_)) - , use_fake_transaction(use_fake_transaction_) + , use_fake_transaction(config_.getBool(config_prefix_ + ".use_fake_transaction", true)) { delegate->createDirectories(disk_path); } @@ -414,7 +415,7 @@ std::unordered_map DiskEncrypted::getSerializedMetadata(const st void DiskEncrypted::applyNewSettings( const Poco::Util::AbstractConfiguration & config, - ContextPtr /*context*/, + ContextPtr context, const String & config_prefix, const DisksMap & disk_map) { @@ -426,6 +427,7 @@ void DiskEncrypted::applyNewSettings( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name); current_settings.set(std::move(new_settings)); + IDisk::applyNewSettings(config, context, config_prefix, disk_map); } void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check) @@ -438,7 +440,7 @@ void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check) const DisksMap & map) -> DiskPtr { bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); - DiskPtr disk = std::make_shared(name, config, config_prefix, map, config.getBool(config_prefix + ".use_fake_transaction", true)); + DiskPtr disk = std::make_shared(name, config, config_prefix, map); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index e085409cedf..ab5b7425f69 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -21,8 +21,9 @@ class WriteBufferFromFileBase; class DiskEncrypted : public IDisk { public: - DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, 
const DisksMap & map_, bool use_fake_transaction_); - DiskEncrypted(const String & name_, std::unique_ptr settings_, bool use_fake_transaction_); + DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_); + DiskEncrypted(const String & name_, std::unique_ptr settings_, + const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_); const String & getName() const override { return encrypted_name; } const String & getPath() const override { return disk_absolute_path; } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 72c9ccafc8d..504e35abac7 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -431,7 +431,7 @@ SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const } -void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) +void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & disk_map) { String new_disk_path; UInt64 new_keep_free_space_bytes; @@ -443,10 +443,13 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi if (keep_free_space_bytes != new_keep_free_space_bytes) keep_free_space_bytes = new_keep_free_space_bytes; + + IDisk::applyNewSettings(config, context, config_prefix, disk_map); } -DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_) - : IDisk(name_) +DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_, + const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + : IDisk(name_, config, config_prefix) , disk_path(path_) , keep_free_space_bytes(keep_free_space_bytes_) , logger(&Poco::Logger::get("DiskLocal")) @@ -455,9 +458,11 @@ DiskLocal::DiskLocal(const String & name_, const 
String & path_, UInt64 keep_fre } DiskLocal::DiskLocal( - const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, UInt64 local_disk_check_period_ms) - : DiskLocal(name_, path_, keep_free_space_bytes_) + const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, + const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + : DiskLocal(name_, path_, keep_free_space_bytes_, config, config_prefix) { + auto local_disk_check_period_ms = config.getUInt("local_disk_check_period_ms", 0); if (local_disk_check_period_ms > 0) disk_checker = std::make_unique(this, context, local_disk_check_period_ms); } @@ -703,7 +708,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check) bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); std::shared_ptr disk - = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); + = std::make_shared(name, path, keep_free_space_bytes, context, config, config_prefix); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 1eee58900bf..2306deeb619 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -19,13 +19,15 @@ public: friend class DiskLocalCheckThread; friend class DiskLocalReservation; - DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_); + DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_, + const Poco::Util::AbstractConfiguration & config, const String & config_prefix); DiskLocal( const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, - UInt64 local_disk_check_period_ms); + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix); const String & getPath() const override { return disk_path; } diff 
--git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 9894e4251a2..e51f79867b5 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -53,7 +53,7 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, disks.emplace( default_disk_name, std::make_shared( - default_disk_name, context->getPath(), 0, context, config.getUInt("local_disk_check_period_ms", 0))); + default_disk_name, context->getPath(), 0, context, config, config_prefix)); } is_initialized = true; diff --git a/src/Disks/Executor.h b/src/Disks/Executor.h deleted file mode 100644 index 7330bcdd559..00000000000 --- a/src/Disks/Executor.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/// Interface to run task asynchronously with possibility to wait for execution. -class Executor -{ -public: - virtual ~Executor() = default; - virtual std::future execute(std::function task) = 0; -}; - -/// Executes task synchronously in case when disk doesn't support async operations. -class SyncExecutor : public Executor -{ -public: - SyncExecutor() = default; - std::future execute(std::function task) override - { - auto promise = std::make_shared>(); - try - { - task(); - promise->set_value(); - } - catch (...) - { - try - { - promise->set_exception(std::current_exception()); - } - catch (...) 
{ } - } - return promise->get_future(); - } -}; - -} diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 46a2c5b30c6..de61218d5a6 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -1,5 +1,4 @@ #include "IDisk.h" -#include "Disks/Executor.h" #include #include #include @@ -80,12 +79,15 @@ UInt128 IDisk::getEncryptedFileIV(const String &) const using ResultsCollector = std::vector>; -void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings) +void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, ThreadPool & pool, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings) { if (from_disk.isFile(from_path)) { - auto result = exec.execute( - [&from_disk, from_path, &to_disk, to_path, &settings, thread_group = CurrentThread::getGroup()]() + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + pool.scheduleOrThrowOnError( + [&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]() { SCOPE_EXIT_SAFE( if (thread_group) @@ -96,9 +98,10 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p CurrentThread::attachToGroup(thread_group); from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); + promise->set_value(); }); - results.push_back(std::move(result)); + results.push_back(std::move(future)); } else { @@ -111,13 +114,12 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p } for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next()) - asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true, settings); + asyncCopy(from_disk, it->path(), to_disk, dest, pool, results, true, settings); } } void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const 
String & to_path, bool copy_root_dir) { - auto & exec = to_disk->getExecutor(); ResultsCollector results; WriteSettings settings; @@ -125,10 +127,8 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr #include #include -#include +#include #include #include #include @@ -35,6 +35,12 @@ namespace Poco } } +namespace CurrentMetrics +{ + extern const Metric IDiskCopierThreads; + extern const Metric IDiskCopierThreadsActive; +} + namespace DB { @@ -110,9 +116,9 @@ class IDisk : public Space { public: /// Default constructor. - explicit IDisk(const String & name_, std::shared_ptr executor_ = std::make_shared()) + explicit IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) - , executor(executor_) + , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, config.getUInt(config_prefix + ".thread_pool_size", 16)) { } @@ -376,7 +382,7 @@ public: virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const; /// Applies new settings for disk in runtime. - virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) {} + virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map); /// Quite leaky abstraction. Some disks can use additional disk to store /// some parts of metadata. In general case we have only one disk itself and @@ -456,9 +462,6 @@ protected: const String name; - /// Returns executor to perform asynchronous operations. - virtual Executor & getExecutor() { return *executor; } - /// Base implementation of the function copy(). /// It just opens two files, reads data by portions from the first file, and writes it to the second one. /// A derived class may override copy() to provide a faster implementation. 
@@ -467,7 +470,7 @@ protected: virtual void checkAccessImpl(const String & path); private: - std::shared_ptr executor; + ThreadPool copying_thread_pool; bool is_custom_disk = false; /// Check access to the disk. diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 562b2b2fec0..a09befe84a8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -31,9 +31,6 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context)); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); - bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); - auto metadata_storage = std::make_shared(metadata_disk, ""); std::shared_ptr azure_blob_storage_disk = std::make_shared( @@ -42,8 +39,8 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access "DiskAzureBlobStorage", std::move(metadata_storage), std::move(azure_object_storage), - send_metadata, - copy_thread_pool_size + config, + config_prefix ); bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 3fae67e2e9d..e6f48f45827 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -38,55 +38,6 @@ namespace ErrorCodes extern const int DIRECTORY_DOESNT_EXIST; } -namespace -{ - -/// Runs tasks asynchronously using thread pool. 
-class AsyncThreadPoolExecutor : public Executor -{ -public: - AsyncThreadPoolExecutor(const String & name_, int thread_pool_size) - : name(name_) - , pool(CurrentMetrics::DiskObjectStorageAsyncThreads, CurrentMetrics::DiskObjectStorageAsyncThreadsActive, thread_pool_size) - {} - - std::future execute(std::function task) override - { - auto promise = std::make_shared>(); - pool.scheduleOrThrowOnError( - [promise, task]() - { - try - { - task(); - promise->set_value(); - } - catch (...) - { - tryLogCurrentException("Failed to run async task"); - - try - { - promise->set_exception(std::current_exception()); - } - catch (...) {} - } - }); - - return promise->get_future(); - } - - void setMaxThreads(size_t threads) - { - pool.setMaxThreads(threads); - } - -private: - String name; - ThreadPool pool; -}; - -} DiskTransactionPtr DiskObjectStorage::createTransaction() { @@ -106,27 +57,20 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction() send_metadata ? metadata_helper.get() : nullptr); } -std::shared_ptr DiskObjectStorage::getAsyncExecutor(const std::string & log_name, size_t size) -{ - static auto reader = std::make_shared(log_name, size); - return reader; -} - DiskObjectStorage::DiskObjectStorage( const String & name_, const String & object_storage_root_path_, const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, - bool send_metadata_, - uint64_t thread_pool_size_) - : IDisk(name_, getAsyncExecutor(log_name, thread_pool_size_)) + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix) + : IDisk(name_, config, config_prefix) , object_storage_root_path(object_storage_root_path_) , log (&Poco::Logger::get("DiskObjectStorage(" + log_name + ")")) , metadata_storage(std::move(metadata_storage_)) , object_storage(std::move(object_storage_)) - , send_metadata(send_metadata_) - , threadpool_size(thread_pool_size_) + , send_metadata(config.getBool(config_prefix + ".send_metadata", false)) 
, metadata_helper(std::make_unique(this, ReadSettings{})) {} @@ -235,7 +179,7 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat transaction->commit(); } -void DiskObjectStorage::copyFile( +void DiskObjectStorage::copyFile( /// NOLINT const String & from_file_path, IDisk & to_disk, const String & to_file_path, @@ -524,14 +468,15 @@ bool DiskObjectStorage::isWriteOnce() const DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() { + const auto config_prefix = "storage_configuration.disks." + name; return std::make_shared( getName(), object_storage_root_path, getName(), metadata_storage, object_storage, - send_metadata, - threadpool_size); + Context::getGlobalContextInstance()->getConfigRef(), + config_prefix); } void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name) @@ -605,13 +550,10 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W } void DiskObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) + const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & config_prefix, const DisksMap & disk_map) { - const auto config_prefix = "storage_configuration.disks." 
+ name; object_storage->applyNewSettings(config, config_prefix, context_); - - if (AsyncThreadPoolExecutor * exec = dynamic_cast(&getExecutor())) - exec->setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16)); + IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } void DiskObjectStorage::restoreMetadataIfNeeded( diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index b6e4252749a..cd000ee705d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -33,8 +33,8 @@ public: const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, - bool send_metadata_, - uint64_t thread_pool_size_); + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix); /// Create fake transaction DiskTransactionPtr createTransaction() override; @@ -200,8 +200,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. 
NameSet getCacheLayersNames() const override; - static std::shared_ptr getAsyncExecutor(const std::string & log_name, size_t size); - bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; @@ -227,7 +225,6 @@ private: std::optional tryReserve(UInt64 bytes); const bool send_metadata; - size_t threadpool_size; std::unique_ptr metadata_helper; }; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp index 5ac6128c3c0..cc9e4b0b712 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp @@ -25,7 +25,7 @@ std::pair prepareForLocalMetadata( /// where the metadata files are stored locally auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); fs::create_directories(metadata_path); - auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0); + auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0, config, config_prefix); return std::make_pair(metadata_path, metadata_disk); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 74d1698bf01..bbcdd40d85f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -8,6 +8,14 @@ #include #include #include +#include + + +namespace CurrentMetrics +{ + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} namespace DB { @@ -101,7 +109,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateFileToRestorableSchema updateObjectMetadata(object.remote_path, metadata); } } -void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) +void 
DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, ThreadPool & pool) { checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. @@ -120,29 +128,26 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu /// The whole directory can be migrated asynchronously. if (dir_contains_only_files) { - auto result = disk->getExecutor().execute([this, path] + pool.scheduleOrThrowOnError([this, path] { for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) migrateFileToRestorableSchema(it->path()); }); - - results.push_back(std::move(result)); } else { for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - if (!disk->isDirectory(it->path())) + { + if (disk->isDirectory(it->path())) { - auto source_path = it->path(); - auto result = disk->getExecutor().execute([this, source_path] - { - migrateFileToRestorableSchema(source_path); - }); - - results.push_back(std::move(result)); + migrateToRestorableSchemaRecursive(it->path(), pool); } else - migrateToRestorableSchemaRecursive(it->path(), results); + { + auto source_path = it->path(); + pool.scheduleOrThrowOnError([this, source_path] { migrateFileToRestorableSchema(source_path); }); + } + } } } @@ -153,16 +158,13 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchema() { LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); - Futures results; + ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive}; for (const auto & root : data_roots) if (disk->exists(root)) - migrateToRestorableSchemaRecursive(root + '/', results); + migrateToRestorableSchemaRecursive(root + '/', pool); - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); + pool.wait(); saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); } @@ -355,8 +357,8 @@ void 
DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * { LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); - std::vector> results; - auto restore_files = [this, &source_object_storage, &restore_information, &results](const RelativePathsWithMetadata & objects) + ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive}; + auto restore_files = [this, &source_object_storage, &restore_information, &pool](const RelativePathsWithMetadata & objects) { std::vector keys_names; for (const auto & object : objects) @@ -378,12 +380,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * if (!keys_names.empty()) { - auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() + pool.scheduleOrThrowOnError([this, &source_object_storage, &restore_information, keys_names]() { processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); }); - - results.push_back(std::move(result)); } return true; @@ -394,10 +394,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * restore_files(children); - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); + pool.wait(); LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h index cb8d9b8a5af..e7de4afcaf3 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h @@ -75,7 +75,7 @@ private: void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; - void migrateToRestorableSchemaRecursive(const 
String & path, Futures & results); + void migrateToRestorableSchemaRecursive(const String & path, ThreadPool & pool); void readRestoreInformation(RestoreInformation & restore_information); void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 693b966caf2..e72e7028c4b 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -44,7 +44,6 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) auto [_, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); auto metadata_storage = std::make_shared(metadata_disk, uri); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); DiskPtr disk = std::make_shared( @@ -53,8 +52,8 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) "DiskHDFS", std::move(metadata_storage), std::move(hdfs_storage), - /* send_metadata = */ false, - copy_thread_pool_size); + config, + config_prefix); disk->startup(context, skip_access_check); return disk; diff --git a/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp index 251fc77d1f8..eb9039fed44 100644 --- a/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp @@ -34,7 +34,7 @@ void registerDiskLocalObjectStorage(DiskFactory & factory, bool global_skip_acce metadata_storage = std::make_shared(metadata_disk, path); auto disk = std::make_shared( - name, path, "Local", metadata_storage, local_storage, false, /* threadpool_size */16); + name, path, "Local", metadata_storage, local_storage, 
config, config_prefix); disk->startup(context, global_skip_access_check); return disk; }; diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index f3a57069a30..fb125ae8517 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -150,17 +150,14 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) } } - bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); - DiskObjectStoragePtr s3disk = std::make_shared( name, uri.key, type == "s3" ? "DiskS3" : "DiskS3Plain", std::move(metadata_storage), std::move(s3_storage), - send_metadata, - copy_thread_pool_size); + config, + config_prefix); s3disk->startup(context, skip_access_check); diff --git a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp index 8a54de81815..bc6c17863ef 100644 --- a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp @@ -52,8 +52,8 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check) "DiskWebServer", metadata_storage, object_storage, - /* send_metadata */false, - /* threadpool_size */16); + config, + config_prefix); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/loadLocalDiskConfig.cpp b/src/Disks/loadLocalDiskConfig.cpp index 0e5eca17ca7..0a9cdae1ae3 100644 --- a/src/Disks/loadLocalDiskConfig.cpp +++ b/src/Disks/loadLocalDiskConfig.cpp @@ -56,7 +56,7 @@ void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. 
- keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0, config, config_prefix).getTotalSpace() * ratio); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 995e78d8f0b..a244b82b54a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -875,9 +875,9 @@ catch (...) "It is ok to skip this exception as cleaning old temporary files is not necessary", path)); } -static VolumePtr createLocalSingleDiskVolume(const std::string & path) +static VolumePtr createLocalSingleDiskVolume(const std::string & path, const Poco::Util::AbstractConfiguration & config_) { - auto disk = std::make_shared("_tmp_default", path, 0); + auto disk = std::make_shared("_tmp_default", path, 0, config_, "storage_configuration.disks._tmp_default"); VolumePtr volume = std::make_shared("_tmp_default", disk, 0); return volume; } @@ -893,7 +893,7 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size) if (!shared->tmp_path.ends_with('/')) shared->tmp_path += '/'; - VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRef()); for (const auto & disk : volume->getDisks()) { @@ -966,7 +966,7 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath()); shared->tmp_path = file_cache->getBasePath(); - VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRef()); shared->root_temp_data_on_disk = std::make_shared(volume, file_cache.get(), max_size); } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 3a96d68dc2e..ad92d7497c0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ 
b/src/Storages/HDFS/StorageHDFS.cpp @@ -103,8 +103,8 @@ namespace { if (re2::RE2::FullMatch(file_name, matcher)) result.emplace_back( - String(ls.file_info[i].mName), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}); + StorageHDFS::PathWithInfo{String(ls.file_info[i].mName), + StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else if (is_directory && looking_for_directory) { diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 545cb062fb7..c397a634db6 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -467,6 +467,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); } + disk->createDirectories(to); volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); From 4949dd05466fd6ef3dee6c74fd6006ba0c4d3707 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 19 Jun 2023 20:21:33 +0200 Subject: [PATCH 051/230] fix --- src/Common/CurrentMetrics.cpp | 4 ++-- src/Disks/IDisk.h | 1 - src/Disks/ObjectStorages/DiskObjectStorage.cpp | 6 ------ 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index c54541d6785..85e08b4ec8d 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -93,8 +93,8 @@ M(ThreadPoolFSReaderThreadsActive, "Number of threads in the thread pool for local_filesystem_read_method=threadpool running a task.") \ M(BackupsIOThreads, "Number of threads in the BackupsIO thread pool.") \ M(BackupsIOThreadsActive, "Number of threads in the BackupsIO thread pool running a task.") \ - M(DiskObjectStorageAsyncThreads, "Number of threads in 
the async thread pool for DiskObjectStorage.") \ - M(DiskObjectStorageAsyncThreadsActive, "Number of threads in the async thread pool for DiskObjectStorage running a task.") \ + M(DiskObjectStorageAsyncThreads, "Obsolete metric, shows nothing.") \ + M(DiskObjectStorageAsyncThreadsActive, "Obsolete metric, shows nothing.") \ M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \ M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \ M(TablesLoaderThreads, "Number of threads in the tables loader thread pool.") \ diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 42122ce4cb9..f935933bad7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index e6f48f45827..e3922b6c505 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -19,12 +19,6 @@ #include #include -namespace CurrentMetrics -{ - extern const Metric DiskObjectStorageAsyncThreads; - extern const Metric DiskObjectStorageAsyncThreadsActive; -} - namespace DB { From 60a0843f58b311d6fccb827cf2f2d3261ef92ecf Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Jun 2023 17:47:25 +0200 Subject: [PATCH 052/230] fix --- src/Backups/tests/gtest_backup_entries.cpp | 4 ++-- src/Disks/DiskEncrypted.cpp | 12 ++++++++++++ src/Disks/DiskEncrypted.h | 1 + src/Disks/DiskLocal.cpp | 11 ++++++++++- src/Disks/DiskLocal.h | 2 ++ src/Disks/IDisk.h | 8 +++++++- .../tests/gtest_cascade_and_memory_write_buffer.cpp | 2 +- src/Disks/tests/gtest_disk.cpp | 2 +- src/Disks/tests/gtest_disk_encrypted.cpp | 4 ++-- 9 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/Backups/tests/gtest_backup_entries.cpp b/src/Backups/tests/gtest_backup_entries.cpp index ca603d20787..75972b35ba4 100644 --- 
a/src/Backups/tests/gtest_backup_entries.cpp +++ b/src/Backups/tests/gtest_backup_entries.cpp @@ -24,7 +24,7 @@ protected: /// Make local disk. temp_dir = std::make_unique(); temp_dir->createDirectories(); - local_disk = std::make_shared("local_disk", temp_dir->path() + "/", 0); + local_disk = std::make_shared("local_disk", temp_dir->path() + "/"); /// Make encrypted disk. auto settings = std::make_unique(); @@ -38,7 +38,7 @@ protected: settings->current_key = key; settings->current_key_fingerprint = fingerprint; - encrypted_disk = std::make_shared("encrypted_disk", std::move(settings), true); + encrypted_disk = std::make_shared("encrypted_disk", std::move(settings)); } void TearDown() override diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index bdc66ace7b3..677dd73cc00 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -303,6 +303,18 @@ DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptrcreateDirectories(disk_path); } +DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_) + : IDisk(name_) + , delegate(settings_->wrapped_disk) + , encrypted_name(name_) + , disk_path(settings_->disk_path) + , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path) + , current_settings(std::move(settings_)) + , use_fake_transaction(true) +{ + delegate->createDirectories(disk_path); +} + ReservationPtr DiskEncrypted::reserve(UInt64 bytes) { auto reservation = delegate->reserve(bytes); diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index ab5b7425f69..9963770bd1c 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -24,6 +24,7 @@ public: DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_); DiskEncrypted(const String & name_, std::unique_ptr settings_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_); + 
DiskEncrypted(const String & name_, std::unique_ptr settings_); const String & getName() const override { return encrypted_name; } const String & getPath() const override { return disk_absolute_path; } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 504e35abac7..9a61c176cf6 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -420,7 +420,7 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another) void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { if (isSameDiskType(*this, *to_disk)) - fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. + fs::copy(fs::path(disk_path) / from_dir, fs::path(to_disk->getPath()) / to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); } @@ -467,6 +467,15 @@ DiskLocal::DiskLocal( disk_checker = std::make_unique(this, context, local_disk_check_period_ms); } +DiskLocal::DiskLocal(const String & name_, const String & path_) + : IDisk(name_) + , disk_path(path_) + , keep_free_space_bytes(0) + , logger(&Poco::Logger::get("DiskLocal")) + , data_source_description(getLocalDataSourceDescription(disk_path)) +{ +} + DataSourceDescription DiskLocal::getDataSourceDescription() const { return data_source_description; diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 2306deeb619..b30732b67fd 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -29,6 +29,8 @@ public: const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + DiskLocal(const String & name_, const String & path_); + const String & getPath() const override { return disk_path; } ReservationPtr reserve(UInt64 bytes) override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index f935933bad7..ccef3db2dac 100644 --- a/src/Disks/IDisk.h +++ 
b/src/Disks/IDisk.h @@ -115,12 +115,18 @@ class IDisk : public Space { public: /// Default constructor. - explicit IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, config.getUInt(config_prefix + ".thread_pool_size", 16)) { } + explicit IDisk(const String & name_) + : name(name_) + , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, 16) + { + } + /// This is a disk. bool isDisk() const override { return true; } diff --git a/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp b/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp index a24056a141f..482a38d8fc2 100644 --- a/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp +++ b/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp @@ -33,7 +33,7 @@ public: void SetUp() override { fs::create_directories(tmp_root); - disk = std::make_shared("local_disk", tmp_root, 0); + disk = std::make_shared("local_disk", tmp_root); } void TearDown() override diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp index 1f33f536399..d57ca7bd81b 100644 --- a/src/Disks/tests/gtest_disk.cpp +++ b/src/Disks/tests/gtest_disk.cpp @@ -10,7 +10,7 @@ namespace fs = std::filesystem; DB::DiskPtr createDisk() { fs::create_directory("tmp/"); - return std::make_shared("local_disk", "tmp/", 0); + return std::make_shared("local_disk", "tmp/"); } void destroyDisk(DB::DiskPtr & disk) diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index ee9e284d409..b61b6140b0c 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -23,7 +23,7 @@ protected: /// Make local disk. 
temp_dir = std::make_unique(); temp_dir->createDirectories(); - local_disk = std::make_shared("local_disk", getDirectory(), 0); + local_disk = std::make_shared("local_disk", getDirectory()); } void TearDown() override @@ -42,7 +42,7 @@ protected: settings->current_key = key; settings->current_key_fingerprint = fingerprint; settings->disk_path = path; - encrypted_disk = std::make_shared("encrypted_disk", std::move(settings), true); + encrypted_disk = std::make_shared("encrypted_disk", std::move(settings)); } String getFileNames() From 876d5ae0a71dff7724bd665076a2d681a651829e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:27:43 +0200 Subject: [PATCH 053/230] fix ReadBufferFromS3 --- src/IO/ReadBufferFromS3.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 0b320ed86ff..fdbe1a4ba57 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include "config.h" #if USE_AWS_S3 From 51a5ef33e1285c13d5d4967635dbbf3ead5a908c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Jun 2023 23:20:37 +0200 Subject: [PATCH 054/230] fix --- src/Common/Exception.cpp | 12 ++++++++++++ src/Disks/IDisk.cpp | 24 +++++++++++++++--------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 20206b76225..3fd0a929d6f 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -400,6 +400,18 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; } catch (...) {} + +#ifdef ABORT_ON_LOGICAL_ERROR + try + { + throw; + } + catch (const std::logic_error &) + { + abortOnFailedAssertion(stream.str()); + } + catch (...) {} +#endif } catch (...) 
{ diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index de61218d5a6..544ba014fde 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -89,16 +89,20 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p pool.scheduleOrThrowOnError( [&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]() { - SCOPE_EXIT_SAFE( + try + { + SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached();); + if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); + CurrentThread::attachToGroup(thread_group); - if (thread_group) - CurrentThread::attachToGroup(thread_group); - - from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); - promise->set_value(); + from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); + promise->set_value(); + } + catch (...) + { + promise->set_exception(std::current_exception()); + } }); results.push_back(std::move(future)); @@ -130,7 +134,9 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr Date: Mon, 19 Jun 2023 23:35:35 +0000 Subject: [PATCH 055/230] Decrease default timeouts for S3 and HTTP requests --- docs/en/operations/settings/settings.md | 4 +- src/Backups/BackupIO_S3.cpp | 1 + src/Coordination/KeeperSnapshotManagerS3.cpp | 6 +-- src/Core/Defines.h | 2 +- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 2 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 48 ++++++++++--------- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 16 +++++-- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 +- src/IO/S3/Client.cpp | 33 +++++++++---- src/IO/S3/Client.h | 22 +++++++-- src/IO/S3/tests/gtest_aws_s3_client.cpp | 1 + src/IO/WriteBufferFromS3.cpp | 4 +- src/IO/WriteBufferFromS3.h | 3 ++ src/IO/tests/gtest_writebuffer_s3.cpp | 1 + src/Storages/StorageS3.cpp | 5 +- src/Storages/StorageS3.h | 1 + src/Storages/StorageS3Settings.cpp | 
5 +- src/Storages/StorageS3Settings.h | 3 +- 19 files changed, 109 insertions(+), 51 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index baefbb2cf6f..4916dfaaf7d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3501,7 +3501,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 180. +Default value: 30. ## http_receive_timeout {#http_receive_timeout} @@ -3512,7 +3512,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 180. +Default value: 30. ## check_query_single_value_result {#check_query_single_value_result} diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 967beba4bf5..9a2a457e13e 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -253,6 +253,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) { return std::make_unique( client, + client, // already has long timeout s3_uri.bucket, fs::path(s3_uri.key) / file_name, DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 1afe0b352c5..bf437f03ae3 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -144,14 +144,14 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa const auto create_writer = [&](const auto & key) { - return WriteBufferFromS3 - { + return WriteBufferFromS3( + s3_client->client, s3_client->client, s3_client->uri.bucket, key, DBMS_DEFAULT_BUFFER_SIZE, request_settings_1 - }; + ); }; LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); diff --git a/src/Core/Defines.h b/src/Core/Defines.h index e9b84b71cae..efe14b93a3d 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -41,7 +41,7 @@ /// The boundary on which the blocks for 
asynchronous file operations should be aligned. #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096 -#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 180 +#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 30 #define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1 /// Maximum number of http-connections between two endpoints /// the number is unmotivated diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 204a27483df..5162e0f273e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -102,6 +102,7 @@ class IColumn; M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ + M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 9fd45ac16d6..2886cdd288d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. 
Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e48924326e1..e46ca3d0828 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -149,7 +149,7 @@ private: bool S3ObjectStorage::exists(const StoredObject & object) const { auto settings_ptr = s3_settings.get(); - return S3::objectExists(*client.get(), bucket, object.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); + return S3::objectExists(*clients.get()->client, bucket, object.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); } std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT @@ -168,7 +168,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT (const std::string & path, size_t read_until_position) -> std::unique_ptr { return std::make_unique( - client.get(), + clients.get()->client, bucket, path, version_id, @@ -218,7 +218,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT { auto settings_ptr = s3_settings.get(); return std::make_unique( - client.get(), + clients.get()->client, bucket, object.remote_path, version_id, @@ -243,8 +243,10 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (write_settings.s3_allow_parallel_part_upload) scheduler = 
threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + auto clients_ = clients.get(); return std::make_unique( - client.get(), + clients_->client, + clients_->client_with_long_timeout, bucket, object.remote_path, buf_size, @@ -258,7 +260,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const { auto settings_ptr = s3_settings.get(); - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; return std::make_shared(bucket, path_prefix, client_ptr, settings_ptr->list_object_keys_size); } @@ -266,7 +268,7 @@ ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefi void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const { auto settings_ptr = s3_settings.get(); - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; S3::ListObjectsV2Request request; request.SetBucket(bucket); @@ -307,7 +309,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exists) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; ProfileEvents::increment(ProfileEvents::S3DeleteObjects); ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects); @@ -333,7 +335,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e } else { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; @@ -394,7 +396,7 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects) std::optional S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, 
settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); + auto object_info = S3::getObjectInfo(*clients.get()->client, bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty()) return {}; @@ -410,7 +412,7 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); + auto object_info = S3::getObjectInfo(*clients.get()->client, bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); ObjectMetadata result; result.size_bytes = object_info.size; @@ -429,7 +431,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT /// Shortcut for S3 if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); @@ -445,7 +447,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT void S3ObjectStorage::copyObject( // NOLINT const StoredObject & object_from, const StoredObject & object_to, std::optional object_to_attributes) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* 
for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); @@ -458,35 +460,33 @@ void S3ObjectStorage::setNewSettings(std::unique_ptr && s3_settings.set(std::move(s3_settings_)); } -void S3ObjectStorage::setNewClient(std::unique_ptr && client_) -{ - client.set(std::move(client_)); -} - void S3ObjectStorage::shutdown() { - auto client_ptr = client.get(); + auto clients_ptr = clients.get(); /// This call stops any next retry attempts for ongoing S3 requests. /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. /// This should significantly speed up shutdown process if S3 is unhealthy. - const_cast(*client_ptr).DisableRequestProcessing(); + const_cast(*clients_ptr->client).DisableRequestProcessing(); + const_cast(*clients_ptr->client_with_long_timeout).DisableRequestProcessing(); } void S3ObjectStorage::startup() { - auto client_ptr = client.get(); + auto clients_ptr = clients.get(); /// Need to be enabled if it was disabled during shutdown() call. 
- const_cast(*client_ptr).EnableRequestProcessing(); + const_cast(*clients_ptr->client).EnableRequestProcessing(); + const_cast(*clients_ptr->client_with_long_timeout).EnableRequestProcessing(); } void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_clients = std::make_unique(std::move(new_client), *new_s3_settings); s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); + clients.set(std::move(new_clients)); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( @@ -501,7 +501,9 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( endpoint); } +S3ObjectStorage::Clients::Clients(std::shared_ptr client_, const S3ObjectStorageSettings & settings) + : client(std::move(client_)), client_with_long_timeout(client->clone(std::nullopt, settings.request_settings.long_request_timeout_ms)) {} + } - #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 072e1354d38..527b1479d89 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -39,6 +39,16 @@ struct S3ObjectStorageSettings class S3ObjectStorage : public IObjectStorage { +public: + struct Clients + { + std::shared_ptr client; + std::shared_ptr client_with_long_timeout; + + Clients() = default; + Clients(std::shared_ptr client, const S3ObjectStorageSettings & settings); + }; + private: friend class S3PlainObjectStorage; @@ -51,7 +61,7 @@ private: String bucket_, String connection_string) : bucket(bucket_) - , client(std::move(client_)) + , clients(std::make_unique(std::move(client_), *s3_settings_)) , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) , version_id(std::move(version_id_)) @@ -159,14 
+169,12 @@ public: private: void setNewSettings(std::unique_ptr && s3_settings_); - void setNewClient(std::unique_ptr && client_); - void removeObjectImpl(const StoredObject & object, bool if_exists); void removeObjectsImpl(const StoredObjects & objects, bool if_exists); std::string bucket; - MultiVersion client; + MultiVersion clients; MultiVersion s3_settings; S3Capabilities s3_capabilities; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 409eb2a3dc3..cbf0392aae9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -129,7 +129,7 @@ std::unique_ptr getClient( throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); + client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 668b1a3959d..7e20b1a9e8f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -100,7 +100,7 @@ std::unique_ptr Client::create( size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider, - const Aws::Client::ClientConfiguration & client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing) { @@ -109,9 +109,16 @@ std::unique_ptr Client::create( new Client(max_redirects_, std::move(sse_kms_config_), credentials_provider, client_configuration, sign_payloads, use_virtual_addressing)); } -std::unique_ptr 
Client::create(const Client & other) +std::unique_ptr Client::clone( + std::optional> override_retry_strategy, + std::optional override_request_timeout_ms) const { - return std::unique_ptr(new Client(other)); + PocoHTTPClientConfiguration new_configuration = client_configuration; + if (override_retry_strategy.has_value()) + new_configuration.retryStrategy = *override_retry_strategy; + if (override_request_timeout_ms.has_value()) + new_configuration.requestTimeoutMs = *override_request_timeout_ms; + return std::unique_ptr(new Client(*this, new_configuration)); } namespace @@ -134,11 +141,14 @@ Client::Client( size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider_, - const Aws::Client::ClientConfiguration & client_configuration, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, - bool use_virtual_addressing) - : Aws::S3::S3Client(credentials_provider_, client_configuration, std::move(sign_payloads), use_virtual_addressing) + const PocoHTTPClientConfiguration & client_configuration_, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads_, + bool use_virtual_addressing_) + : Aws::S3::S3Client(credentials_provider_, client_configuration_, sign_payloads_, use_virtual_addressing_) , credentials_provider(credentials_provider_) + , client_configuration(client_configuration_) + , sign_payloads(sign_payloads_) + , use_virtual_addressing(use_virtual_addressing_) , max_redirects(max_redirects_) , sse_kms_config(std::move(sse_kms_config_)) , log(&Poco::Logger::get("S3Client")) @@ -175,10 +185,15 @@ Client::Client( ClientCacheRegistry::instance().registerClient(cache); } -Client::Client(const Client & other) - : Aws::S3::S3Client(other) +Client::Client( + const Client & other, const PocoHTTPClientConfiguration & client_configuration_) + : Aws::S3::S3Client(other.credentials_provider, client_configuration_, other.sign_payloads, + other.use_virtual_addressing) , 
initial_endpoint(other.initial_endpoint) , credentials_provider(other.credentials_provider) + , client_configuration(client_configuration_) + , sign_payloads(other.sign_payloads) + , use_virtual_addressing(other.use_virtual_addressing) , explicit_region(other.explicit_region) , detect_region(other.detect_region) , provider_type(other.provider_type) diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index e1b99c893a6..8904c850553 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -105,6 +105,8 @@ private: class Client : private Aws::S3::S3Client { public: + class RetryStrategy; + /// we use a factory method to verify arguments before creating a client because /// there are certain requirements on arguments for it to work correctly /// e.g. Client::RetryStrategy should be used @@ -112,11 +114,19 @@ public: size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider, - const Aws::Client::ClientConfiguration & client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing); - static std::unique_ptr create(const Client & other); + /// Create a client with adjusted settings: + /// * override_retry_strategy can be used to disable retries to avoid nested retries when we have + /// a retry loop outside of S3 client. Specifically, for read and write buffers. Currently not + /// actually used. 
+ /// * override_request_timeout_ms is used to increase timeout for CompleteMultipartUploadRequest + /// because it often sits idle for 10 seconds: https://github.com/ClickHouse/ClickHouse/pull/42321 + std::unique_ptr clone( + std::optional> override_retry_strategy = std::nullopt, + std::optional override_request_timeout_ms = std::nullopt) const; Client & operator=(const Client &) = delete; @@ -211,11 +221,12 @@ private: Client(size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider_, - const Aws::Client::ClientConfiguration& client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing); - Client(const Client & other); + Client( + const Client & other, const PocoHTTPClientConfiguration & client_configuration); /// Leave regular functions private so we don't accidentally use them /// otherwise region and endpoint redirection won't work @@ -251,6 +262,9 @@ private: String initial_endpoint; std::shared_ptr credentials_provider; + PocoHTTPClientConfiguration client_configuration; + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads; + bool use_virtual_addressing; std::string explicit_region; mutable bool detect_region = true; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index a9b5fa03f30..5731e9061d6 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -89,6 +89,7 @@ void doWriteRequest(std::shared_ptr client, const DB::S3:: DB::S3Settings::RequestSettings request_settings; request_settings.max_unexpected_write_error_retries = max_unexpected_write_error_retries; DB::WriteBufferFromS3 write_buffer( + client, client, uri.bucket, uri.key, diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 990505adfb3..900861a7831 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ 
b/src/IO/WriteBufferFromS3.cpp @@ -77,6 +77,7 @@ struct WriteBufferFromS3::PartData WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, + std::shared_ptr client_with_long_timeout_ptr_, const String & bucket_, const String & key_, size_t buf_size_, @@ -91,6 +92,7 @@ WriteBufferFromS3::WriteBufferFromS3( , upload_settings(request_settings.getUploadSettings()) , write_settings(write_settings_) , client_ptr(std::move(client_ptr_)) + , client_with_long_timeout_ptr(std::move(client_with_long_timeout_ptr_)) , object_metadata(std::move(object_metadata_)) , buffer_allocation_policy(ChooseBufferPolicy(upload_settings)) , task_tracker( @@ -551,7 +553,7 @@ void WriteBufferFromS3::completeMultipartUpload() ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload); Stopwatch watch; - auto outcome = client_ptr->CompleteMultipartUpload(req); + auto outcome = client_with_long_timeout_ptr->CompleteMultipartUpload(req); watch.stop(); ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index f4200b0a646..32f4867a439 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -29,6 +29,8 @@ class WriteBufferFromS3 final : public WriteBufferFromFileBase public: WriteBufferFromS3( std::shared_ptr client_ptr_, + /// for CompleteMultipartUploadRequest, because it blocks on recv() for a few seconds on big uploads + std::shared_ptr client_with_long_timeout_ptr_, const String & bucket_, const String & key_, size_t buf_size_, @@ -86,6 +88,7 @@ private: const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const WriteSettings write_settings; const std::shared_ptr client_ptr; + const std::shared_ptr client_with_long_timeout_ptr; const std::optional> object_metadata; Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp 
index cd38291fb31..44c0ee67669 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -526,6 +526,7 @@ public: getAsyncPolicy().setAutoExecute(false); return std::make_unique( + client, client, bucket, file_name, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f4791e45e2b..135722dbce2 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -150,7 +150,7 @@ public: KeysWithInfo * read_keys_, const S3Settings::RequestSettings & request_settings_) : WithContext(context_) - , client(S3::Client::create(client_)) + , client(client_.clone()) , globbed_uri(globbed_uri_) , query(query_) , virtual_header(virtual_header_) @@ -783,6 +783,7 @@ public: write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique( configuration_.client, + configuration_.client_with_long_timeout, bucket, key, DBMS_DEFAULT_BUFFER_SIZE, @@ -1296,6 +1297,8 @@ void StorageS3::Configuration::connect(ContextPtr context) context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }); + + client_with_long_timeout = client->clone(std::nullopt, request_settings.long_request_timeout_ms); } void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 13053833623..8d571dd796f 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -274,6 +274,7 @@ public: HTTPHeaderEntries headers_from_ast; std::shared_ptr client; + std::shared_ptr client_with_long_timeout; std::vector keys; }; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 23b4630707c..89e6ee46b4d 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -199,7 +199,7 @@ 
S3Settings::RequestSettings::RequestSettings( list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); - request_timeout_ms = config.getUInt64(key + "request_timeout_ms", request_timeout_ms); + request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. @@ -255,6 +255,9 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin if (!if_changed || settings.s3_retry_attempts.changed) retry_attempts = settings.s3_retry_attempts; + + if (!if_changed || settings.s3_request_timeout_ms.changed) + request_timeout_ms = settings.s3_request_timeout_ms; } void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 41489927e7f..991e323acb6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -69,7 +69,8 @@ struct S3Settings ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; size_t retry_attempts = 10; - size_t request_timeout_ms = 30000; + size_t request_timeout_ms = 3000; + size_t long_request_timeout_ms = 30000; // TODO: Take this from config like request_timeout_ms bool throw_on_zero_files_match = false; From 9157314b2a8d03a87bc467e716c3557b7d9d768f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Jun 2023 20:29:32 +0200 Subject: [PATCH 056/230] fix --- .../ObjectStorages/DiskObjectStorage.cpp | 4 ++- .../MergeTree/DataPartStorageOnDiskBase.cpp | 27 
+++++++++++++------ .../MergeTree/DataPartStorageOnDiskBase.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +++-- tests/integration/helpers/cluster.py | 2 +- .../configs/config.d/storage_conf.xml | 2 ++ .../test_merge_tree_s3_failover/test.py | 3 ++- 7 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index e3922b6c505..90eb87a56f1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -544,8 +544,10 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W } void DiskObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & config_prefix, const DisksMap & disk_map) + const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & /*config_prefix*/, const DisksMap & disk_map) { + /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name + const auto config_prefix = "storage_configuration.disks." 
+ name; object_storage->applyNewSettings(config, config_prefix, context_); IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index c397a634db6..01fcc2698eb 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -455,23 +455,34 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, - const DiskPtr & disk, - Poco::Logger *) const + const DiskPtr & dst_disk, + Poco::Logger * log) const { String path_to_clone = fs::path(to) / dir_path / ""; + auto src_disk = volume->getDisk(); - if (disk->exists(path_to_clone)) + if (dst_disk->exists(path_to_clone)) { throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Cannot clone part {} from '{}' to '{}': path '{}' already exists", - dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); + dir_path, getRelativePath(), path_to_clone, fullPath(dst_disk, path_to_clone)); } - disk->createDirectories(to); - volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); - volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); + try + { + dst_disk->createDirectories(to); + src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone); + } + catch (...) 
+ { + /// It's safe to remove it recursively (even with zero-copy-replication) + /// because we've just did full copy through copyDirectoryContent + LOG_WARNING(log, "Removing directory {} after failed attempt to move a data part", path_to_clone); + dst_disk->removeRecursive(path_to_clone); + throw; + } - auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + auto single_disk_volume = std::make_shared(dst_disk->getName(), dst_disk, 0); return create(single_disk_volume, to, dir_path, /*initialize=*/ true); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 043953eb20c..59f29b76b75 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -71,7 +71,7 @@ public: MutableDataPartStoragePtr clonePart( const std::string & to, const std::string & dir_path, - const DiskPtr & disk, + const DiskPtr & dst_disk, Poco::Logger * log) const override; void rename( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e1e64b82ea3..c5754f70265 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -502,8 +502,10 @@ void IMergeTreeDataPart::removeIfNeeded() throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name); - const auto part_parent_directory = directoryPath(part_directory); - bool is_moving_part = part_parent_directory.ends_with("moving/"); + fs::path part_directory_path = getDataPartStorage().getRelativePath(); + if (part_directory_path.filename().empty()) + part_directory_path = part_directory_path.parent_path(); + bool is_moving_part = part_directory_path.parent_path().filename() == "moving"; if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj") && !is_moving_part) { LOG_ERROR( diff --git 
a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index f57ebf40e54..2b14b2eeb25 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -36,6 +36,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) + from .hdfs_api import HDFSApi # imports requests_kerberos except Exception as e: logging.warning(f"Cannot import some modules, some tests may not work: {e}") @@ -51,7 +52,6 @@ from helpers.client import QueryRuntimeException import docker from .client import Client -from .hdfs_api import HDFSApi from .config_cluster import * diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml index 4480327c4b5..235b9a7b7a1 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml @@ -72,4 +72,6 @@ + + true diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 05aeeff2ec1..90dda631924 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -183,7 +183,8 @@ def test_move_failover(cluster): ) ENGINE=MergeTree() ORDER BY id TTL dt + INTERVAL 4 SECOND TO VOLUME 'external' - SETTINGS storage_policy='s3_cold' + SETTINGS storage_policy='s3_cold', temporary_directories_lifetime=1, + merge_tree_clear_old_temporary_directories_interval_seconds=1 """ ) From fc5ed7ffd7b6594beed5b3ed172ea79055358862 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 21 Jun 2023 18:45:28 +0000 Subject: [PATCH 057/230] Automatic style fix --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py 
b/tests/integration/helpers/cluster.py index 2b14b2eeb25..4c356219537 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -36,7 +36,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) - from .hdfs_api import HDFSApi # imports requests_kerberos + from .hdfs_api import HDFSApi # imports requests_kerberos except Exception as e: logging.warning(f"Cannot import some modules, some tests may not work: {e}") From 1419bb7adbac4603439c02d8e8b68d1338437c48 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:31:23 +0200 Subject: [PATCH 058/230] rollback changes in test --- tests/integration/test_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 22805eb6e94..2ccd517923a 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -923,7 +923,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): @pytest.mark.parametrize("node_name", ["node"]) @pytest.mark.parametrize( - "in_flight_memory", [(10, 288044299), (5, 193557290), (1, 128348733)] + "in_flight_memory", [(10, 245918115), (5, 156786752), (1, 106426187)] ) def test_s3_engine_heavy_write_check_mem( cluster, broken_s3, node_name, in_flight_memory From 220520c516bea15399396b5f82aa3ab2d6cd9ca3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Jun 2023 21:45:10 +0200 Subject: [PATCH 059/230] fix --- src/Common/Exception.cpp | 22 +++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreePartsMover.cpp | 10 +++++++-- src/Storages/StorageReplicatedMergeTree.cpp | 8 +++---- src/Storages/StorageReplicatedMergeTree.h | 6 ++--- .../test_s3_zero_copy_ttl/configs/s3.xml | 2 ++ .../integration/test_s3_zero_copy_ttl/test.py | 2 +- 7 files changed, 30 insertions(+), 22 
deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 3fd0a929d6f..9757c24a8ec 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -401,17 +401,17 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b } catch (...) {} -#ifdef ABORT_ON_LOGICAL_ERROR - try - { - throw; - } - catch (const std::logic_error &) - { - abortOnFailedAssertion(stream.str()); - } - catch (...) {} -#endif +// #ifdef ABORT_ON_LOGICAL_ERROR +// try +// { +// throw; +// } +// catch (const std::logic_error &) +// { +// abortOnFailedAssertion(stream.str()); +// } +// catch (...) {} +// #endif } catch (...) { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..f6f241c1e89 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1040,7 +1040,7 @@ public: /// Fetch part only if some replica has it on shared storage like S3 /// Overridden in StorageReplicatedMergeTree - virtual MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } + virtual MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } /// Check shared data usage on other replicas for detached/freezed part /// Remove local files and remote files if needed diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 8fa4ac6c78a..59784935c7b 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -233,9 +233,15 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me disk->createDirectories(path_to_clone); - cloned_part_storage = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name); + auto zero_copy_part = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / 
part->name); - if (!cloned_part_storage) + if (zero_copy_part) + { + /// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder + zero_copy_part->is_temp = false; /// Do not remove it in dtor + cloned_part_storage = zero_copy_part->getDataPartStoragePtr(); + } + else { LOG_INFO(log, "Part {} was not fetched, we are the first who move it to another disk, so we will copy it", part->name); cloned_part_storage = part->getDataPartStorage().clonePart(path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, log); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bb99e21e4ab..e96049a456a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1972,7 +1972,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che } -MutableDataPartStoragePtr StorageReplicatedMergeTree::executeFetchShared( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared( const String & source_replica, const String & new_part_name, const DiskPtr & disk, @@ -4444,7 +4444,7 @@ bool StorageReplicatedMergeTree::fetchPart( } -MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & source_replica_path, @@ -4550,7 +4550,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); LOG_DEBUG(log, "Fetched part {} from {}:{}", part_name, zookeeper_name, source_replica_path); - return part->getDataPartStoragePtr(); + return part; } void StorageReplicatedMergeTree::startup() @@ -8868,7 +8868,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } -MutableDataPartStoragePtr 
StorageReplicatedMergeTree::tryToFetchIfShared( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared( const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c08e05090b1..3ba5c61d1b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -243,7 +243,7 @@ public: bool canExecuteFetch(const ReplicatedMergeTreeLogEntry & entry, String & disable_reason) const; /// Fetch part only when it stored on shared storage like S3 - MutableDataPartStoragePtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); + MutableDataPartPtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); /// Lock part in zookeeper for use shared data in several nodes void lockSharedData(const IMergeTreeDataPart & part, bool replace_existing_lock, std::optional hardlinked_files) const override; @@ -285,7 +285,7 @@ public: MergeTreeDataFormatVersion data_format_version); /// Fetch part only if some replica has it on shared storage like S3 - MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; + MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; @@ -716,7 +716,7 @@ private: * Used for replace local part on the same s3-shared part in hybrid storage. * Returns false if part is already fetching right now. 
*/ - MutableDataPartStoragePtr fetchExistsPart( + MutableDataPartPtr fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & replica_path, diff --git a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml index 5ffeb0c0d01..e179c848be1 100644 --- a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml +++ b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml @@ -33,4 +33,6 @@ true + + true diff --git a/tests/integration/test_s3_zero_copy_ttl/test.py b/tests/integration/test_s3_zero_copy_ttl/test.py index 7dcf3734653..04bff4a44fb 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test.py +++ b/tests/integration/test_s3_zero_copy_ttl/test.py @@ -35,7 +35,7 @@ def test_ttl_move_and_s3(started_cluster): ORDER BY id PARTITION BY id TTL date TO DISK 's3_disk' - SETTINGS storage_policy='s3_and_default' + SETTINGS storage_policy='s3_and_default', temporary_directories_lifetime=1 """.format( i ) From 118f02b522420a786b093b3e55fcd404045df8a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 07:42:01 +0200 Subject: [PATCH 060/230] Add a test for calculate_text_stack_trace setting --- .../02796_calculate_text_stack_trace.reference | 2 ++ .../0_stateless/02796_calculate_text_stack_trace.sql | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02796_calculate_text_stack_trace.reference create mode 100644 tests/queries/0_stateless/02796_calculate_text_stack_trace.sql diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql 
b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql new file mode 100644 index 00000000000..3c2806ac010 --- /dev/null +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -0,0 +1,8 @@ +SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SYSTEM FLUSH LOGS; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; + +SET calculate_text_stack_trace = 0; +SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SYSTEM FLUSH LOGS; +SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; From 7fc8942ea9d7b15800d9d6ec8355b162013bf32e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 07:50:44 +0200 Subject: [PATCH 061/230] Update test --- .../02796_calculate_text_stack_trace.reference | 4 ++++ .../02796_calculate_text_stack_trace.sql | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference index b261da18d51..c800bbce32b 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference @@ -1,2 +1,6 @@ 1 +1 +1 +0 +0 0 diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql index 3c2806ac010..601bd16fb39 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -1,8 +1,16 @@ -SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +-- Tags: 
no-parallel + +TRUNCATE TABLE system.text_log; + +SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; + +TRUNCATE TABLE system.text_log; SET calculate_text_stack_trace = 0; -SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SELECT 'World', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; From b8ede5262a5df9b4db2c25ecebc7818f763f9e9c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 07:51:49 +0200 Subject: [PATCH 062/230] Add a test for #42691 --- .../0_stateless/02802_with_cube_with_totals.reference | 8 ++++++++ tests/queries/0_stateless/02802_with_cube_with_totals.sql | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 
tests/queries/0_stateless/02802_with_cube_with_totals.reference create mode 100644 tests/queries/0_stateless/02802_with_cube_with_totals.sql diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.reference b/tests/queries/0_stateless/02802_with_cube_with_totals.reference new file mode 100644 index 00000000000..c7b7b570456 --- /dev/null +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.reference @@ -0,0 +1,8 @@ +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 + +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +\N +\N + +\N diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.sql b/tests/queries/0_stateless/02802_with_cube_with_totals.sql new file mode 100644 index 00000000000..77adb68eb4b --- /dev/null +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.sql @@ -0,0 +1,2 @@ +SELECT tuple((2147483648, (-0., 1.1754943508222875e-38, 2147483646, '-9223372036854775808', NULL))), toInt128(0.0001) GROUP BY ((256, toInt64(1.1754943508222875e-38), NULL), NULL, -0., ((65535, '-92233720368547758.07'), 0.9999), tuple(((1., 3.4028234663852886e38, '1', 0.5), NULL, tuple('0.1')))) WITH CUBE WITH TOTALS; +SELECT NULL GROUP BY toUUID(NULL, '0', NULL, '0.0000065535'), 1 WITH CUBE WITH TOTALS; From f0aee54dab32b1db35171c526525354a7701e21a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 07:57:40 +0200 Subject: [PATCH 063/230] Add a test for #32474 --- tests/queries/0_stateless/02804_intersect_bad_cast.reference | 0 tests/queries/0_stateless/02804_intersect_bad_cast.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/02804_intersect_bad_cast.reference create mode 100644 tests/queries/0_stateless/02804_intersect_bad_cast.sql diff --git a/tests/queries/0_stateless/02804_intersect_bad_cast.reference 
b/tests/queries/0_stateless/02804_intersect_bad_cast.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02804_intersect_bad_cast.sql b/tests/queries/0_stateless/02804_intersect_bad_cast.sql new file mode 100644 index 00000000000..c7eb8fdd3bc --- /dev/null +++ b/tests/queries/0_stateless/02804_intersect_bad_cast.sql @@ -0,0 +1 @@ +SELECT 2., * FROM (SELECT 1024, 256 INTERSECT SELECT 100 AND inf, 256); From fa6df80aa204ec4e9f2d872eba0a2c7baee2cce4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 08:05:45 +0200 Subject: [PATCH 064/230] Add a test for #35801 --- .../02807_default_date_time_nullable.reference | 2 ++ .../02807_default_date_time_nullable.sql | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/02807_default_date_time_nullable.reference create mode 100644 tests/queries/0_stateless/02807_default_date_time_nullable.sql diff --git a/tests/queries/0_stateless/02807_default_date_time_nullable.reference b/tests/queries/0_stateless/02807_default_date_time_nullable.reference new file mode 100644 index 00000000000..d103460bff7 --- /dev/null +++ b/tests/queries/0_stateless/02807_default_date_time_nullable.reference @@ -0,0 +1,2 @@ +1 1977-01-01 00:00:00 +1 1977-01-01 00:00:00 diff --git a/tests/queries/0_stateless/02807_default_date_time_nullable.sql b/tests/queries/0_stateless/02807_default_date_time_nullable.sql new file mode 100644 index 00000000000..9152f198787 --- /dev/null +++ b/tests/queries/0_stateless/02807_default_date_time_nullable.sql @@ -0,0 +1,18 @@ +create temporary table test ( + data int, + default Nullable(DateTime) DEFAULT '1977-01-01 00:00:00' +) engine = Memory(); + +insert into test (data) select 1; + +select * from test; + +drop temporary table test; + +create temporary table test ( + data int, + default DateTime DEFAULT '1977-01-01 00:00:00' +) engine = Memory(); +insert into test (data) select 1; + +select * from test; 
From 5d43a64112711b339b82b1c0e8df7882546a1a3c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 13:13:49 +0200 Subject: [PATCH 065/230] Initialize text_log earlier to capture table startup messages While I was investigating some issues, I noticed that messages from table startup are not appears in system.text_log due to too late initialization. Signed-off-by: Azat Khuzhin --- programs/server/Server.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..755b7f17d98 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1581,6 +1581,15 @@ try /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); + /// Build loggers before tables startup to make log messages from tables + /// attach available in system.text_log + { + String level_str = config().getString("text_log.level", ""); + int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str); + setTextLog(global_context->getTextLog(), level); + + buildLoggers(config(), logger()); + } /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper); attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA)); @@ -1707,14 +1716,6 @@ try /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. async_metrics.start(); - { - String level_str = config().getString("text_log.level", ""); - int level = level_str.empty() ? 
INT_MAX : Poco::Logger::parseLevel(level_str); - setTextLog(global_context->getTextLog(), level); - } - - buildLoggers(config(), logger()); - main_config_reloader->start(); access_control.startPeriodicReloading(); From 0c7a4142e40b186da12c3ac3f0664cb3a94e979f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 20:57:39 +0200 Subject: [PATCH 066/230] Use separate default settings for clickhouse-local There are already two of them: - storage_file_read_method can use mmap method for clickhouse-local - there is no sense in disabling allow_introspection_functions for clickhouse-local since it can hurt only itself And likely there will be more, once the infrastructure will be there. Signed-off-by: Azat Khuzhin --- src/Core/Settings.h | 2 +- src/Core/SettingsOverridesLocal.cpp | 13 +++++++++++++ src/Core/SettingsOverridesLocal.h | 11 +++++++++++ src/Interpreters/Context.cpp | 3 +++ ...2800_clickhouse_local_default_settings.reference | 2 ++ .../02800_clickhouse_local_default_settings.sh | 8 ++++++++ 6 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 src/Core/SettingsOverridesLocal.cpp create mode 100644 src/Core/SettingsOverridesLocal.h create mode 100644 tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference create mode 100755 tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3d42bd582ed..c51076f3237 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -657,7 +657,7 @@ class IColumn; M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ + M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsOverridesLocal.cpp b/src/Core/SettingsOverridesLocal.cpp new file mode 100644 index 00000000000..2beb560ece2 --- /dev/null +++ b/src/Core/SettingsOverridesLocal.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void applySettingsOverridesForLocal(Settings & settings) +{ + settings.allow_introspection_functions = true; + settings.storage_file_read_method = LocalFSReadMethod::mmap; +} + +} diff --git a/src/Core/SettingsOverridesLocal.h b/src/Core/SettingsOverridesLocal.h new file mode 100644 index 00000000000..89b79f4ad55 --- /dev/null +++ b/src/Core/SettingsOverridesLocal.h @@ -0,0 +1,11 @@ +#pragma once + 
+namespace DB +{ + +struct Settings; + +/// Update some settings defaults for clickhouse-local +void applySettingsOverridesForLocal(Settings & settings); + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 106264320b2..dccdf4efca0 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -3646,6 +3647,8 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi setCurrentProfile(shared->system_profile_name); applySettingsQuirks(settings, &Poco::Logger::get("SettingsQuirks")); + if (shared->application_type == ApplicationType::LOCAL) + applySettingsOverridesForLocal(settings); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); diff --git a/tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference new file mode 100644 index 00000000000..0f18d1a3897 --- /dev/null +++ b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference @@ -0,0 +1,2 @@ +allow_introspection_functions 1 +storage_file_read_method mmap diff --git a/tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh new file mode 100755 index 00000000000..792e187fc51 --- /dev/null +++ b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select name, value from system.settings where changed" From a7b14f87e0b43f02fac2cd216e906b045dbbfa42 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 21:14:28 +0200 Subject: [PATCH 067/230] Throw an error instead of silenty ignore storage_file_read_method=mmap in server Signed-off-by: Azat Khuzhin --- src/Storages/StorageFile.cpp | 8 ++++---- .../0_stateless/02497_storage_file_reader_selection.sh | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index ff67272e542..5301b159f96 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -205,7 +205,7 @@ std::unique_ptr selectReadBuffer( { auto read_method = context->getSettingsRef().storage_file_read_method; - /** But using mmap on server-side is unsafe for the following reasons: + /** Using mmap on server-side is unsafe for the following reasons: * - concurrent modifications of a file will result in SIGBUS; * - IO error from the device will result in SIGBUS; * - recovery from this signal is not feasible even with the usage of siglongjmp, @@ -214,10 +214,10 @@ std::unique_ptr selectReadBuffer( * * But we keep this mode for clickhouse-local as it is not so bad for a command line tool. */ + if (context->getApplicationType() == Context::ApplicationType::SERVER && read_method == LocalFSReadMethod::mmap) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Using storage_file_read_method=mmap is not safe in server mode. 
Consider using pread."); - if (S_ISREG(file_stat.st_mode) - && context->getApplicationType() != Context::ApplicationType::SERVER - && read_method == LocalFSReadMethod::mmap) + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) { try { diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 20bde68718d..25387e61db6 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -13,4 +13,6 @@ $CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SEL $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap" $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferOrdinary" +$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -nq "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" + rm $DATA_FILE From 59f11863d7776134c383b168a1ec7ff2acc8bc16 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 21:41:33 +0200 Subject: [PATCH 068/230] Simplify settings overrides or clickhouse-local Signed-off-by: Azat Khuzhin --- programs/local/LocalServer.cpp | 16 +++++++++++++++- src/Core/SettingsOverridesLocal.cpp | 13 ------------- src/Core/SettingsOverridesLocal.h | 11 ----------- src/Interpreters/Context.cpp | 3 --- 4 files changed, 15 insertions(+), 28 deletions(-) delete mode 100644 src/Core/SettingsOverridesLocal.cpp delete mode 100644 src/Core/SettingsOverridesLocal.h diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index caca7cfb50d..033d2b91ec6 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -71,6 +71,15 @@ namespace ErrorCodes 
extern const int FILE_ALREADY_EXISTS; } +void applySettingsOverridesForLocal(ContextMutablePtr context) +{ + Settings settings = context->getSettings(); + + settings.allow_introspection_functions = true; + settings.storage_file_read_method = LocalFSReadMethod::mmap; + + context->setSettings(settings); +} void LocalServer::processError(const String &) const { @@ -657,6 +666,12 @@ void LocalServer::processConfig() CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); #endif + /// NOTE: it is important to apply any overrides before + /// setDefaultProfiles() calls since it will copy current context (i.e. + /// there is separate context for Buffer tables). + applySettingsOverridesForLocal(global_context); + applyCmdOptions(global_context); + /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); @@ -671,7 +686,6 @@ void LocalServer::processConfig() std::string default_database = config().getString("default_database", "_local"); DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, global_context)); global_context->setCurrentDatabase(default_database); - applyCmdOptions(global_context); if (config().has("path")) { diff --git a/src/Core/SettingsOverridesLocal.cpp b/src/Core/SettingsOverridesLocal.cpp deleted file mode 100644 index 2beb560ece2..00000000000 --- a/src/Core/SettingsOverridesLocal.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -namespace DB -{ - -void applySettingsOverridesForLocal(Settings & settings) -{ - settings.allow_introspection_functions = true; - settings.storage_file_read_method = LocalFSReadMethod::mmap; -} - -} diff --git a/src/Core/SettingsOverridesLocal.h b/src/Core/SettingsOverridesLocal.h deleted file mode 100644 index 89b79f4ad55..00000000000 --- a/src/Core/SettingsOverridesLocal.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -namespace DB -{ - -struct Settings; - 
-/// Update some settings defaults for clickhouse-local -void applySettingsOverridesForLocal(Settings & settings); - -} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index dccdf4efca0..106264320b2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -3647,8 +3646,6 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi setCurrentProfile(shared->system_profile_name); applySettingsQuirks(settings, &Poco::Logger::get("SettingsQuirks")); - if (shared->application_type == ApplicationType::LOCAL) - applySettingsOverridesForLocal(settings); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); From 940cf69ce436107415c3990088738b83dfb201c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Jun 2023 07:30:32 +0200 Subject: [PATCH 069/230] Add a test for #43358 --- tests/queries/0_stateless/02809_has_token.reference | 1 + tests/queries/0_stateless/02809_has_token.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/02809_has_token.reference create mode 100644 tests/queries/0_stateless/02809_has_token.sql diff --git a/tests/queries/0_stateless/02809_has_token.reference b/tests/queries/0_stateless/02809_has_token.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02809_has_token.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02809_has_token.sql b/tests/queries/0_stateless/02809_has_token.sql new file mode 100644 index 00000000000..08edf3756d1 --- /dev/null +++ b/tests/queries/0_stateless/02809_has_token.sql @@ -0,0 +1,3 @@ +-- in old versions of ClickHouse, the following query returned a wrong result: + +SELECT hasToken('', 'quota') AS r; From ed6bfd66fee9aa4c2e06eaf25ca81d02e09f075e Mon Sep 17 
00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 26 Jun 2023 17:28:59 +0200 Subject: [PATCH 070/230] Publish changes --- docker/packager/binary/build.sh | 4 ++++ docker/packager/packager | 1 + 2 files changed, 5 insertions(+) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c74147..08a9b07f3ce 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -15,6 +15,10 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 + + if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then + tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > /output/source_sub.tar.gz + fi fi # Uncomment to debug ccache. 
Don't put ccache log in /output right away, or it diff --git a/docker/packager/packager b/docker/packager/packager index 1b3df858cd2..42dc52aa37f 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -168,6 +168,7 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) result.append("EXTRACT_TOOLCHAIN_DARWIN=1") + result.append("EXPORT_SOURCES_WITH_SUBMODULES=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") From dd3d2c9aeaa5798467521eaf2fc85f2332a07a6a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 08:01:15 +0200 Subject: [PATCH 071/230] Fix syntax error --- tests/integration/test_attach_table_normalizer/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index ba0068e9c59..49acefdcd17 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - 'node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True + "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True ) @@ -18,13 +18,13 @@ def started_cluster(): def replace_substring_to_substr(node): - node.exec_in_container(( + node.exec_in_container( [ "bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", ], - user="root" + user="root", ) From 40f721ae4f290c76d492260d740c1eb37df20e4c Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 17:14:33 +0000 Subject: [PATCH 072/230] fix possible race on shutdown wait --- programs/server/Server.cpp | 4 ++-- src/Server/waitServersToFinish.cpp | 11 +++++++---- 2 files 
changed, 9 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..41df7a119d1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1523,7 +1523,7 @@ try LOG_INFO(log, "Closed all listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); @@ -1827,7 +1827,7 @@ try global_context->getProcessList().killAllQueries(); if (current_connections) - current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_WARNING(log, "Closed connections. But {} remain." 
diff --git a/src/Server/waitServersToFinish.cpp b/src/Server/waitServersToFinish.cpp index f2e36fae86c..3b07c082067 100644 --- a/src/Server/waitServersToFinish.cpp +++ b/src/Server/waitServersToFinish.cpp @@ -5,7 +5,7 @@ namespace DB { -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, std::mutex & mutex, size_t seconds_to_wait) { const size_t sleep_max_ms = 1000 * seconds_to_wait; const size_t sleep_one_ms = 100; @@ -15,10 +15,13 @@ size_t waitServersToFinish(std::vector & servers, siz { current_connections = 0; - for (auto & server : servers) { - server.stop(); - current_connections += server.currentConnections(); + std::scoped_lock lock{mutex}; + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (!current_connections) From 7583da9b3806850a3ed99e7b93f253c17ddb5aa8 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 18:48:54 +0000 Subject: [PATCH 073/230] fix --- src/Server/waitServersToFinish.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/waitServersToFinish.h b/src/Server/waitServersToFinish.h index 5e90790cefb..b6daa025964 100644 --- a/src/Server/waitServersToFinish.h +++ b/src/Server/waitServersToFinish.h @@ -5,6 +5,6 @@ namespace DB { class ProtocolServerAdapter; -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait); +size_t waitServersToFinish(std::vector & servers, std::mutex & mutex, size_t seconds_to_wait); } From 6515d52f6018570560eeb56d93d05ca1b530a892 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 18:50:40 +0000 Subject: [PATCH 074/230] fix2 --- programs/keeper/Keeper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a1825665188..43c3489bbda 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -465,7 +465,7 @@ try LOG_INFO(log, "Closed all 
listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(*servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to Keeper. But {} remain. Probably some users cannot finish their connections after context shutdown.", current_connections); From 13854e5259ee446c7b76be2db619bd22fd6491bb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 27 Jun 2023 17:23:51 +0200 Subject: [PATCH 075/230] impl --- src/Processors/QueryPlan/PartsSplitter.cpp | 6 +- ...nal_block_structure_mismatch_bug.reference | 9 +++ ...791_final_block_structure_mismatch_bug.sql | 66 +++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference create mode 100644 tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 9796e696f6c..e1fc3facf04 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -271,6 +271,9 @@ Pipes buildPipesForReadingByPKRanges( for (size_t i = 0; i < result_layers.size(); ++i) { pipes[i] = reading_step_getter(std::move(result_layers[i])); + auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); + pipes[i].addSimpleTransform([pk_expression](const Block & header) + { return std::make_shared(header, pk_expression); }); auto & filter_function = filters[i]; if (!filter_function) continue; @@ -279,9 +282,6 @@ Pipes buildPipesForReadingByPKRanges( ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in [{}, {})", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? 
::toString(borders[i]) : "+inf"); - auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); - pipes[i].addSimpleTransform([pk_expression](const Block & header) - { return std::make_shared(header, pk_expression); }); pipes[i].addSimpleTransform( [&](const Block & header) { diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference new file mode 100644 index 00000000000..a8401b1cae8 --- /dev/null +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -0,0 +1,9 @@ +1 +2 +3 +1 +2 +3 +1 +2 +3 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql new file mode 100644 index 00000000000..4c7ac50b8d0 --- /dev/null +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -0,0 +1,66 @@ +SET do_not_merge_across_partitions_select_final=1; + +CREATE TABLE test_block_mismatch +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +CREATE TABLE test_block_mismatch_sk1 +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +PRIMARY KEY (toDate(b)) +ORDER BY (toDate(b), a); + 
+INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +CREATE TABLE test_block_mismatch_sk2 +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +PRIMARY KEY (a) +ORDER BY (a, toDate(b)); + +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; From e2f20ea0e2b012796e05f1e734152609b34167e7 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 00:30:51 +0200 Subject: [PATCH 076/230] fix --- src/Processors/QueryPlan/PartsSplitter.cpp | 6 ++--- src/Processors/QueryPlan/PartsSplitter.h | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 22 +++++++++--------- ...nal_block_structure_mismatch_bug.reference | 1 + ...791_final_block_structure_mismatch_bug.sql | 23 +++++++++++++++++++ 5 files changed, 39 insertions(+), 14 
deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index e1fc3facf04..533fbde1e13 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -256,6 +256,7 @@ namespace ErrorCodes Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, + ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, @@ -271,9 +272,8 @@ Pipes buildPipesForReadingByPKRanges( for (size_t i = 0; i < result_layers.size(); ++i) { pipes[i] = reading_step_getter(std::move(result_layers[i])); - auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); - pipes[i].addSimpleTransform([pk_expression](const Block & header) - { return std::make_shared(header, pk_expression); }); + pipes[i].addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); auto & filter_function = filters[i]; if (!filter_function) continue; diff --git a/src/Processors/QueryPlan/PartsSplitter.h b/src/Processors/QueryPlan/PartsSplitter.h index 56bca688c2d..4ba655a6f6d 100644 --- a/src/Processors/QueryPlan/PartsSplitter.h +++ b/src/Processors/QueryPlan/PartsSplitter.h @@ -18,6 +18,7 @@ using ReadingInOrderStepGetter = std::function; /// Will try to produce exactly max_layer pipes but may return less if data is distributed in not a very parallelizable way. 
Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, + ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3c38ecbbd3f..fac8ebd6e1f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -979,6 +979,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( RangesInDataParts lonely_parts; size_t sum_marks_in_lonely_parts = 0; + auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) { Pipes pipes; @@ -1022,12 +1024,20 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( info.use_uncompressed_cache); }; pipes = buildPipesForReadingByPKRanges( - metadata_for_reading->getPrimaryKey(), std::move(new_parts), num_streams, context, std::move(reading_step_getter)); + metadata_for_reading->getPrimaryKey(), + sorting_expr, + std::move(new_parts), + num_streams, + context, + std::move(reading_step_getter)); } else { pipes.emplace_back(read( std::move(new_parts), column_names, ReadFromMergeTree::ReadType::InOrder, num_streams, 0, info.use_uncompressed_cache)); + + pipes.back().addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); } /// Drop temporary columns, added by 'sorting_key_expr' @@ -1035,13 +1045,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( out_projection = createProjection(pipes.front().getHeader()); } - auto sorting_expr = std::make_shared( - metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); - - for (auto & pipe : pipes) - pipe.addSimpleTransform([sorting_expr](const Block & header) - { return std::make_shared(header, sorting_expr); }); - /// If 
do_not_merge_across_partitions_select_final is true and there is only one part in partition /// with level > 0 then we won't postprocess this part if (settings.do_not_merge_across_partitions_select_final && @@ -1098,9 +1101,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe.getHeader()); - auto sorting_expr = std::make_shared( - metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); - pipe.addSimpleTransform([sorting_expr](const Block & header) { return std::make_shared(header, sorting_expr); diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference index a8401b1cae8..ca810c46a2d 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -7,3 +7,4 @@ 1 2 3 +2 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql index 4c7ac50b8d0..a82e43d81f4 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -64,3 +64,26 @@ SELECT count(*) FROM test_block_mismatch_sk2 FINAL; INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +CREATE TABLE test_block_mismatch_magic_row_dist +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO 
test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); + +optimize table test_block_mismatch_magic_row_dist final; + +system stop merges test_block_mismatch_magic_row_dist; + +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); + +SELECT count(*) FROM test_block_mismatch_magic_row_dist FINAL; From 521137c55d18f956c86cf71b1ca7bca2601f7d70 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 06:28:21 +0300 Subject: [PATCH 077/230] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 49acefdcd17..10b400494ab 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True + "node", stay_alive=True ) From 5df6f3d6e28483a029f3a8859c8bd09fdab008a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Jun 2023 03:40:09 +0000 Subject: [PATCH 078/230] Automatic style fix --- tests/integration/test_attach_table_normalizer/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 10b400494ab..79093bf4014 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,9 +3,7 @@ import pytest from helpers.cluster import 
ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", stay_alive=True -) +node = cluster.add_instance("node", stay_alive=True) @pytest.fixture(scope="module") From 68ac4d8cc934d4e9483b5257e9ffbdb84b92c709 Mon Sep 17 00:00:00 2001 From: xuelei Date: Wed, 28 Jun 2023 16:31:57 +0800 Subject: [PATCH 079/230] fix storage policy prompt display error --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..02789132e55 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(new_storage_policy->getName())); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(new_storage_policy->getName())); } } From 7e6d606b1c6b5277b1420a509cf841d1c1120ffc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 10:41:39 +0200 Subject: [PATCH 080/230] Remove bad code and fix a bug --- src/Common/QueryProfiler.cpp | 9 +++ src/Common/StringSearcher.h | 89 +---------------------- 
src/Common/Volnitsky.h | 3 - src/Functions/HasTokenImpl.h | 45 ++++++++---- src/Functions/hasToken.cpp | 5 +- src/Functions/hasTokenCaseInsensitive.cpp | 5 +- 6 files changed, 48 insertions(+), 108 deletions(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 085c8fb8af4..313d4b77739 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -120,6 +120,15 @@ void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Failed to create thread timer. The function " "'timer_create' returned non-zero but didn't set errno. This is bug in your OS."); + /// For example, it cannot be created if the server is run under QEMU: + /// "Failed to create thread timer, errno: 11, strerror: Resource temporarily unavailable." + + /// You could accidentally run the server under QEMU without being aware, + /// if you use Docker image for a different architecture, + /// and you have the "binfmt-misc" kernel module, and "qemu-user" tools. + + /// Also, it cannot be created if the server has too many threads. + throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER); } timer_id.emplace(local_timer_id); diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 3ed192d05f3..b3065354f65 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -793,88 +793,6 @@ public: } }; - -// Searches for needle surrounded by token-separators. -// Separators are anything inside ASCII (0-128) and not alphanum. -// Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings -// should work just fine. But any Unicode whitespace is not considered a token separtor. 
-template -class TokenSearcher : public StringSearcherBase -{ - StringSearcher searcher; - size_t needle_size; - -public: - - template - requires (sizeof(CharT) == 1) - static bool isValidNeedle(const CharT * needle_, size_t needle_size_) - { - return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator); - } - - template - requires (sizeof(CharT) == 1) - TokenSearcher(const CharT * needle_, size_t needle_size_) - : searcher(needle_, needle_size_) - , needle_size(needle_size_) - { - /// The caller is responsible for calling isValidNeedle() - chassert(isValidNeedle(needle_, needle_size_)); - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * haystack, const CharT * haystack_end, const CharT * pos) const - { - // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token. - if (isToken(haystack, haystack_end, pos)) - return searcher.compare(haystack, haystack_end, pos); - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - // use searcher.search(), then verify that returned value is a token - // if it is not, skip it and re-run - - const auto * pos = haystack; - while (pos < haystack_end) - { - pos = searcher.search(pos, haystack_end); - if (pos == haystack_end || isToken(haystack, haystack_end, pos)) - return pos; - - // assuming that heendle does not contain any token separators. 
- pos += needle_size; - } - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool isToken(const CharT * haystack, const CharT * const haystack_end, const CharT* p) const - { - return (p == haystack || isTokenSeparator(*(p - 1))) - && (p + needle_size >= haystack_end || isTokenSeparator(*(p + needle_size))); - } - - ALWAYS_INLINE static bool isTokenSeparator(const uint8_t c) - { - return !(isAlphaNumericASCII(c) || !isASCII(c)); - } -}; - } using ASCIICaseSensitiveStringSearcher = impl::StringSearcher; @@ -882,9 +800,6 @@ using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher; using UTF8CaseSensitiveStringSearcher = impl::StringSearcher; using UTF8CaseInsensitiveStringSearcher = impl::StringSearcher; -using ASCIICaseSensitiveTokenSearcher = impl::TokenSearcher; -using ASCIICaseInsensitiveTokenSearcher = impl::TokenSearcher; - /// Use only with short haystacks where cheap initialization is required. 
template struct StdLibASCIIStringSearcher @@ -906,11 +821,11 @@ struct StdLibASCIIStringSearcher if constexpr (CaseInsensitive) return std::search( haystack_start, haystack_end, needle_start, needle_end, - [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); + [](char c1, char c2) { return std::toupper(c1) == std::toupper(c2); }); else return std::search( haystack_start, haystack_end, needle_start, needle_end, - [](char c1, char c2) {return c1 == c2;}); + [](char c1, char c2) { return c1 == c2; }); } template diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 8f9aa23a38a..3360c197984 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -730,9 +730,6 @@ using VolnitskyUTF8 = VolnitskyBase; /// ignores non-ASCII bytes using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase; -using VolnitskyCaseSensitiveToken = VolnitskyBase; -using VolnitskyCaseInsensitiveToken = VolnitskyBase; - using MultiVolnitsky = MultiVolnitskyBase; using MultiVolnitskyUTF8 = MultiVolnitskyBase; using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 8cacdfff99d..fdec5fcb0b7 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -17,7 +17,7 @@ namespace ErrorCodes /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. 
*/ -template +template struct HasTokenImpl { using ResultType = UInt8; @@ -46,7 +46,7 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - if (!ASCIICaseSensitiveTokenSearcher::isValidNeedle(pattern.data(), pattern.size())) + if (!std::none_of(pattern.begin(), pattern.end(), isTokenSeparator)) { if (res_null) { @@ -58,7 +58,8 @@ struct HasTokenImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); } - TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); + size_t pattern_size = pattern.size(); + Searcher searcher(pattern.data(), pattern_size, end - pos); if (res_null) std::ranges::fill(res_null->getData(), false); @@ -67,21 +68,31 @@ struct HasTokenImpl /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) { - /// Let's determine which index it refers to. - while (begin + haystack_offsets[i] <= pos) + /// The found substring is a token + if ((pos == begin || isTokenSeparator(pos[-1])) + && (pos + pattern_size == end || isTokenSeparator(pos[pattern_size]))) { - res[i] = negate; + /// Let's determine which index it refers to. + while (begin + haystack_offsets[i] <= pos) + { + res[i] = negate; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; + else + res[i] = negate; + + pos = begin + haystack_offsets[i]; ++i; } - - /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + haystack_offsets[i]) - res[i] = !negate; else - res[i] = negate; - - pos = begin + haystack_offsets[i]; - ++i; + { + /// Not a token. Jump over it. + pos += pattern_size; + } } /// Tail, in which there can be no substring. 
@@ -113,6 +124,12 @@ struct HasTokenImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + +private: + static bool isTokenSeparator(UInt8 c) + { + return isASCII(c) && !isAlphaNumericASCII(c); + } }; } diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index b90750ea233..fa41abf2641 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -6,6 +6,7 @@ namespace DB { + struct NameHasToken { static constexpr auto name = "hasToken"; @@ -17,9 +18,9 @@ struct NameHasTokenOrNull }; using FunctionHasToken - = FunctionsStringSearch>; + = FunctionsStringSearch>; using FunctionHasTokenOrNull - = FunctionsStringSearch, ExecutionErrorPolicy::Null>; + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasToken) { diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index d7381e336b5..32675b9384d 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -6,6 +6,7 @@ namespace DB { + struct NameHasTokenCaseInsensitive { static constexpr auto name = "hasTokenCaseInsensitive"; @@ -17,9 +18,9 @@ struct NameHasTokenCaseInsensitiveOrNull }; using FunctionHasTokenCaseInsensitive - = FunctionsStringSearch>; + = FunctionsStringSearch>; using FunctionHasTokenCaseInsensitiveOrNull - = FunctionsStringSearch, ExecutionErrorPolicy::Null>; + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasTokenCaseInsensitive) { From 7024527542dd341e32dfe313cc54f8f537b69c98 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:43:10 +0200 Subject: [PATCH 081/230] Fix flaky test 00417_kill_query --- tests/queries/0_stateless/00417_kill_query.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00417_kill_query.sh 
b/tests/queries/0_stateless/00417_kill_query.sh index dc690caca39..6eb0505f6bb 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -9,13 +9,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_FIELND_NUM=4 -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 4" &>/dev/null & +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 30" &>/dev/null & sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELND_NUM -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 5" &>/dev/null & +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 5' ASYNC" | cut -f $QUERY_FIELND_NUM +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated" From d98776b70850f140494bc5e799219877f50124ca Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:44:03 +0200 Subject: [PATCH 082/230] Fix flaky test 00417_kill_query --- tests/queries/0_stateless/00417_kill_query.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00417_kill_query.reference b/tests/queries/0_stateless/00417_kill_query.reference index 7e89d9674db..1a3b47964c0 100644 --- a/tests/queries/0_stateless/00417_kill_query.reference +++ b/tests/queries/0_stateless/00417_kill_query.reference @@ -1,2 +1,2 @@ -SELECT 
sleep(1) FROM system.numbers LIMIT 4 -SELECT sleep(1) FROM system.numbers LIMIT 5 +SELECT sleep(1) FROM system.numbers LIMIT 30 +SELECT sleep(1) FROM system.numbers LIMIT 31 From 112310e98fce282516b633c1b0a193e45b278aec Mon Sep 17 00:00:00 2001 From: xuelei Date: Wed, 28 Jun 2023 19:17:53 +0800 Subject: [PATCH 083/230] fix storage policy prompt display error --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 02789132e55..6faa7c13c49 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(new_storage_policy->getName())); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(new_storage_policy->getName())); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); } } From a3994319776c77576bff2a256aed77265423e279 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 16:40:57 +0300 Subject: [PATCH 084/230] Update StoragePolicy.cpp --- 
src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 6faa7c13c49..a02568f9489 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of the old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of the old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); } } From 23d0a9e3a83f263f563c0d2b0983bff6aa9a2d90 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 16:20:45 +0200 Subject: [PATCH 085/230] fix --- .../01861_explain_pipeline.reference | 18 +++++----- ...inal_streams_data_skipping_index.reference | 36 +++++++++---------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/01861_explain_pipeline.reference b/tests/queries/0_stateless/01861_explain_pipeline.reference index aec3ae06dce..427b3eaefc0 100644 
--- a/tests/queries/0_stateless/01861_explain_pipeline.reference +++ b/tests/queries/0_stateless/01861_explain_pipeline.reference @@ -17,14 +17,12 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 ReplacingSorted - ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [(5), +inf) - ExpressionTransform - MergeTreeInOrder 0 → 1 - ReplacingSorted 2 → 1 + FilterSortedStreamByRange + Description: filter values in [(5), +inf) + ExpressionTransform + MergeTreeInOrder 0 → 1 + ReplacingSorted 2 → 1 + FilterSortedStreamByRange × 2 + Description: filter values in [-inf, (5)) ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [-inf, (5)) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeInOrder × 2 0 → 1 diff --git a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference index d7a540ae479..5242c625325 100644 --- a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference +++ b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference @@ -9,17 +9,15 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 AggregatingSortedTransform 2 → 1 - ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [(999424), +inf) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 - AggregatingSortedTransform + FilterSortedStreamByRange × 2 + Description: filter values in [(999424), +inf) + ExpressionTransform × 2 + MergeTreeInOrder × 2 0 → 1 + AggregatingSortedTransform + FilterSortedStreamByRange + Description: filter values in [-inf, (999424)) ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [-inf, (999424)) - ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeInOrder 0 → 1 EXPLAIN PIPELINE SELECT * FROM data FINAL WHERE v1 >= now() - INTERVAL 180 DAY SETTINGS max_threads=2, 
max_final_threads=2, force_data_skipping_indices='v1_index', use_skip_indexes_if_final=0 FORMAT LineAsString; @@ -30,14 +28,12 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 AggregatingSortedTransform 2 → 1 - ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [(999424), +inf) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 - AggregatingSortedTransform + FilterSortedStreamByRange × 2 + Description: filter values in [(999424), +inf) + ExpressionTransform × 2 + MergeTreeInOrder × 2 0 → 1 + AggregatingSortedTransform + FilterSortedStreamByRange + Description: filter values in [-inf, (999424)) ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [-inf, (999424)) - ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeInOrder 0 → 1 From c9fad7b1410740d7ada64b65dfda5fefbe4a45ff Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 18:40:48 +0200 Subject: [PATCH 086/230] Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests --- .../0_stateless/02782_uniq_exact_parallel_merging_bug.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh b/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh index d84ffd21b87..a7f71eacf0f 100755 --- a/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh +++ b/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh @@ -1,10 +1,8 @@ #!/usr/bin/env bash -# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan +# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan, no-parallel # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh From 7f8ad3d5cbab240a5ef4d75b55f55478ceed22e0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:48:54 +0200 Subject: [PATCH 087/230] Convert assert to LOGICAL_ERROR in createBlockSelector() for zero weight Signed-off-by: Azat Khuzhin --- src/Interpreters/createBlockSelector.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index 659fc483373..a8eb39e6c9d 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -12,13 +13,19 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + template IColumn::Selector createBlockSelector( const IColumn & column, const std::vector & slots) { const auto total_weight = slots.size(); - assert(total_weight != 0); + if (total_weight == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "weight is zero"); size_t num_rows = column.size(); IColumn::Selector selector(num_rows); From c9adfe1efd9aa0210185eecfbc9d446f4060077f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:53:14 +0200 Subject: [PATCH 088/230] Prohibit cluster with zero weight across all shards Previously it led to SIGSEGV, due to either division by zero or a check in libdivide. 
Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index edbef77ef02..89bfb70f7c5 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int INVALID_SHARD_ID; extern const int NO_SUCH_REPLICA; + extern const int BAD_ARGUMENTS; } namespace @@ -614,6 +615,12 @@ Poco::Timespan Cluster::saturate(Poco::Timespan v, Poco::Timespan limit) void Cluster::initMisc() { + /// NOTE: It is possible to have cluster w/o shards for + /// optimize_skip_unused_shards (i.e. WHERE 0 expression), so check the + /// slots only if shards is not empty. + if (!shards_info.empty() && slot_to_shard.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cluster with zero weight on all shards is prohibited"); + for (const auto & shard_info : shards_info) { if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) From 2a12fb42461f0916455a9efd8fd9b5ada4edca69 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:57:53 +0200 Subject: [PATCH 089/230] Initialize weight/slot_to_shards for cluster not from xml correctly This is: - clusterAllReplicas - copier - some distributed cases Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 89bfb70f7c5..891586d88b6 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -525,7 +525,7 @@ Cluster::Cluster( addresses_with_failover.emplace_back(current); - addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num); + addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num, /* insert_paths= */ {}, /* weight= */ 1); ++current_shard_num; } @@ -553,7 +553,7 @@ Cluster::Cluster( 
addresses_with_failover.emplace_back(current); - addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num); + addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num, /* insert_paths= */ {}, /* weight= */ 1); ++current_shard_num; } @@ -715,6 +715,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti ShardInfo info; info.shard_num = ++shard_num; + info.weight = 1; if (address.is_local) info.local_addresses.push_back(address); @@ -740,6 +741,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti info.per_replica_pools = {std::move(pool)}; addresses_with_failover.emplace_back(Addresses{address}); + + slot_to_shard.insert(std::end(slot_to_shard), info.weight, shards_info.size()); shards_info.emplace_back(std::move(info)); } }; @@ -769,7 +772,11 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector { for (size_t index : indices) { - shards_info.emplace_back(from.shards_info.at(index)); + const auto & from_shard = from.shards_info.at(index); + + if (from_shard.weight) + slot_to_shard.insert(std::end(slot_to_shard), from_shard.weight, shards_info.size()); + shards_info.emplace_back(from_shard); if (!from.addresses_with_failover.empty()) addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); From 006d05c6a7aacc6f1c321822725389778b8c299c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 18:03:04 +0200 Subject: [PATCH 090/230] Add test for INSERT INTO clusterAllReplicas() (leads to SIGSEGV before) Signed-off-by: Azat Khuzhin --- .../0_stateless/02804_clusterAllReplicas_insert.reference | 1 + .../queries/0_stateless/02804_clusterAllReplicas_insert.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference create mode 100644 tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql diff --git 
a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql new file mode 100644 index 00000000000..05bda19eb9e --- /dev/null +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql @@ -0,0 +1,5 @@ +drop table if exists data; +create table data (key Int) engine=Memory(); +-- NOTE: internal_replication is false, so INSERT will be done only into one shard +insert into function clusterAllReplicas(test_cluster_two_shards, currentDatabase(), data, rand()) values (2); +select * from data order by key; From 71c144530081549c776e6432a48bebbca9f9f135 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 21:45:56 +0200 Subject: [PATCH 091/230] Update 00417_kill_query.sh --- tests/queries/0_stateless/00417_kill_query.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index 6eb0505f6bb..cd5b788a147 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -13,6 +13,7 @@ $CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LI sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) 
SYNC" | cut -f $QUERY_FIELND_NUM +# 31 is for the query to be different from the previous one $CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM From fdd8a0a3966028a5c72e7ce5e07410f68ce50da5 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:35:07 +0200 Subject: [PATCH 092/230] Fix flaky test 00416_pocopatch_progress_in_http_headers --- ...0416_pocopatch_progress_in_http_headers.sh | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index b2189ab0cc2..7e954db2c86 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -4,9 +4,28 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' +RETRIES=5 + +result="" +lines_expected=4 +counter=0 +while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + let counter=counter+1 + # echo "$result" | wc -l +done +echo "$result" + +result="" +lines_expected=12 +counter=0 +while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + let counter=counter+1 + # echo "$result" | wc -l +done +echo "$result" -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d # 'send_progress_in_http_headers' is false by default @@ -26,7 +45,13 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query (record UInt32) Engine = 
Memory' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query_2 (record UInt32) Engine = Memory' > /dev/null 2>&1 -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' +result="" +counter=0 +while [ $counter -lt $RETRIES ] && [ -z "$result" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]') + let counter=counter+1 +done +echo "$result" ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query_2' > /dev/null 2>&1 From 58581ce5f6bdfe0df9135a95c0df14404af91e2a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:37:09 +0200 Subject: [PATCH 093/230] Update 00416_pocopatch_progress_in_http_headers.sh --- .../0_stateless/00416_pocopatch_progress_in_http_headers.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index 7e954db2c86..ad7e89a7357 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -12,7 +12,6 @@ counter=0 while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS 
"${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 - # echo "$result" | wc -l done echo "$result" @@ -22,7 +21,6 @@ counter=0 while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 - # echo "$result" | wc -l done echo "$result" From f1d695463717703d9c9f076b0e18972425b6bf46 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:09:55 +0000 Subject: [PATCH 094/230] Fix logical error in ANTI join with NULL --- src/Interpreters/HashJoin.cpp | 3 +++ .../02771_semi_join_use_nulls.reference | 16 ++++++++++++++++ .../0_stateless/02771_semi_join_use_nulls.sql.j2 | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6fe2b8464f5..7fee2ab7a6f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1383,6 +1383,9 @@ NO_INLINE IColumn::Filter joinRightColumns( { if (!right_row_found && null_element_found) { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(filter, i); + addNotFoundRow(added_columns, current_offset); if constexpr (join_features.need_replication) diff --git a/tests/queries/0_stateless/02771_semi_join_use_nulls.reference b/tests/queries/0_stateless/02771_semi_join_use_nulls.reference index 8d4b1a3a75e..91c0d964968 100644 --- a/tests/queries/0_stateless/02771_semi_join_use_nulls.reference +++ b/tests/queries/0_stateless/02771_semi_join_use_nulls.reference @@ -11,7 +11,9 @@ 0 0 0 0 0 1 +\N 0 0 1 +\N 0 0 0 0 0 0 \N @@ -25,7 +27,9 @@ 0 
0 0 0 0 2 +\N 1 0 2 +\N 1 0 \N 0 0 0 \N @@ -39,7 +43,9 @@ 0 \N 0 \N 0 1 +\N \N 0 1 +\N \N 0 0 0 0 0 0 @@ -53,7 +59,9 @@ 0 0 0 0 \N 2 +\N 1 \N 2 +\N 1 0 0 0 0 0 0 @@ -67,7 +75,9 @@ 0 0 0 0 0 1 +\N 0 0 1 +\N 0 0 0 0 0 0 0 @@ -81,7 +91,9 @@ 0 0 0 0 1 2 +\N 1 1 2 +\N 1 0 \N 0 0 0 0 @@ -95,7 +107,9 @@ 0 \N 0 \N 0 1 +\N \N 0 1 +\N \N 0 0 0 0 0 0 @@ -109,4 +123,6 @@ 0 0 0 0 1 2 +\N 1 1 2 +\N 1 diff --git a/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 b/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 index 37b2e63761b..248461a98bb 100644 --- a/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 +++ b/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 @@ -44,6 +44,12 @@ SELECT id > 1, d.idd FROM (SELECT {{ maybe_materialize }}(toLowCardinality(0)) A ON a.id = d.idd ; +SELECT * +FROM (SELECT {{ maybe_materialize }}(NULL :: Nullable(UInt64)) AS id) AS a +{{ strictness }} {{ kind }} JOIN (SELECT {{ maybe_materialize }}(1 :: UInt32) AS id) AS d +ON a.id = d.id +; + {% endfor -%} {% endfor -%} {% endfor -%} From c43acc6f909d22dab3c3282fabb46c3c6d877080 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:19:04 +0000 Subject: [PATCH 095/230] better fix --- src/Interpreters/HashJoin.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 7fee2ab7a6f..3e4f2902359 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1284,7 +1284,6 @@ NO_INLINE IColumn::Filter joinRightColumns( for (size_t i = 0; i < rows; ++i) { bool right_row_found = false; - bool null_element_found = false; KnownRowsHolder known_rows; for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) @@ -1293,10 +1292,7 @@ NO_INLINE IColumn::Filter joinRightColumns( if constexpr (has_null_map) { if (join_keys.null_map && (*join_keys.null_map)[i]) - { - null_element_found = true; continue; - } } bool row_acceptable = 
!join_keys.isRowFiltered(i); @@ -1379,23 +1375,6 @@ NO_INLINE IColumn::Filter joinRightColumns( } } - if constexpr (has_null_map) - { - if (!right_row_found && null_element_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(filter, i); - - addNotFoundRow(added_columns, current_offset); - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - continue; - } - } - if (!right_row_found) { if constexpr (join_features.is_anti_join && join_features.left) From 4581526af76848ee7370d685e96f9cc3c464df6c Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:24:55 +0000 Subject: [PATCH 096/230] Remove has_null_map template parameter from hash join --- src/Interpreters/HashJoin.cpp | 50 +++++++++-------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 3e4f2902359..967e58f6d40 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -581,7 +581,7 @@ namespace }; - template + template size_t NO_INLINE insertFromBlockImplTypeCase( HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) @@ -600,7 +600,7 @@ namespace for (size_t i = 0; i < rows; ++i) { - if (has_null_map && (*null_map)[i]) + if (null_map && (*null_map)[i]) { /// nulls are not inserted into hash table, /// keep them for RIGHT and FULL joins @@ -622,21 +622,6 @@ namespace return map.getBufferSizeInCells(); } - - template - size_t insertFromBlockImplType( - HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - if (null_map) - return insertFromBlockImplTypeCase( - join, 
map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); - else - return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); - } - - template size_t insertFromBlockImpl( HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, @@ -653,7 +638,7 @@ namespace #define M(TYPE) \ case HashJoin::Type::TYPE: \ - return insertFromBlockImplType>::Type>(\ + return insertFromBlockImplTypeCase>::Type>(\ join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ break; @@ -1260,7 +1245,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). -template +template NO_INLINE IColumn::Filter joinRightColumns( std::vector && key_getter_vector, const std::vector & mapv, @@ -1289,11 +1274,8 @@ NO_INLINE IColumn::Filter joinRightColumns( for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) { const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if constexpr (has_null_map) - { - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - } + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; bool row_acceptable = !join_keys.isRowFiltered(i); using FindResult = typename KeyGetter::FindResult; @@ -1392,7 +1374,7 @@ NO_INLINE IColumn::Filter joinRightColumns( return filter; } -template +template IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, @@ -1400,8 +1382,8 @@ IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) { return mapv.size() > 1 - ? 
joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } template @@ -1411,21 +1393,13 @@ IColumn::Filter joinRightColumnsSwitchNullability( AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags) { - bool has_null_map = std::any_of(added_columns.join_on_keys.begin(), added_columns.join_on_keys.end(), - [](const auto & k) { return k.null_map; }); if (added_columns.need_filter) { - if (has_null_map) - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - else - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } else { - if (has_null_map) - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - else - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } @@ -1850,7 +1824,7 @@ struct AdderNonJoined /// Based on: /// - map offsetInternal saved in used_flags for single disjuncts /// - flags in BlockWithFlags for multiple disjuncts -template +template class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller { public: From 49ab480d40f268df1e597dfe14426eb5416a5fd6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Jun 2023 23:09:58 +0300 Subject: [PATCH 097/230] Update 00416_pocopatch_progress_in_http_headers.sh --- 
.../0_stateless/00416_pocopatch_progress_in_http_headers.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index ad7e89a7357..2b0cae3c1d4 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -9,7 +9,7 @@ RETRIES=5 result="" lines_expected=4 counter=0 -while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do +while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 done @@ -18,7 +18,7 @@ echo "$result" result="" lines_expected=12 counter=0 -while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do +while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 done From 3c4491b706e0cbd89086db845eb582e1227f3a74 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Thu, 29 Jun 2023 14:31:40 -0700 Subject: [PATCH 098/230] Ignore APPEND and TRUNCATE modifiers if file does not exist. 
--- src/Client/ClientBase.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..a8bdc5d0b08 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -575,9 +575,11 @@ try } auto flags = O_WRONLY | O_EXCL; - if (query_with_output->is_outfile_append) + + auto file_exists = fs::exists(out_file); + if (file_exists && query_with_output->is_outfile_append) flags |= O_APPEND; - else if (query_with_output->is_outfile_truncate) + else if (file_exists && query_with_output->is_outfile_truncate) flags |= O_TRUNC; else flags |= O_CREAT; From 42febefa966e89089065ecb6c7691731de4dde5c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 29 Jun 2023 21:34:40 +0000 Subject: [PATCH 099/230] Try to fix flaky 02210_processors_profile_log --- tests/queries/0_stateless/02210_processors_profile_log.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 44e563ef57b..92f6ab94293 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -15,7 +15,7 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>1e6, + name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. 
name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, From a705b08bd81658e878d7b7d214b057c661bbed69 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 29 Jun 2023 22:30:15 +0000 Subject: [PATCH 100/230] Update reference --- .../0_stateless/02210_processors_profile_log.reference | 6 +++--- tests/queries/0_stateless/02210_processors_profile_log.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 181022d2421..41543d0706a 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -18,13 +18,13 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>1e6, + name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>1e6) + input_wait_elapsed_us>=1e6) elapsed, input_rows, input_bytes, diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 92f6ab94293..a15ed26fd67 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -18,10 +18,10 @@ SELECT name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. 
- name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>1e6) + input_wait_elapsed_us>=1e6) elapsed, input_rows, input_bytes, From 8f240ffcce6230636de57fe8a8638df3a29ac5e3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 30 Jun 2023 10:50:44 +0200 Subject: [PATCH 101/230] tests: fix 02050_client_profile_events flakiness Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02050_client_profile_events.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index dce0c80525a..05e48de771d 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -25,7 +25,7 @@ profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events - test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'print each 100 ms' -profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(0.2) from numbers(10) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'check that ProfileEvents is new for each query' From 9a35921d005be1e7b34493d34429fb9dbf306ef7 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Fri, 30 Jun 2023 13:16:02 -0700 Subject: [PATCH 102/230] Add tests. 
--- tests/queries/0_stateless/00415_into_outfile.reference | 4 ++++ tests/queries/0_stateless/00415_into_outfile.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/00415_into_outfile.reference b/tests/queries/0_stateless/00415_into_outfile.reference index a609e77a50a..4576a2d9d60 100644 --- a/tests/queries/0_stateless/00415_into_outfile.reference +++ b/tests/queries/0_stateless/00415_into_outfile.reference @@ -1,5 +1,9 @@ performing test: select 1 2 3 +performing test: select_with_append +1 2 3 +performing test: select_with_truncate +1 2 3 performing test: union_all 1 2 3 4 diff --git a/tests/queries/0_stateless/00415_into_outfile.sh b/tests/queries/0_stateless/00415_into_outfile.sh index 77dc96a48e6..d360a29fa5a 100755 --- a/tests/queries/0_stateless/00415_into_outfile.sh +++ b/tests/queries/0_stateless/00415_into_outfile.sh @@ -21,6 +21,10 @@ function perform() perform "select" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select.out'" +perform "select_with_append" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select_with_append.out' APPEND" + +perform "select_with_truncate" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select_with_truncate.out' TRUNCATE" + perform "union_all" "SELECT 1, 2 UNION ALL SELECT 3, 4 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_union_all.out' FORMAT TSV" | sort --numeric-sort perform "bad_union_all" "SELECT 1, 2 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_bad_union_all.out' UNION ALL SELECT 3, 4" From 20d7cf2bf6e87f65ede724a2b12a406a1d50c20a Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 14:51:19 +0200 Subject: [PATCH 103/230] Fix tests --- tests/integration/test_storage_dict/test.py | 5 ++++- tests/integration/test_storage_s3/test.py | 8 ++++---- .../test_storage_s3/test_invalid_env_credentials.py | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_dict/test.py 
b/tests/integration/test_storage_dict/test.py index 1ed974f267d..dd4ab5c8d2c 100644 --- a/tests/integration/test_storage_dict/test.py +++ b/tests/integration/test_storage_dict/test.py @@ -10,7 +10,10 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "node1", main_configs=["configs/conf.xml"], with_nginx=True + "node1", + main_configs=["configs/conf.xml"], + user_configs=["configs/users.xml"], + with_nginx=True, ) cluster.start() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index cecc201945c..45437fefa79 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -944,7 +944,7 @@ def test_predefined_connection_configuration(started_cluster): f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" in error ) error = instance.query_and_get_error( @@ -952,7 +952,7 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" in error ) @@ -973,12 +973,12 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" in error ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + "To execute this query it's necessary to have grant NAMED COLLECTION ON 
no_collection" in error ) instance = started_cluster.instances["dummy"] # has named collection access diff --git a/tests/integration/test_storage_s3/test_invalid_env_credentials.py b/tests/integration/test_storage_s3/test_invalid_env_credentials.py index 0ee679014b1..d91cb7d68f9 100644 --- a/tests/integration/test_storage_s3/test_invalid_env_credentials.py +++ b/tests/integration/test_storage_s3/test_invalid_env_credentials.py @@ -92,6 +92,7 @@ def started_cluster(): "configs/use_environment_credentials.xml", "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], ) logging.info("Starting cluster...") From fd545deba071ffc9c6bde43683ecfbec533e4498 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 17:51:43 +0300 Subject: [PATCH 104/230] added a warning on autocalculated parallelizm limits underutilizing CPU cores --- cmake/limit_jobs.cmake | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index a8f105b8987..100ce921b19 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -18,6 +18,9 @@ if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY) if (NOT PARALLEL_COMPILE_JOBS) set (PARALLEL_COMPILE_JOBS 1) endif () + if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set (PARALLEL_COMPILE_JOBS_LESS TRUE) + endif() endif () if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) @@ -33,6 +36,9 @@ if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY) if (NOT PARALLEL_LINK_JOBS) set (PARALLEL_LINK_JOBS 1) endif () + if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set (PARALLEL_LINK_JOBS_LESS TRUE) + endif() endif () # ThinLTO provides its own parallel linking @@ -56,4 +62,10 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have 
${TOTAL_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") + if (PARALLEL_COMPILE_JOBS_LESS) + message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") + endif() + if (PARALLEL_LINK_JOBS_LESS) + message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") + endif() endif () From 711d8db6443c4a87dcb3b7a28df3265079717e54 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 17:59:48 +0300 Subject: [PATCH 105/230] better wording --- cmake/limit_jobs.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 100ce921b19..3a33b3b9989 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -63,9 +63,9 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") if (PARALLEL_COMPILE_JOBS_LESS) - message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") + message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") endif() if (PARALLEL_LINK_JOBS_LESS) - message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). 
Set PARALLEL_LINK_JOBS to override.") + message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") endif() endif () From ccda3c3a6e25a8d9b2245631691e4fe892b21f5a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 3 Jul 2023 15:03:40 +0000 Subject: [PATCH 106/230] Try to fix logical error #51703 --- src/Interpreters/GraceHashJoin.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4218a8ea4e1..4bfe0315138 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -638,10 +638,9 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - auto current_buckets = getCurrentBuckets(); - if (!isPowerOf2(current_buckets.size())) [[unlikely]] + if (!isPowerOf2(buckets_snapshot.size())) [[unlikely]] { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", current_buckets.size()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", buckets_snapshot.size()); } if (!hash_join) hash_join = makeInMemoryJoin(); @@ -654,7 +653,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = {}; // Must use the latest buckets snapshot in case that it has been rehashed by other threads. 
- buckets_snapshot = rehashBuckets(current_buckets.size() * 2); + buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); hash_join = nullptr; From 66227ce8d3faacd7a60a1cde9c96f55cb6c1b134 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 15:20:59 +0300 Subject: [PATCH 107/230] #51292 added default_temporary_table_engine setting --- docs/en/operations/settings/settings.md | 34 ++++++++++++++++++ src/Core/Settings.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 35 ++++++++----------- src/Interpreters/InterpreterCreateQuery.h | 2 +- .../02184_default_table_engine.reference | 1 + .../02184_default_table_engine.sql | 4 +++ 6 files changed, 56 insertions(+), 21 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cff13302cdc..0d5072d5474 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3201,6 +3201,40 @@ ENGINE = Log └──────────────────────────────────────────────────────────────────────────┘ ``` +## default_temporary_table_engine {#default_temporary_table_engine} + +Same as [default_temporary_table_engine](#default_temporary_table_engine) but for temporary tables. + +Default value: `Memory`. 
+ +In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine: + +Query: + +```sql +SET default_temporary_table_engine = 'Log'; + +CREATE TEMPORARY TABLE my_table ( + x UInt32, + y UInt32 +); + +SHOW CREATE TEMPORARY TABLE my_table; +``` + +Result: + +```response +┌─statement────────────────────────────────────────────────────────────────┐ +│ CREATE TEMPORARY TABLE default.my_table +( + `x` UInt32, + `y` UInt32 +) +ENGINE = Log +└──────────────────────────────────────────────────────────────────────────┘ +``` + ## data_type_default_nullable {#data_type_default_nullable} Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b7d12a518c8..59373df3ece 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -517,6 +517,7 @@ class IColumn; M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ + M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or 
detached.", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d0bb3dd389f..1419203b45b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -911,14 +911,13 @@ String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_tab } } -void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context) +void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { - if (local_context->getSettingsRef().default_table_engine.value == DefaultTableEngine::None) + if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); auto engine_ast = std::make_shared(); - auto default_table_engine = local_context->getSettingsRef().default_table_engine.value; - engine_ast->name = getTableEngineName(default_table_engine); + engine_ast->name = getTableEngineName(engine); engine_ast->no_empty_args = true; storage.set(storage.engine, engine_ast); } @@ -943,24 +942,20 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); - if (create.storage) + if (create.storage && create.storage->engine) { - if (create.storage->engine) - { - if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); - } - else - throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table"); + if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with 
Replicated or KeeperMap table engines"); + return; } else { - auto engine_ast = std::make_shared(); - engine_ast->name = "Memory"; - engine_ast->no_empty_args = true; - auto storage_ast = std::make_shared(); - storage_ast->set(storage_ast->engine, engine_ast); - create.set(create.storage, storage_ast); + if (!create.storage) + { + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); + } + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); } return; } @@ -969,7 +964,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. if (!create.storage->engine) - setDefaultTableEngine(*create.storage, getContext()); + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); return; } @@ -1008,7 +1003,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()); + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } static void generateUUIDForTable(ASTCreateQuery & create) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index a5fa6576091..09a582d6686 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -91,7 +91,7 @@ private: TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; static String getTableEngineName(DefaultTableEngine default_table_engine); - static void setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context); + static void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine 
engine); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/tests/queries/0_stateless/02184_default_table_engine.reference b/tests/queries/0_stateless/02184_default_table_engine.reference index 870dff90efa..495b9627acb 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.reference +++ b/tests/queries/0_stateless/02184_default_table_engine.reference @@ -27,3 +27,4 @@ CREATE TABLE default.val2\n(\n `n` Int32\n) AS values(\'n int\', 1, 2) CREATE TABLE default.log\n(\n `n` Int32\n)\nENGINE = Log CREATE TABLE default.kek\n(\n `n` Int32\n)\nENGINE = Memory CREATE TABLE default.lol\n(\n `n` Int32\n)\nENGINE = MergeTree\nORDER BY n\nSETTINGS min_bytes_for_wide_part = 123, index_granularity = 8192 +CREATE TEMPORARY TABLE tmp_log\n(\n `n` Int32\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index 109875d53a5..68422f273b0 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -128,3 +128,7 @@ SHOW CREATE TABLE kek; SHOW CREATE TABLE lol; DROP TABLE kek; DROP TABLE lol; + +SET default_temporary_table_engine = 'Log'; +CREATE TEMPORARY TABLE tmp_log (n int); +SHOW CREATE TEMPORARY TABLE tmp_log; From 1e10bf5bdf50aac027f0824bad812676988a1eb3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Jul 2023 13:47:52 +0300 Subject: [PATCH 108/230] Update docs/en/operations/settings/settings.md --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0d5072d5474..5f6cf98646b 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3203,7 +3203,7 @@ ENGINE = Log ## default_temporary_table_engine {#default_temporary_table_engine} -Same as 
[default_temporary_table_engine](#default_temporary_table_engine) but for temporary tables. +Same as [default_table_engine](#default_table_engine) but for temporary tables. Default value: `Memory`. From 2f85d048ae42f0b06658b2acd38271d041be057e Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 16:14:19 +0300 Subject: [PATCH 109/230] bugfix --- src/Interpreters/InterpreterCreateQuery.cpp | 65 ++++++--------------- src/Interpreters/InterpreterCreateQuery.h | 2 - 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 1419203b45b..72312a33b3d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -881,45 +881,21 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } -String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_table_engine) -{ - switch (default_table_engine) - { - case DefaultTableEngine::Log: - return "Log"; - - case DefaultTableEngine::StripeLog: - return "StripeLog"; - - case DefaultTableEngine::MergeTree: - return "MergeTree"; - - case DefaultTableEngine::ReplacingMergeTree: - return "ReplacingMergeTree"; - - case DefaultTableEngine::ReplicatedMergeTree: - return "ReplicatedMergeTree"; - - case DefaultTableEngine::ReplicatedReplacingMergeTree: - return "ReplicatedReplacingMergeTree"; - - case DefaultTableEngine::Memory: - return "Memory"; - - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "default_table_engine is set to unknown value"); +namespace { + void checkTemporaryTableEngineName(const String& name) { + if (name.starts_with("Replicated") || name == "KeeperMap") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); } -} -void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) -{ - if (engine == 
DefaultTableEngine::None) - throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); + void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) { + if (engine == DefaultTableEngine::None) + throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); - auto engine_ast = std::make_shared(); - engine_ast->name = getTableEngineName(engine); - engine_ast->no_empty_args = true; - storage.set(storage.engine, engine_ast); + auto engine_ast = std::make_shared(); + engine_ast->name = SettingFieldDefaultTableEngine(engine).toString(); + engine_ast->no_empty_args = true; + storage.set(storage.engine, engine_ast); + } } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const @@ -942,21 +918,18 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); - if (create.storage && create.storage->engine) + if (!create.storage) { - if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); - return; + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); } - else + + if (!create.storage->engine) { - if (!create.storage) - { - auto storage_ast = std::make_shared(); - create.set(create.storage, storage_ast); - } setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); } + + checkTemporaryTableEngineName(create.storage->engine->name); return; } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 09a582d6686..67339dea928 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -90,8 
+90,6 @@ private: /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; - static String getTableEngineName(DefaultTableEngine default_table_engine); - static void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; From ae87d43f887376d19f2df3e197bc20ecefa7b012 Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 18:28:56 +0300 Subject: [PATCH 110/230] test fixes --- src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++++--- tests/queries/0_stateless/02184_default_table_engine.sql | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 72312a33b3d..dc95335d3ad 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -881,13 +881,16 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } -namespace { - void checkTemporaryTableEngineName(const String& name) { +namespace +{ + void checkTemporaryTableEngineName(const String& name) + { if (name.starts_with("Replicated") || name == "KeeperMap") throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); } - void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) { + void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql 
b/tests/queries/0_stateless/02184_default_table_engine.sql index 68422f273b0..a984ec1b6c9 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -83,8 +83,8 @@ CREATE TEMPORARY TABLE tmp (n int); SHOW CREATE TEMPORARY TABLE tmp; CREATE TEMPORARY TABLE tmp1 (n int) ENGINE=Memory; CREATE TEMPORARY TABLE tmp2 (n int) ENGINE=Log; -CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 80} -CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 80} +CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 36} +CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 36} CREATE TABLE log (n int); SHOW CREATE log; From dcc0076ded42792fd41c7f83bca9ff3e5ce0ed4b Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 19:01:35 +0300 Subject: [PATCH 111/230] fixed comment --- src/Interpreters/InterpreterCreateQuery.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index dc95335d3ad..55d2449f739 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -914,9 +914,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.temporary) { - /// It's possible if some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not. - /// It makes sense when default_table_engine setting is used, but not for temporary tables. - /// For temporary tables we ignore this setting to allow CREATE TEMPORARY TABLE query without specifying ENGINE + /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. 
if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); From 6742432fd2801380350df489dd882ab538598a7e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 3 Jul 2023 21:58:56 +0000 Subject: [PATCH 112/230] Number of bucket always increased by 2, so there is no reason provide it as parameter --- src/Interpreters/GraceHashJoin.cpp | 20 +++++++++----------- src/Interpreters/GraceHashJoin.h | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4bfe0315138..f5b2386fd1e 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -356,16 +356,16 @@ bool GraceHashJoin::hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const return hasMemoryOverflow(total_rows, total_bytes); } -GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) +GraceHashJoin::Buckets GraceHashJoin::rehashBuckets() { std::unique_lock lock(rehash_mutex); + + if (!isPowerOf2(buckets.size())) [[unlikely]] + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of buckets should be power of 2 but it's {}", buckets.size()); + + const size_t to_size = buckets.size() * 2; size_t current_size = buckets.size(); - if (to_size <= current_size) - return buckets; - - chassert(isPowerOf2(to_size)); - if (to_size > max_num_buckets) { throw Exception(ErrorCodes::LIMIT_EXCEEDED, @@ -623,6 +623,8 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) void GraceHashJoin::addJoinedBlockImpl(Block block) { + LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), ""); + block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); size_t bucket_index = current_bucket->idx; @@ -638,10 +640,6 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - if (!isPowerOf2(buckets_snapshot.size())) [[unlikely]] - { - throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", buckets_snapshot.size()); - } if (!hash_join) hash_join = makeInMemoryJoin(); @@ -653,7 +651,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = {}; // Must use the latest buckets snapshot in case that it has been rehashed by other threads. - buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); + buckets_snapshot = rehashBuckets(); auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); hash_join = nullptr; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index b8d83f4cad0..fd3397ba15e 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -109,7 +109,7 @@ private: /// /// NB: after @rehashBuckets there may be rows that are written to the buckets that they do not belong to. /// It is fine; these rows will be written to the corresponding buckets during the third stage. - Buckets rehashBuckets(size_t to_size); + Buckets rehashBuckets(); /// Perform some bookkeeping after all calls to @joinBlock. 
void startReadingDelayedBlocks(); From 87a2c44778db4dce5a425c7f618009e5652a6bef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:43:25 +0200 Subject: [PATCH 113/230] Fix test 02789_object_type_invalid_num_of_rows --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 - .../0_stateless/02789_object_type_invalid_num_of_rows.sql | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 7dec35f7acb..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +0,0 @@ -0.02 diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql index a9c8a844aa0..d0fc6905593 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql @@ -1,2 +1,2 @@ set allow_experimental_object_type=1; -SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) settings max_threads=1; -- { serverError NOT_IMPLEMENTED } +SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) FORMAT Null; -- { serverError NOT_IMPLEMENTED } From be1353fe334cf90d5534036e306dc424cbf26773 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:47:31 +0200 Subject: [PATCH 114/230] Revert "Merge pull request #51750 from ClickHouse/revert-51296-object_column_invalid_num_of_rows" This reverts commit 73d956249eaf5076691af80edac689d466a1f0fd, reversing changes made to 4d76b538c7838c55105746bfdd99db1e01e1fd91. 
--- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index e69de29bb2d..7dec35f7acb 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -0,0 +1 @@ +0.02 From c65ee49a37773a2034c4e9a439ba6ebaf1820955 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:49:39 +0200 Subject: [PATCH 115/230] Revert "Merge pull request #51750 from ClickHouse/revert-51296-object_column_invalid_num_of_rows" This reverts commit 73d956249eaf5076691af80edac689d466a1f0fd, reversing changes made to 4d76b538c7838c55105746bfdd99db1e01e1fd91. --- src/Processors/QueryPlan/AggregatingStep.cpp | 2 ++ .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 + 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 4ac972e2a79..eebbfc04304 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -319,6 +319,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { auto column_with_default = col.column->cloneEmpty(); col.type->insertDefaultInto(*column_with_default); + column_with_default->finalize(); + auto column = ColumnConst::create(std::move(column_with_default), 0); const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); node = &dag->materializeNode(*node); diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference new file mode 100644 index 
00000000000..7dec35f7acb --- /dev/null +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -0,0 +1 @@ +0.02 From c25da7cbf4432a8b49155902dd8e5f23929fa844 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 01:03:49 +0200 Subject: [PATCH 116/230] Fix (benign) data rance in `transform` --- src/Columns/ColumnVector.h | 4 ++-- src/Functions/transform.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bf790423d1d..b8ebff2a5d5 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -107,8 +107,8 @@ struct FloatCompareHelper } }; -template struct CompareHelper : public FloatCompareHelper {}; -template struct CompareHelper : public FloatCompareHelper {}; +template struct CompareHelper : public FloatCompareHelper {}; +template struct CompareHelper : public FloatCompareHelper {}; /** A template for columns that use a simple array to store. diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 8d6e53c491e..1fc0e3adf96 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -698,6 +698,8 @@ namespace const DataTypePtr & from_type = arguments[0].type; + std::lock_guard lock(cache.mutex); + if (from_type->onlyNull()) { cache.is_empty = true; @@ -711,8 +713,6 @@ namespace throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Second and third arguments of function {} must be constant arrays.", getName()); - std::lock_guard lock(cache.mutex); - const ColumnPtr & from_column_uncasted = array_from->getDataPtr(); cache.from_column = castColumn( From f86c5edfc465717a5344a8b71e140f0ceaa9ba47 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 07:54:13 +0000 Subject: [PATCH 117/230] Remove debug tracing --- src/Interpreters/GraceHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 
f5b2386fd1e..8acdb4e90dd 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -623,8 +623,6 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) void GraceHashJoin::addJoinedBlockImpl(Block block) { - LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), ""); - block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); size_t bucket_index = current_bucket->idx; From 1b71bbf1b119c937a176ff63a4ffaeb660d96038 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 08:27:48 +0000 Subject: [PATCH 118/230] Add test for ZK disconnect --- tests/integration/test_keeper_map/test.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index c6ec7103056..fbae875d2e6 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,4 +1,5 @@ import pytest +import time from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager @@ -39,9 +40,18 @@ def remove_children(client, path): def test_keeper_map_without_zk(started_cluster): + def wait_disconnect_from_zk(): + for _ in range(20): + if len(node.query_and_get_answer_with_error("SELECT * FROM system.zookeeper WHERE path='/'")[1]) != 0: + break + time.sleep(1) + else: + assert False, "ClickHouse didn't disconnect from ZK after DROP rule was added" + def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) + wait_disconnect_from_zk() error = node.query_and_get_error(query) assert "Coordination::Exception" in error @@ -49,17 +59,17 @@ def test_keeper_map_without_zk(started_cluster): "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) - node.query( + node.query_with_retry( "CREATE TABLE test_keeper_map_without_zk (key UInt64, 
value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") + node.query_with_retry("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") - node.query("SELECT * FROM test_keeper_map_without_zk") + node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") with PartitionManager() as pm: pm.drop_instance_zk_connections(node) @@ -67,7 +77,7 @@ def test_keeper_map_without_zk(started_cluster): error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of connection issues" in error - node.query("SELECT * FROM test_keeper_map_without_zk") + node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() remove_children(client, "/test_keeper_map/test_without_zk") From 30be0ab4a8af7247a12c94076cd17834f712b9d9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 09:00:53 +0000 Subject: [PATCH 119/230] Fix: unexpected number of buckets Number of buckets could become inconsistent if exception was thrown during new buckets creation --- src/Interpreters/GraceHashJoin.cpp | 63 ++++++++++++++++++------------ src/Interpreters/GraceHashJoin.h | 5 ++- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 8acdb4e90dd..aa7091548d7 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -288,10 +288,7 @@ void GraceHashJoin::initBuckets() size_t initial_num_buckets = roundUpToPowerOfTwoOrZero(std::clamp(settings.grace_hash_join_initial_buckets, 1, settings.grace_hash_join_max_buckets)); - for (size_t i = 0; i < initial_num_buckets; ++i) - { - addBucket(buckets); - } + 
addBuckets(initial_num_buckets); if (buckets.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "No buckets created"); @@ -368,40 +365,54 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets() if (to_size > max_num_buckets) { - throw Exception(ErrorCodes::LIMIT_EXCEEDED, + throw Exception( + ErrorCodes::LIMIT_EXCEEDED, "Too many grace hash join buckets ({} > {}), " "consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", - to_size, max_num_buckets); + to_size, + max_num_buckets); } LOG_TRACE(log, "Rehashing from {} to {}", current_size, to_size); - buckets.reserve(to_size); - for (size_t i = current_size; i < to_size; ++i) - addBucket(buckets); + addBuckets(to_size - current_size); return buckets; } -void GraceHashJoin::addBucket(Buckets & destination) +void GraceHashJoin::addBuckets(const size_t bucket_count) { - // There could be exceptions from createStream, In ci tests - // there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability. - // It may terminate this thread and leave a broken hash_join, and another thread cores when it tries to - // use the broken hash_join. So we print an exception message here to help debug. - try - { - auto & left_file = tmp_data->createStream(left_sample_block); - auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + // Exception can be thrown in number of cases: + // - during creation of temporary files for buckets + // - in CI tests, there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability + // Therefore, new buckets are added only after all of them created successfully, + // otherwise we can end up having unexpected number of buckets - BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log); - destination.emplace_back(std::move(new_bucket)); - } - catch (...) - { - LOG_ERROR(&Poco::Logger::get("GraceHashJoin"), "Can't create bucket. 
current buckets size: {}", destination.size()); - throw; - } + const size_t current_size = buckets.size(); + Buckets tmp_buckets; + tmp_buckets.reserve(bucket_count); + for (size_t i = 0; i < bucket_count; ++i) + try + { + auto & left_file = tmp_data->createStream(left_sample_block); + auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + + BucketPtr new_bucket = std::make_shared(current_size + i, left_file, right_file, log); + tmp_buckets.emplace_back(std::move(new_bucket)); + } + catch (...) + { + LOG_ERROR( + &Poco::Logger::get("GraceHashJoin"), + "Can't create bucket {} due to error: {}", + current_size + i, + getCurrentExceptionMessage(false)); + throw; + } + + buckets.reserve(buckets.size() + bucket_count); + for(auto & bucket : tmp_buckets) + buckets.emplace_back(std::move(bucket)); } void GraceHashJoin::checkTypesOfKeys(const Block & block) const diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index fd3397ba15e..78ba70bc764 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -101,8 +101,9 @@ private: bool hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const; bool hasMemoryOverflow(const BlocksList & blocks) const; - /// Create new bucket at the end of @destination. - void addBucket(Buckets & destination); + /// Add bucket_count new buckets + /// Throws if a bucket creation fails + void addBuckets(size_t bucket_count); /// Increase number of buckets to match desired_size. /// Called when HashJoin in-memory table for one bucket exceeds the limits. 
From 57ada39ccf1615910afb48152a30ba7410152a36 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 09:31:15 +0000 Subject: [PATCH 120/230] Add retries and iptables rules dump --- tests/integration/helpers/cluster.py | 7 +-- tests/integration/helpers/network.py | 11 +++- tests/integration/test_keeper_map/test.py | 66 ++++++++++++++--------- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 21398790be3..3db0ad12295 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3416,13 +3416,14 @@ class ClickHouseInstance: database=database, ) time.sleep(sleep_time) + + if result is not None: + return result except QueryRuntimeException as ex: logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) - if result is not None: - return result - raise Exception("Query {sql} did not fail".format(sql)) + raise Exception("Query {} did not fail".format(sql)) # The same as query_and_get_error but ignores successful query. 
def query_and_get_answer_with_error( diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 2df560708e0..60b46926589 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -32,6 +32,9 @@ class PartitionManager: {"destination": instance.ip_address, "source_port": 2181, "action": action} ) + def dump_rules(self): + return _NetworkManager.get().dump_rules() + def restore_instance_zk_connections(self, instance, action="DROP"): self._check_instance(instance) @@ -157,6 +160,10 @@ class _NetworkManager: cmd.extend(self._iptables_cmd_suffix(**kwargs)) self._exec_run(cmd, privileged=True) + def dump_rules(self): + cmd = ["iptables", "-L", "DOCKER-USER"] + return self._exec_run(cmd, privileged=True) + @staticmethod def clean_all_user_iptables_rules(): for i in range(1000): @@ -212,8 +219,8 @@ class _NetworkManager: def __init__( self, - container_expire_timeout=50, - container_exit_timeout=60, + container_expire_timeout=120, + container_exit_timeout=120, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): self.container_expire_timeout = container_expire_timeout diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index fbae875d2e6..d7b4230d872 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,8 +1,7 @@ import pytest -import time from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager +from helpers.network import PartitionManager, _NetworkManager test_recover_staled_replica_run = 1 @@ -39,50 +38,67 @@ def remove_children(client, path): client.delete(child_path) -def test_keeper_map_without_zk(started_cluster): - def wait_disconnect_from_zk(): - for _ in range(20): - if len(node.query_and_get_answer_with_error("SELECT * FROM system.zookeeper WHERE path='/'")[1]) != 0: - break - time.sleep(1) - else: - assert False, "ClickHouse didn't disconnect from 
ZK after DROP rule was added" +def print_iptables_rules(): + print(f"iptables rules: {_NetworkManager.get().dump_rules()}") - def assert_keeper_exception_after_partition(query): - with PartitionManager() as pm: - pm.drop_instance_zk_connections(node) - wait_disconnect_from_zk() - error = node.query_and_get_error(query) + +def assert_keeper_exception_after_partition(query): + with PartitionManager() as pm: + pm.drop_instance_zk_connections(node) + try: + error = node.query_and_get_error_with_retry(query, sleep_time=1) assert "Coordination::Exception" in error + except: + print_iptables_rules() + raise + +def run_query(query): + try: + result = node.query_with_retry(query, sleep_time=1) + return result + except: + print_iptables_rules() + raise + + +def test_keeper_map_without_zk(started_cluster): assert_keeper_exception_after_partition( - "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) - node.query_with_retry( - "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" + run_query( + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query_with_retry("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") + run_query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") - node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") + assert run_query("SELECT * FROM test_keeper_map_without_zk") == "1\t11\n" with PartitionManager() as pm: pm.drop_instance_zk_connections(node) node.restart_clickhouse(60) - 
error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") - assert "Failed to activate table because of connection issues" in error + try: + error = node.query_and_get_error_with_retry( + "SELECT * FROM test_keeper_map_without_zk", sleep_time=1 + ) + assert "Failed to activate table because of connection issues" in error + except: + print_iptables_rules() + raise - node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") + run_query("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() - remove_children(client, "/test_keeper_map/test_without_zk") + remove_children(client, "/test_keeper_map/test_keeper_map_without_zk") node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") + error = node.query_and_get_error_with_retry( + "SELECT * FROM test_keeper_map_without_zk" + ) assert "Failed to activate table because of invalid metadata in ZooKeeper" in error node.query("DETACH TABLE test_keeper_map_without_zk") From 6bca452924bf4ceecbd4106acbfc99bc49276012 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 Jul 2023 12:42:27 +0200 Subject: [PATCH 121/230] Fix tests --- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- .../test_s3_table_functions/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_s3_table_functions/test.py | 9 ++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 tests/integration/test_s3_table_functions/configs/users.d/users.xml diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 5f1f13ca5a2..c2e9501f58c 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -53,7 +53,7 @@ TEST(AccessRights, Union) "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " - "SYSTEM RESTORE 
REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION CONTROL ON db1"); + "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/tests/integration/test_s3_table_functions/configs/users.d/users.xml b/tests/integration/test_s3_table_functions/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_s3_table_functions/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_s3_table_functions/test.py b/tests/integration/test_s3_table_functions/test.py index 516d6582990..a6def175136 100644 --- a/tests/integration/test_s3_table_functions/test.py +++ b/tests/integration/test_s3_table_functions/test.py @@ -11,6 +11,9 @@ node = cluster.add_instance( main_configs=[ "configs/config.d/minio.xml", ], + user_configs=[ + "configs/users.d/users.xml", + ], with_minio=True, ) @@ -44,7 +47,7 @@ def test_s3_table_functions(started_cluster): """ INSERT INTO FUNCTION s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', @@ -60,7 +63,7 @@ def test_s3_table_functions(started_cluster): """ SELECT count(*) FROM s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', @@ -85,7 +88,7 @@ def test_s3_table_functions_timeouts(started_cluster): """ INSERT INTO FUNCTION s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', From c9a26d43c5b053c3e5a0898e382c0405a4ecf4a6 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Tue, 4 Jul 2023 12:06:15 +0100 Subject: [PATCH 122/230] (docs) Remove async_metric_log event_time_microseconds event_time_microseconds was removed from the system.asynchronous_metric_log in https://github.com/ClickHouse/ClickHouse/pull/36360 
--- .../system-tables/asynchronous_metric_log.md | 25 +++++++++---------- .../system-tables/asynchronous_metric_log.md | 1 - .../system-tables/asynchronous_metric_log.md | 25 +++++++++---------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 4290799b6bc..efe57a202d8 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -9,7 +9,6 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. 
@@ -20,18 +19,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 
jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **See Also** diff --git a/docs/ru/operations/system-tables/asynchronous_metric_log.md b/docs/ru/operations/system-tables/asynchronous_metric_log.md index 886fbb6cab0..5145889c95f 100644 --- a/docs/ru/operations/system-tables/asynchronous_metric_log.md +++ b/docs/ru/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /ru/operations/system-tables/asynchronous_metric_log Столбцы: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время события в микросекундах. - `name` ([String](../../sql-reference/data-types/string.md)) — название метрики. - `value` ([Float64](../../sql-reference/data-types/float.md)) — значение метрики. 
diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 419ad2a7ed6..9fa399f1aed 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /zh/operations/system-tables/asynchronous_metric_log 列: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件日期。 - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间。 -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件时间(微秒)。 - `name` ([String](../../sql-reference/data-types/string.md)) — 指标名。 - `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 @@ -17,18 +16,18 @@ slug: /zh/operations/system-tables/asynchronous_metric_log SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 
2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **另请参阅** From fd6115f0e3ef53fb6b0a7c411de13148e8cdc10e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 11:42:27 +0000 Subject: [PATCH 123/230] Fix flaky tests with timeout --- ...20_create_sync_race_condition_zookeeper.sh | 17 ++++++++----- .../0_stateless/01632_tinylog_read_write.sh | 24 ++++++++++-------- .../02481_async_insert_race_long.sh | 25 +++++++++++-------- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh index aee69e64b1b..57409d782ae 100755 --- a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh +++ b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh @@ -12,22 +12,27 @@ $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 --query "CREATE DATABA 
function thread1() { - while true; do - $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; - DROP TABLE test_01320.r;" 2>&1 | grep -F "Code:" | grep -v "UNKNOWN_DATABASE" + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; DROP TABLE test_01320.r;" done } function thread2() { - while true; do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_01320.r" 2>/dev/null; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_01320.r" 2>/dev/null; + done } export -f thread1 export -f thread2 -timeout 10 bash -c thread1 & -timeout 10 bash -c thread2 & +TIMEOUT=10 + +thread1 $TIMEOUT & +thread2 $TIMEOUT & wait diff --git a/tests/queries/0_stateless/01632_tinylog_read_write.sh b/tests/queries/0_stateless/01632_tinylog_read_write.sh index 69f985a9d0d..10625ec5d27 100755 --- a/tests/queries/0_stateless/01632_tinylog_read_write.sh +++ b/tests/queries/0_stateless/01632_tinylog_read_write.sh @@ -11,14 +11,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS test (x UInt64, s Array(Nullable(String))) ENGINE = TinyLog;" function thread_select { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT --local_filesystem_read_method pread --query "SELECT * FROM test FORMAT Null" sleep 0.0$RANDOM done } function thread_insert { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$1" ]; do $CLICKHOUSE_CLIENT --query "INSERT INTO test VALUES (1, ['Hello'])" sleep 0.0$RANDOM done @@ -30,15 +32,17 @@ export -f thread_insert # Do 
randomized queries and expect nothing extraordinary happens. -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & +TIMEOUT=10 -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & +thread_select $TIMEOUT & +thread_select $TIMEOUT & +thread_select $TIMEOUT & +thread_select $TIMEOUT & + +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & wait echo "Done" diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index cec9278c127..c4b026c6aba 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -11,21 +11,24 @@ export MY_CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --async_insert_busy_timeout_ms 1 function insert1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT CSV 1,"a"' done } function insert2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } function insert3() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done @@ -33,29 +36,29 @@ function insert3() function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts_race FORMAT Null" done - } ${CLICKHOUSE_CLIENT} -q 
"DROP TABLE IF EXISTS async_inserts_race" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts_race (id UInt32, s String) ENGINE = MergeTree ORDER BY id" -TIMEOUT=10 - export -f insert1 export -f insert2 export -f insert3 export -f select1 +TIMEOUT=10 + for _ in {1..3}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash -c select1 & +select1 $TIMEOUT & wait echo "OK" From 1c2233b693077bbc5ce042c46a56aadaa49aab98 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 15:46:40 +0000 Subject: [PATCH 124/230] Fix style check --- src/Interpreters/GraceHashJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index aa7091548d7..66dc1aa7bde 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -411,7 +411,7 @@ void GraceHashJoin::addBuckets(const size_t bucket_count) } buckets.reserve(buckets.size() + bucket_count); - for(auto & bucket : tmp_buckets) + for (auto & bucket : tmp_buckets) buckets.emplace_back(std::move(bucket)); } From e84769cb23b1447dce57eb957480f7c5d7cdced8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 20:19:17 +0300 Subject: [PATCH 125/230] Update 02789_object_type_invalid_num_of_rows.reference --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 7dec35f7acb..8b137891791 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +1 @@ -0.02 + From d987b94ed48594541bf91bb42fb4f5a8ced52e1f Mon Sep 17 
00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 20:51:15 +0200 Subject: [PATCH 126/230] fix the way how broken parts are detached --- src/Storages/MergeTree/IMergeTreeDataPart.h | 8 +- src/Storages/MergeTree/MergeTreeData.cpp | 23 +- src/Storages/MergeTree/MergeTreeData.h | 10 +- .../ReplicatedMergeTreePartCheckThread.cpp | 402 ++++++++++-------- .../ReplicatedMergeTreePartCheckThread.h | 44 +- src/Storages/StorageReplicatedMergeTree.cpp | 68 ++- 6 files changed, 317 insertions(+), 238 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index fd73d802579..1fdcbd7309c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -242,9 +242,11 @@ public: /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table. mutable std::atomic is_frozen {false}; - /// Indicated that the part was marked Outdated because it's broken, not because it's actually outdated - /// See outdateBrokenPartAndCloneToDetached(...) - mutable bool outdated_because_broken = false; + /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper + mutable bool is_unexpected_local_part = false; + + /// Indicates that the part was detached and marked Outdated because it's broken + mutable std::atomic_bool was_removed_as_broken = false; /// Flag for keep S3 data when zero-copy replication over S3 turned on. 
mutable bool force_keep_shared_data = false; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9c3a7f66ae..e37d4273629 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4023,22 +4023,15 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo } -void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part_to_detach, const String & prefix) +void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { - auto metadata_snapshot = getInMemoryMetadataPtr(); - if (prefix.empty()) - LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - else - LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); - - part_to_detach->makeCloneInDetached(prefix, metadata_snapshot); + LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); DataPartsLock lock = lockParts(); + part_to_detach->is_unexpected_local_part = true; if (part_to_detach->getState() == DataPartState::Active) - { - part_to_detach->outdated_because_broken = true; removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); - } } void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) @@ -4677,24 +4670,24 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) 
+MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) const { auto lock = lockParts(); return getPartIfExistsUnlocked(part_info, valid_states, lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) const { auto lock = lockParts(); return getPartIfExistsUnlocked(part_name, valid_states, lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const { return getPartIfExistsUnlocked(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) const { auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..d5991aaea71 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -521,10 +521,10 @@ public: DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartStates & affordable_states, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; /// Returns the part with the given name and state 
or nullptr if no such part. - DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock); - DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock); - DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); - DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); + DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const; + DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const; + DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states) const; + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states) const; /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -654,7 +654,7 @@ public: virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {} /// Outdate broken part, set remove time to zero (remove as fast as possible) and make clone in detached directory. - void outdateBrokenPartAndCloneToDetached(const DataPartPtr & part, const String & prefix); + void outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part); /// If the part is Obsolete and not used by anybody else, immediately delete it from filesystem and remove from memory. 
void tryRemovePartImmediately(DataPartPtr && part); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index c495fdaf5e2..d6f8dbac883 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -131,7 +131,7 @@ size_t ReplicatedMergeTreePartCheckThread::size() const } -ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name) +bool ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name) const { auto zookeeper = storage.getZooKeeper(); @@ -198,13 +198,13 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP continue; LOG_INFO(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica); - return MissingPartSearchResult::FoundAndNeedFetch; + return true; } if (part_on_replica_info.contains(part_info)) { LOG_INFO(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name); - return MissingPartSearchResult::FoundAndDontNeedFetch; + return true; } if (part_info.contains(part_on_replica_info)) @@ -227,11 +227,10 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP if (found_part_with_the_same_min_block && found_part_with_the_same_max_block) { - /// FIXME It may never appear LOG_INFO(log, "Found parts with the same min block and with the same max block as the missing part {} on replica {}. " "Hoping that it will eventually appear as a result of a merge. 
Parts: {}", part_name, replica, fmt::join(parts_found, ", ")); - return MissingPartSearchResult::FoundAndDontNeedFetch; + return true; } } } @@ -247,70 +246,9 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP not_found_msg = "smaller parts with either the same min block or the same max block."; LOG_ERROR(log, "No replica has part covering {} and a merge is impossible: we didn't find {}", part_name, not_found_msg); - return MissingPartSearchResult::LostForever; + return false; } -void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper) -{ - auto zookeeper = storage.getZooKeeper(); - auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name); - - /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue. - if (exists_in_zookeeper) - { - if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch) - { - LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and found on other replica. Removing from ZooKeeper and queueing a fetch.", part_name); - } - else - { - LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and not found on other replica. Removing it from ZooKeeper.", part_name); - } - - /// We cannot simply remove part from ZooKeeper, because it may be removed from virtual_part, - /// so we have to create some entry in the queue. Maybe we will execute it (by fetching part or covering part from somewhere), - /// maybe will simply replace with empty part. 
- storage.removePartAndEnqueueFetch(part_name, /* storage_init = */false); - } - - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - - if (missing_part_search_result == MissingPartSearchResult::LostForever) - { - auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); - if (lost_part_info.level != 0 || lost_part_info.mutation != 0) - { - Strings source_parts; - bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); - - /// If it's MERGE/MUTATION etc. we shouldn't replace result part with empty part - /// because some source parts can be lost, but some of them can exist. - if (part_in_queue && !source_parts.empty()) - { - LOG_ERROR(log, "Part {} found in queue and some source parts for it was lost. Will check all source parts.", part_name); - for (const String & source_part_name : source_parts) - enqueuePart(source_part_name); - - return; - } - } - - ThreadFuzzer::maybeInjectSleep(); - - if (storage.createEmptyPartInsteadOfLost(zookeeper, part_name)) - { - /** This situation is possible if on all the replicas where the part was, it deteriorated. - * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk. - */ - LOG_ERROR(log, "Part {} is lost forever.", part_name); - ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); - } - else - { - LOG_WARNING(log, "Cannot create empty part {} instead of lost. 
Will retry later", part_name); - } - } -} std::pair ReplicatedMergeTreePartCheckThread::findLocalPart(const String & part_name) { @@ -335,12 +273,12 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo return std::make_pair(exists_in_zookeeper, part); } -CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) +ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name) { - LOG_INFO(log, "Checking part {}", part_name); - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); - + ReplicatedCheckResult result; auto [exists_in_zookeeper, part] = findLocalPart(part_name); + result.exists_in_zookeeper = exists_in_zookeeper; + result.part = part; LOG_TRACE(log, "Part {} in zookeeper: {}, locally: {}", part_name, exists_in_zookeeper, part != nullptr); @@ -351,130 +289,236 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na { /// We cannot rely on exists_in_zookeeper, because the cleanup thread is probably going to remove it from ZooKeeper /// Also, it will avoid "Cannot commit empty part: Part ... (state Outdated) already exists, but it will be deleted soon" - LOG_WARNING(log, "Part {} is Outdated, will wait for cleanup thread to handle it and check again later", part_name); time_t lifetime = time(nullptr) - outdated->remove_time; time_t max_lifetime = storage.getSettings()->old_parts_lifetime.totalSeconds(); time_t delay = lifetime >= max_lifetime ? 
0 : max_lifetime - lifetime; - enqueuePart(part_name, delay + 30); - return {part_name, true, "Part is Outdated, will recheck later"}; + result.recheck_after = delay + 30; + + auto message = PreformattedMessage::create("Part {} is Outdated, will wait for cleanup thread to handle it " + "and check again after {}s", part_name, result.recheck_after); + LOG_WARNING(log, message); + result.status = {part_name, true, message.text}; + result.action = ReplicatedCheckResult::RecheckLater; + return result; } } /// We do not have this or a covering part. if (!part) { - searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper); - return {part_name, false, "Part is missing, will search for it"}; + result.status = {part_name, false, "Part is missing, will search for it"}; + result.action = ReplicatedCheckResult::TryFetchMissing; + return result; } /// We have this part, and it's active. We will check whether we need this part and whether it has the right data. - if (part->name == part_name) - { - auto zookeeper = storage.getZooKeeper(); - auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); - - auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( - part->getColumns(), part->checksums); - - /// The double get scheme is needed to retain compatibility with very old parts that were created - /// before the ReplicatedMergeTreePartHeader was introduced. - - String part_path = storage.replica_path + "/parts/" + part_name; - String part_znode; - /// If the part is in ZooKeeper, check its data with its checksums, and them with ZooKeeper. 
- if (zookeeper->tryGet(part_path, part_znode)) - { - LOG_INFO(log, "Checking data of part {}.", part_name); - - try - { - ReplicatedMergeTreePartHeader zk_part_header; - if (!part_znode.empty()) - zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode); - else - { - String columns_znode = zookeeper->get(part_path + "/columns"); - String checksums_znode = zookeeper->get(part_path + "/checksums"); - zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - columns_znode, checksums_znode); - } - - if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) - throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); - - zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); - - checkDataPart( - part, - true, - [this] { return need_stop.load(); }); - - if (need_stop) - { - LOG_INFO(log, "Checking part was cancelled."); - return {part_name, false, "Checking part was cancelled"}; - } - - LOG_INFO(log, "Part {} looks good.", part_name); - } - catch (const Exception & e) - { - /// Don't count the part as broken if we got known retryable exception. - /// In fact, there can be other similar situations because not all - /// of the exceptions are classified as retryable/non-retryable. But it is OK, - /// because there is a safety guard against deleting too many parts. - if (isRetryableException(e)) - throw; - - tryLogCurrentException(log, __PRETTY_FUNCTION__); - constexpr auto fmt_string = "Part {} looks broken. Removing it and will try to fetch."; - String message = fmt::format(fmt_string, part_name); - LOG_ERROR(log, fmt_string, part_name); - - /// Delete part locally. - storage.outdateBrokenPartAndCloneToDetached(part, "broken"); - - ThreadFuzzer::maybeInjectMemoryLimitException(); - ThreadFuzzer::maybeInjectSleep(); - - /// Part is broken, let's try to find it and fetch. 
- searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper); - - return {part_name, false, message}; - } - } - else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr)) - { - /// If the part is not in ZooKeeper, delete it locally. - /// Probably, someone just wrote down the part, and has not yet added to ZK. - /// Therefore, delete only if the part is old (not very reliable). - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing."; - String message = fmt::format(fmt_string, part_name); - LOG_ERROR(log, fmt_string, part_name); - storage.outdateBrokenPartAndCloneToDetached(part, "unexpected"); - ThreadFuzzer::maybeInjectSleep(); - return {part_name, false, message}; - } - else - { - /// TODO You need to make sure that the part is still checked after a while. - /// Otherwise, it's possible that the part was not added to ZK, - /// but remained in the filesystem and in a number of active parts. - /// And then for a long time (before restarting), the data on the replicas will be different. - - LOG_TRACE(log, "Young part {} with age {} seconds hasn't been added to ZooKeeper yet. It's ok.", part_name, (time(nullptr) - part->modification_time)); - } - } - else + if (part->name != part_name) { /// If we have a covering part, ignore all the problems with this part. /// In the worst case, errors will still appear `old_parts_lifetime` seconds in error log until the part is removed as the old one. 
- LOG_WARNING(log, "We have part {} covering part {}", part->name, part_name); + auto message = PreformattedMessage::create("We have part {} covering part {}, will not check", part->name, part_name); + LOG_WARNING(log, message); + result.status = {part_name, true, message.text}; + result.action = ReplicatedCheckResult::DoNothing; + return result; } - part->checkMetadata(); - return {part_name, true, ""}; + time_t current_time = time(nullptr); + auto zookeeper = storage.getZooKeeper(); + auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); + + auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( + part->getColumns(), part->checksums); + + + /// If the part is in ZooKeeper, check its data with its checksums, and them with ZooKeeper. + if (exists_in_zookeeper) + { + LOG_INFO(log, "Checking data of part {}.", part_name); + + /// The double get scheme is needed to retain compatibility with very old parts that were created + /// before the ReplicatedMergeTreePartHeader was introduced. 
+ String part_path = storage.replica_path + "/parts/" + part_name; + String part_znode = zookeeper->get(part_path); + + try + { + ReplicatedMergeTreePartHeader zk_part_header; + if (!part_znode.empty()) + zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode); + else + { + String columns_znode = zookeeper->get(part_path + "/columns"); + String checksums_znode = zookeeper->get(part_path + "/checksums"); + zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( + columns_znode, checksums_znode); + } + + if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) + throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); + + zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + + checkDataPart( + part, + true, + [this] { return need_stop.load(); }); + + if (need_stop) + { + result.status = {part_name, false, "Checking part was cancelled"}; + result.action = ReplicatedCheckResult::Cancelled; + return result; + } + + part->checkMetadata(); + + LOG_INFO(log, "Part {} looks good.", part_name); + result.status = {part_name, true, ""}; + result.action = ReplicatedCheckResult::DoNothing; + return result; + } + catch (const Exception & e) + { + /// Don't count the part as broken if we got known retryable exception. + /// In fact, there can be other similar situations because not all + /// of the exceptions are classified as retryable/non-retryable. But it is OK, + /// because there is a safety guard against deleting too many parts. + if (isRetryableException(e)) + throw; + + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); + LOG_ERROR(log, message); + + /// Part is broken, let's try to find it and fetch. 
+ result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::TryFetchMissing; + return result; + } + } + else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < current_time) + { + /// If the part is not in ZooKeeper, delete it locally. + /// Probably, someone just wrote down the part, and has not yet added to ZK. + /// Therefore, delete only if the part is old (not very reliable). + constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing."; + String message = fmt::format(fmt_string, part_name); + LOG_ERROR(log, fmt_string, part_name); + result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::DetachUnexpected; + return result; + } + else + { + auto message = PreformattedMessage::create("Young part {} with age {} seconds hasn't been added to ZooKeeper yet. It's ok.", + part_name, (current_time - part->modification_time)); + LOG_INFO(log, message); + result.recheck_after = part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER - current_time; + result.status = {part_name, true, message}; + result.action = ReplicatedCheckResult::RecheckLater; + return result; + } +} + + +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name) +{ + LOG_INFO(log, "Checking part {}", part_name); + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); + + ReplicatedCheckResult result = checkPartImpl(part_name); + switch (result.action) + { + case ReplicatedCheckResult::None: UNREACHABLE(); + case ReplicatedCheckResult::DoNothing: break; + case ReplicatedCheckResult::Cancelled: + LOG_INFO(log, "Checking part was cancelled."); + break; + + case ReplicatedCheckResult::RecheckLater: + enqueuePart(part_name, result.recheck_after); + break; + + case ReplicatedCheckResult::DetachUnexpected: + chassert(!result.exists_in_zookeeper); + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); + + 
storage.outdateUnexpectedPartAndCloneToDetached(result.part); + break; + + case ReplicatedCheckResult::TryFetchMissing: + { + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); + + /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue (atomically). + if (result.exists_in_zookeeper) + { + /// We cannot simply remove part from ZooKeeper, because it may be removed from virtual_part, + /// so we have to create some entry in the queue. Maybe we will execute it (by fetching part or covering part from somewhere), + /// maybe will simply replace with empty part. + if (result.part) + LOG_WARNING(log, "Part {} exists in ZooKeeper and the local part was broken. Detaching it, removing from ZooKeeper and queueing a fetch.", part_name); + else + LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally. Removing from ZooKeeper and queueing a fetch.", part_name); + + storage.removePartAndEnqueueFetch(part_name, /* storage_init = */ false); + break; + } + + chassert(!result.part); + + /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). + /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). + /// Either all replicas having the part are not active, or the part is lost forever. + bool is_lost = searchForMissingPartOnOtherReplicas(part_name); + if (is_lost) + onPartIsLostForever(part_name); + + break; + } + } + + return result.status; +} + +void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) +{ + auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); + if (lost_part_info.level != 0 || lost_part_info.mutation != 0) + { + Strings source_parts; + bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); + + /// If it's MERGE/MUTATION etc. 
we shouldn't replace result part with empty part + /// because some source parts can be lost, but some of them can exist. + if (part_in_queue && !source_parts.empty()) + { + LOG_ERROR(log, "Part {} found in queue and some source parts for it was lost. Will check all source parts.", part_name); + for (const String & source_part_name : source_parts) + enqueuePart(source_part_name); + + return; + } + } + + ThreadFuzzer::maybeInjectSleep(); + + if (storage.createEmptyPartInsteadOfLost(storage.getZooKeeper(), part_name)) + { + /** This situation is possible if on all the replicas where the part was, it deteriorated. + * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk. + */ + LOG_ERROR(log, "Part {} is lost forever.", part_name); + ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); + } + else + { + LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); + constexpr time_t retry_after_seconds = 30; + enqueuePart(part_name, retry_after_seconds); + } } @@ -524,7 +568,7 @@ void ReplicatedMergeTreePartCheckThread::run() if (selected == parts_queue.end()) return; - checkPart(selected->first); + checkPartAndFix(selected->first); if (need_stop) return; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index b86191dbf50..0a8fbc75c05 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -18,6 +18,27 @@ namespace DB class StorageReplicatedMergeTree; +struct ReplicatedCheckResult +{ + enum Action + { + None, + + Cancelled, + DoNothing, + RecheckLater, + + DetachUnexpected, + TryFetchMissing, + }; + + CheckResult status; + Action action = None; + + bool exists_in_zookeeper; + MergeTreeDataPartPtr part; + time_t recheck_after = 0; +}; /** Checks the integrity of the parts requested for 
validation. * @@ -44,7 +65,9 @@ public: size_t size() const; /// Check part by name - CheckResult checkPart(const String & part_name); + CheckResult checkPartAndFix(const String & part_name); + + ReplicatedCheckResult checkPartImpl(const String & part_name); std::unique_lock pausePartsCheck(); @@ -54,26 +77,13 @@ public: private: void run(); - /// Search for missing part and queue fetch if possible. Otherwise - /// remove part from zookeeper and queue. - void searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper); + void onPartIsLostForever(const String & part_name); std::pair findLocalPart(const String & part_name); - enum MissingPartSearchResult - { - /// We found this part on other replica, let's fetch it. - FoundAndNeedFetch, - /// We found covering part or source part with same min and max block number - /// don't need to fetch because we should do it during normal queue processing. - FoundAndDontNeedFetch, - /// Covering part not found anywhere and exact part_name doesn't found on other - /// replicas. - LostForever, - }; - /// Search for missing part on other replicas or covering part on all replicas (including our replica). - MissingPartSearchResult searchForMissingPartOnOtherReplicas(const String & part_name); + /// Returns false if the part is lost forever. 
+ bool searchForMissingPartOnOtherReplicas(const String & part_name) const; StorageReplicatedMergeTree & storage; String log_name; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b1ba06c77f9..56b8d431588 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3351,6 +3351,17 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt return false; } + if (entry.source_replica.empty()) + { + auto part = getPartIfExists(entry.new_part_name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated, MergeTreeDataPartState::Deleting}); + if (part && part->was_removed_as_broken) + { + disable_reason = fmt::format("Not executing fetch of part {} because we still have broken part with that name. " + "Waiting for the broken part to be removed first.", entry.new_part_name); + return false; + } + } + return true; } @@ -3731,23 +3742,44 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n { auto zookeeper = getZooKeeper(); + DataPartPtr broken_part; + auto outdate_broken_part = [this, &broken_part]() + { + if (broken_part) + return; + DataPartsLock lock = lockParts(); + if (broken_part->getState() == DataPartState::Active) + removePartsFromWorkingSet(NO_TRANSACTION_RAW, {broken_part}, true, &lock); + }; + /// We don't know exactly what happened to broken part /// and we are going to remove all covered log entries. /// It's quite dangerous, so clone covered parts to detached. 
auto broken_part_info = MergeTreePartInfo::fromPartName(part_name, format_version); - auto partition_range = getVisibleDataPartsVectorInPartition(getContext(), broken_part_info.partition_id); + auto partition_range = getDataPartsVectorInPartitionForInternalUsage({MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}, + broken_part_info.partition_id); for (const auto & part : partition_range) { if (!broken_part_info.contains(part->info)) continue; - /// Broken part itself either already moved to detached or does not exist. - assert(broken_part_info != part->info); - part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + if (broken_part_info == part->info) + { + chassert(!broken_part); + chassert(!storage_init); + part->was_removed_as_broken = true; + part->makeCloneInDetached("broken", getInMemoryMetadataPtr()); + broken_part = part; + } + else + { + part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + } } ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); /// It's possible that queue contains entries covered by part_name. /// For example, we had GET_PART all_1_42_5 and MUTATE_PART all_1_42_5_63, @@ -3762,6 +3794,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n queue.removePartProducingOpsInRange(zookeeper, broken_part_info, /* covering_entry= */ {}); ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); String part_path = fs::path(replica_path) / "parts" / part_name; @@ -3780,7 +3813,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n /// but we are going to remove it from /parts and add to queue again. 
Coordination::Stat is_lost_stat; String is_lost_value = zookeeper->get(replica_path + "/is_lost", &is_lost_stat); - assert(is_lost_value == "0"); + chassert(is_lost_value == "0"); ops.emplace_back(zkutil::makeSetRequest(replica_path + "/is_lost", is_lost_value, is_lost_stat.version)); part_create_time = stat.ctime / 1000; @@ -3802,12 +3835,8 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n ReplicatedMergeTreeMergePredicate merge_pred = queue.getMergePredicate(zookeeper, PartitionIdsHint{broken_part_info.partition_id}); if (merge_pred.isGoingToBeDropped(broken_part_info)) { - LOG_INFO(log, "Broken part {} is covered by drop range, don't need to fetch it, removing it from ZooKeeper", part_name); - - /// But we have to remove it from ZooKeeper because broken parts are not removed from ZK during Outdated parts cleanup - /// There's a chance that DROP_RANGE will remove it, but only if it was not already removed by cleanup thread - if (exists_in_zookeeper) - removePartsFromZooKeeperWithRetries({part_name}); + LOG_INFO(log, "Broken part {} is covered by drop range, don't need to fetch it", part_name); + outdate_broken_part(); return; } @@ -3836,10 +3865,11 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n zkutil::KeeperMultiException::check(rc, ops, results); - String path_created = dynamic_cast(*results.back()).path_created; - log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1); - queue.insert(zookeeper, log_entry); - break; + /// Make the part outdated after creating the log entry. + /// Otherwise, if we failed to create the entry, cleanup thread could remove the part from ZooKeeper (leading to diverged replicas) + outdate_broken_part(); + queue_updating_task->schedule(); + return; } } @@ -6841,10 +6871,10 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKee { /// Broken part can be removed from zk by removePartAndEnqueueFetch(...) only. 
/// Removal without enqueueing a fetch leads to intersecting parts. - if (part->is_duplicate || part->outdated_because_broken) + if (part->is_duplicate || part->is_unexpected_local_part) { - LOG_WARNING(log, "Will not remove part {} from ZooKeeper (is_duplicate: {}, outdated_because_broken: {})", - part->name, part->is_duplicate, part->outdated_because_broken); + LOG_WARNING(log, "Will not remove part {} from ZooKeeper (is_duplicate: {}, is_unexpected_local_part: {})", + part->name, part->is_duplicate, part->is_unexpected_local_part); parts_to_delete_only_from_filesystem.emplace_back(part); } else @@ -8189,7 +8219,7 @@ CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, Context { try { - results.push_back(part_check_thread.checkPart(part->name)); + results.push_back(part_check_thread.checkPartAndFix(part->name)); } catch (const Exception & ex) { From bb5b47cacf30c84f51e3c8a70040bf5707a5e742 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 4 Jul 2023 19:07:11 +0000 Subject: [PATCH 127/230] do not access Exception::thread_frame_pointers if not initialized --- src/Daemon/BaseDaemon.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..a75aac7a08e 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -154,7 +154,10 @@ static void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - writeVectorBinary(Exception::thread_frame_pointers, out); + if (Exception::enable_job_stack_trace) + writeVectorBinary(Exception::thread_frame_pointers, out); + else + writeVarUInt(0, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); From 3021180e0133c8904a29cfc1d4254a0504f9a5fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 21:33:12 +0200 Subject: [PATCH 128/230] Update --- 
.github/workflows/master.yml | 23 +++++++++-------------- .github/workflows/pull_request.yml | 24 ++++++++++-------------- tests/ci/ci_config.py | 2 +- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c6270af0efa..6996221e1aa 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -851,8 +851,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [DockerHubPush] - runs-on: [self-hosted, builder] + needs: [ DockerHubPush ] + runs-on: [ self-hosted, builder ] steps: - name: Set envs run: | @@ -864,38 +864,33 @@ jobs: BUILD_NAME=binary_riscv64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + 
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0ec4d997a4d..fe7c3bba410 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -912,8 +912,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [DockerHubPush, FastTest, StyleCheck] - runs-on: [self-hosted, builder] + needs: [ DockerHubPush, FastTest, StyleCheck ] + runs-on: [ self-hosted, builder ] steps: - name: Set envs run: | @@ -925,36 +925,32 @@ jobs: BUILD_NAME=binary_riscv64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f 
$(docker ps -a -q) ||: + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 6f86c24184b..1777180a76e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -174,7 +174,7 @@ CI_CONFIG = { "comment": "SSE2-only build", }, "binary_riscv64": { - "compiler": "clang-14-riscv64", + "compiler": "clang-16-riscv64", "build_type": "", "sanitizer": "", "package_type": "binary", From 6345879cdf4ba9c33f121a17a16e389761791de5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:56:58 +0300 Subject: [PATCH 129/230] Update src/Disks/VolumeJBOD.cpp Co-authored-by: Sergei Trifonov --- src/Disks/VolumeJBOD.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 885b1d56b0d..519f3378c4c 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -46,11 +46,11 @@ VolumeJBOD::VolumeJBOD( for (const auto & disk : disks) { auto size = disk->getTotalSpace(); - sizes.push_back(*size); if (size) sum_size += *size; else break; + sizes.push_back(*size); } if (sizes.size() == disks.size()) { From 5a3299572626c5ce5fcd53759b134de49287a4e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:57:39 +0300 Subject: [PATCH 130/230] Update src/Disks/IVolume.cpp Co-authored-by: Sergei Trifonov --- src/Disks/IVolume.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 15b52acb422..43caf07d70a 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -51,7 +51,7 @@ IVolume::IVolume( std::optional IVolume::getMaxUnreservedFreeSpace() 
const { - std::optional res = 0; + std::optional res; for (const auto & disk : disks) res = std::max(res, disk->getUnreservedSpace()); return res; From c76cf53391426471d2a374b63c302e2a383258a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:14:37 +0200 Subject: [PATCH 131/230] Address review comments --- src/Disks/IVolume.cpp | 9 ++++++++- src/Disks/StoragePolicy.cpp | 13 ++++++++----- src/Disks/loadLocalDiskConfig.cpp | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 43caf07d70a..0b072e6ba8b 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -53,7 +53,14 @@ std::optional IVolume::getMaxUnreservedFreeSpace() const { std::optional res; for (const auto & disk : disks) - res = std::max(res, disk->getUnreservedSpace()); + { + auto disk_unreserved_space = disk->getUnreservedSpace(); + if (!disk_unreserved_space) + return std::nullopt; /// There is at least one unlimited disk. + + if (!res || *disk_unreserved_space > *res) + res = disk_unreserved_space; + } return res; } diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 92cca23ca76..6b8d7186a15 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -209,14 +209,17 @@ DiskPtr StoragePolicy::tryGetDiskByName(const String & disk_name) const UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const { - UInt64 res = 0; + std::optional res; for (const auto & volume : volumes) { - auto max_unreserved_for_volume = volume->getMaxUnreservedFreeSpace(); - if (max_unreserved_for_volume) - res = std::max(res, *max_unreserved_for_volume); + auto volume_unreserved_space = volume->getMaxUnreservedFreeSpace(); + if (!volume_unreserved_space) + return -1ULL; /// There is at least one unlimited disk. 
+ + if (!res || *volume_unreserved_space > *res) + res = volume_unreserved_space; } - return res; + return res.value_or(-1ULL); } diff --git a/src/Disks/loadLocalDiskConfig.cpp b/src/Disks/loadLocalDiskConfig.cpp index 0e5eca17ca7..0c4a9e7af32 100644 --- a/src/Disks/loadLocalDiskConfig.cpp +++ b/src/Disks/loadLocalDiskConfig.cpp @@ -56,7 +56,7 @@ void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + keep_free_space_bytes = static_cast(*DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); } } From ead43836f7b9f1eb04e8cd4e9c293f39ddf1ec1a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:35:01 +0200 Subject: [PATCH 132/230] Fix the test --- .../02796_calculate_text_stack_trace.sql | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql index 601bd16fb39..52d55bdbe11 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -1,16 +1,20 @@ -- Tags: no-parallel -TRUNCATE TABLE system.text_log; - SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; -SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; -TRUNCATE TABLE system.text_log; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT 
\'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; + +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' + AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1) + ORDER BY event_time_microseconds DESC LIMIT 10; SET calculate_text_stack_trace = 0; SELECT 'World', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; + SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; -SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; + +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' + AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1) + ORDER BY event_time_microseconds DESC LIMIT 10; From 607a8a1c465baf85818ec41b8229f7afda8d6fb8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 22:52:59 +0200 Subject: [PATCH 133/230] fix --- src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index d6f8dbac883..1cc3736bd2e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ 
b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -472,8 +472,8 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). /// Either all replicas having the part are not active, or the part is lost forever. - bool is_lost = searchForMissingPartOnOtherReplicas(part_name); - if (is_lost) + bool found_something = searchForMissingPartOnOtherReplicas(part_name); + if (!found_something) onPartIsLostForever(part_name); break; From da105d491661d4a7a564263d11499c74126f0453 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 4 Jul 2023 23:01:06 +0200 Subject: [PATCH 134/230] impl --- src/Functions/FunctionsHashing.h | 5 ++++- tests/queries/0_stateless/02790_keyed_hash_bug.reference | 1 + tests/queries/0_stateless/02790_keyed_hash_bug.sql | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02790_keyed_hash_bug.reference create mode 100644 tests/queries/0_stateless/02790_keyed_hash_bug.sql diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index a4d4fbd085d..f20cf4a5ff4 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -81,7 +81,7 @@ namespace impl static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) { - SipHashKey ret; + SipHashKey ret{}; const auto * tuple = checkAndGetColumn(key.column.get()); if (!tuple) @@ -90,6 +90,9 @@ namespace impl if (tuple->tupleSize() != 2) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); + if (tuple->empty()) + return ret; + if (const auto * key0col = checkAndGetColumn(&(tuple->getColumn(0)))) ret.key0 = key0col->get64(0); else diff --git a/tests/queries/0_stateless/02790_keyed_hash_bug.reference 
b/tests/queries/0_stateless/02790_keyed_hash_bug.reference new file mode 100644 index 00000000000..a321a9052d0 --- /dev/null +++ b/tests/queries/0_stateless/02790_keyed_hash_bug.reference @@ -0,0 +1 @@ +16324913028386710556 diff --git a/tests/queries/0_stateless/02790_keyed_hash_bug.sql b/tests/queries/0_stateless/02790_keyed_hash_bug.sql new file mode 100644 index 00000000000..409e284d0d5 --- /dev/null +++ b/tests/queries/0_stateless/02790_keyed_hash_bug.sql @@ -0,0 +1,2 @@ +--- previously caused MemorySanitizer: use-of-uninitialized-value, because we tried to read hash key from empty tuple column during interpretation +SELECT sipHash64Keyed((1111111111111111111, toUInt64(222222222222223))) group by toUInt64(222222222222223); From 39199fd1168816c0e46da0011e21ad20573517e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 00:49:10 +0200 Subject: [PATCH 135/230] Update test --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 8b137891791..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +0,0 @@ - From ede63a0f4e8239c56999a72bfe3af3f59e63dfb2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jul 2023 00:30:28 +0000 Subject: [PATCH 136/230] fix drop column with enabled sparse columns --- src/Storages/MergeTree/MutateTask.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..f23ef82fca8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -67,7 +67,9 @@ static void splitAndModifyMutationCommands( if (!isWidePart(part) || 
!isFullPartStorage(part->getDataPartStorage())) { - NameSet mutated_columns, dropped_columns; + NameSet mutated_columns; + NameSet dropped_columns; + for (const auto & command : commands) { if (command.type == MutationCommand::Type::MATERIALIZE_INDEX @@ -258,6 +260,10 @@ getColumnsForNewDataPart( storage_columns.emplace_back(column); } + NameSet storage_columns_set; + for (const auto & [name, _] : storage_columns) + storage_columns_set.insert(name); + for (const auto & command : all_commands) { if (command.type == MutationCommand::UPDATE) @@ -292,13 +298,15 @@ getColumnsForNewDataPart( SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { - if (removed_columns.contains(name)) - continue; - auto it = renamed_columns_from_to.find(name); auto new_name = it == renamed_columns_from_to.end() ? name : it->second; - if (!updated_header.has(new_name)) + if (!storage_columns_set.contains(new_name)) + continue; + + /// In compact part we read all columns and all of them are in @updated_header. + /// But in wide part we must keep serialization infos for columns that are not touched by mutation. 
+ if (!updated_header.has(new_name) && isWidePart(source_part)) { new_serialization_infos.emplace(new_name, old_info); continue; From 759b8b9a7685f566a88e86f5db5ebccb0db34869 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 07:17:52 +0000 Subject: [PATCH 137/230] Fix more tests --- .../00941_system_columns_race_condition.sh | 46 +++++++++++-------- .../0_stateless/02470_mutation_sync_race.sh | 8 ++-- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/00941_system_columns_race_condition.sh b/tests/queries/0_stateless/00941_system_columns_race_condition.sh index 69dfb30cd2c..4f2cd6ee91b 100755 --- a/tests/queries/0_stateless/00941_system_columns_race_condition.sh +++ b/tests/queries/0_stateless/00941_system_columns_race_condition.sh @@ -14,35 +14,43 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d function thread1() { - # NOTE: database = $CLICKHOUSE_DATABASE is unwanted - while true; do $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; + done } function thread2() { - while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; + done } # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout export -f thread1; export -f thread2; -timeout 15 bash -c thread1 2> /dev/null & 
-timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & +TIMEOUT=15 + +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & wait diff --git a/tests/queries/0_stateless/02470_mutation_sync_race.sh b/tests/queries/0_stateless/02470_mutation_sync_race.sh index 6c259e46cb1..37e99663ab5 100755 --- a/tests/queries/0_stateless/02470_mutation_sync_race.sh +++ b/tests/queries/0_stateless/02470_mutation_sync_race.sh @@ -12,7 +12,11 @@ $CLICKHOUSE_CLIENT -q "insert into src values (0)" function thread() { + local TIMELIMIT=$((SECONDS+$1)) for i in $(seq 1000); do + if [ $SECONDS -ge "$TIMELIMIT" ]; then + return + fi $CLICKHOUSE_CLIENT -q "alter table src detach partition tuple()" $CLICKHOUSE_CLIENT -q "alter table src attach partition tuple()" $CLICKHOUSE_CLIENT -q "alter table src update A = ${i} where 1 settings mutations_sync=2" @@ -20,8 +24,6 @@ function thread() done } -export -f thread; - TIMEOUT=30 -timeout $TIMEOUT bash -c 
thread || true +thread $TIMEOUT || true \ No newline at end of file From 47cffa6f1ed6832e38d30a95f2c63e26506b0a10 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 09:40:53 +0000 Subject: [PATCH 138/230] Properly check the first part disk --- tests/integration/test_multiple_disks/test.py | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index b5606ee8bc2..c0fbe39196d 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -49,6 +49,18 @@ def start_cluster(): cluster.shutdown() +def get_oldest_part(node, table_name): + return node.query( + f"SELECT name FROM system.parts WHERE table = '{table_name}' and active = 1 ORDER BY modification_time LIMIT 1" + ).strip() + + +def get_disk_for_part(node, table_name, part): + return node.query( + f"SELECT disk_name FROM system.parts WHERE table == '{table_name}' and active = 1 and name = '{part}' ORDER BY modification_time" + ).strip() + + def test_system_tables(start_cluster): expected_disks_data = [ { @@ -694,15 +706,13 @@ def test_jbod_overflow(start_cluster, name, engine): def test_background_move(start_cluster, name, engine): try: node1.query_with_retry( - """ + f""" CREATE TABLE IF NOT EXISTS {name} ( s1 String ) ENGINE = {engine} ORDER BY tuple() SETTINGS storage_policy='moving_jbod_with_external' - """.format( - name=name, engine=engine - ) + """ ) node1.query(f"SYSTEM STOP MERGES {name}") @@ -718,25 +728,27 @@ def test_background_move(start_cluster, name, engine): ) ) + first_part = get_oldest_part(node1, name) + used_disks = get_used_disks_for_table(node1, name) retry = 20 i = 0 - while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: + # multiple moves can be assigned in parallel so we can move later parts before the oldest + # we need to wait explicitly until the oldest part is moved + while 
get_disk_for_part(node1, name, first_part) != "external" and i < retry: time.sleep(0.5) - used_disks = get_used_disks_for_table(node1, name) i += 1 + used_disks = get_used_disks_for_table(node1, name) assert sum(1 for x in used_disks if x == "jbod1") <= 2 # first (oldest) part was moved to external - assert used_disks[0] == "external" + assert get_disk_for_part(node1, name, first_part) == "external" node1.query("SYSTEM FLUSH LOGS") path = node1.query( - "SELECT path_on_disk FROM system.part_log WHERE table = '{}' AND event_type='MovePart' AND part_name = 'all_1_1_0'".format( - name - ) + f"SELECT path_on_disk FROM system.part_log WHERE table = '{name}' AND event_type='MovePart' AND part_name = '{first_part}'" ) # first (oldest) part was moved to external @@ -762,36 +774,28 @@ def test_background_move(start_cluster, name, engine): def test_start_stop_moves(start_cluster, name, engine): try: node1.query_with_retry( - """ + f""" CREATE TABLE IF NOT EXISTS {name} ( s1 String ) ENGINE = {engine} ORDER BY tuple() SETTINGS storage_policy='moving_jbod_with_external' - """.format( - name=name, engine=engine - ) + """ ) - node1.query_with_retry("INSERT INTO {} VALUES ('HELLO')".format(name)) - node1.query_with_retry("INSERT INTO {} VALUES ('WORLD')".format(name)) + node1.query_with_retry(f"INSERT INTO {name} VALUES ('HELLO')") + node1.query_with_retry(f"INSERT INTO {name} VALUES ('WORLD')") used_disks = get_used_disks_for_table(node1, name) assert all(d == "jbod1" for d in used_disks), "All writes shoud go to jbods" - first_part = node1.query( - "SELECT name FROM system.parts WHERE table = '{}' and active = 1 ORDER BY modification_time LIMIT 1".format( - name - ) - ).strip() + first_part = get_oldest_part(node1, name) node1.query("SYSTEM STOP MOVES") with pytest.raises(QueryRuntimeException): node1.query( - "ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format( - name, first_part - ) + f"ALTER TABLE {name} MOVE PART '{first_part}' TO VOLUME 'external'" ) used_disks = 
get_used_disks_for_table(node1, name) @@ -801,24 +805,18 @@ def test_start_stop_moves(start_cluster, name, engine): node1.query("SYSTEM START MOVES") - node1.query( - "ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format( - name, first_part - ) - ) + node1.query(f"ALTER TABLE {name} MOVE PART '{first_part}' TO VOLUME 'external'") disk = node1.query( - "SELECT disk_name FROM system.parts WHERE table = '{}' and name = '{}' and active = 1".format( - name, first_part - ) + f"SELECT disk_name FROM system.parts WHERE table = '{name}' and name = '{first_part}' and active = 1" ).strip() assert disk == "external" - node1.query_with_retry("TRUNCATE TABLE {}".format(name)) + node1.query_with_retry(f"TRUNCATE TABLE {name}") - node1.query("SYSTEM STOP MOVES {}".format(name)) - node1.query("SYSTEM STOP MERGES {}".format(name)) + node1.query(f"SYSTEM STOP MOVES {name}") + node1.query(f"SYSTEM STOP MERGES {name}") for i in range(5): data = [] # 5MB in total @@ -831,6 +829,8 @@ def test_start_stop_moves(start_cluster, name, engine): ) ) + first_part = get_oldest_part(node1, name) + used_disks = get_used_disks_for_table(node1, name) retry = 5 @@ -843,23 +843,23 @@ def test_start_stop_moves(start_cluster, name, engine): # first (oldest) part doesn't move anywhere assert used_disks[0] == "jbod1" - node1.query("SYSTEM START MOVES {}".format(name)) + node1.query(f"SYSTEM START MOVES {name}") - # wait sometime until background backoff finishes + # multiple moves can be assigned in parallel so we can move later parts before the oldest + # we need to wait explicitly until the oldest part is moved retry = 60 i = 0 - while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: + while get_disk_for_part(node1, name, first_part) != "external" and i < retry: time.sleep(1) - used_disks = get_used_disks_for_table(node1, name) i += 1 - node1.query("SYSTEM START MERGES {}".format(name)) + # first (oldest) part moved to external + assert get_disk_for_part(node1, name, first_part) 
== "external" + used_disks = get_used_disks_for_table(node1, name) assert sum(1 for x in used_disks if x == "jbod1") <= 2 - # first (oldest) part moved to external - assert used_disks[0] == "external" - + node1.query(f"SYSTEM START MERGES {name}") finally: node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") From e6422f814418fce9e020e5f32029192e8f6a5dd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 5 Jul 2023 11:52:46 +0200 Subject: [PATCH 139/230] Delete comment, rename variable --- src/Interpreters/FilesystemCacheLog.h | 11 +---------- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.h | 4 ++-- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index d6dd00e5463..0d088a922e0 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,16 +11,7 @@ namespace DB { -/// -/// -------- Column --------- Type ------ -/// | event_date | DateTime | -/// | event_time | UInt64 | -/// | query_id | String | -/// | remote_file_path | String | -/// | segment_range | Tuple | -/// | read_type | String | -/// ------------------------------------- -/// + struct FilesystemCacheLogElement { enum class CacheType diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..1c2eb66923e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,15 +370,15 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.delete_key.empty()) + if (query.key_to_drop.empty()) { cache->removeAllReleasable(); } else { - auto key = FileCacheKey::fromKeyString(query.delete_key); - if (query.delete_offset.has_value()) - 
cache->removeFileSegment(key, query.delete_offset.value()); + auto key = FileCacheKey::fromKeyString(query.key_to_drop); + if (query.offset_to_drop.has_value()) + cache->removeFileSegment(key, query.offset_to_drop.value()); else cache->removeKey(key); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9c5e7bff61e..22244a7075c 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -212,11 +212,11 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, if (!filesystem_cache_name.empty()) { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!delete_key.empty()) + if (!key_to_drop.empty()) { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; - if (delete_offset.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; + if (offset_to_drop.has_value()) + settings.ostr << (settings.hilite ? 
hilite_none : "") << " OFFSET " << offset_to_drop.value(); } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..6c81162f103 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string delete_key; - std::optional delete_offset; + std::string key_to_drop; + std::optional offset_to_drop; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ef71e994d56..09c86876b48 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -409,9 +409,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->filesystem_cache_name = ast->as()->value.safeGet(); if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) { - res->delete_key = ast->as()->name(); + res->key_to_drop = ast->as()->name(); if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->delete_offset = ast->as()->value.safeGet(); + res->offset_to_drop = ast->as()->value.safeGet(); } } if (!parseQueryWithOnCluster(res, pos, expected)) From cf809c25cd0052b1a7d51aea8d5179a1c9b741d2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jul 2023 11:24:29 +0000 Subject: [PATCH 140/230] fix CLEAR COLUMN query --- src/Storages/MergeTree/MutateTask.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f23ef82fca8..1346d5937f7 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -301,14 +301,16 @@ getColumnsForNewDataPart( auto it = renamed_columns_from_to.find(name); auto new_name = it == renamed_columns_from_to.end() ? 
name : it->second; - if (!storage_columns_set.contains(new_name)) + /// Column can be removed only in this data part by CLEAR COLUMN query. + if (!storage_columns_set.contains(new_name) || removed_columns.contains(new_name)) continue; /// In compact part we read all columns and all of them are in @updated_header. /// But in wide part we must keep serialization infos for columns that are not touched by mutation. - if (!updated_header.has(new_name) && isWidePart(source_part)) + if (!updated_header.has(new_name)) { - new_serialization_infos.emplace(new_name, old_info); + if (isWidePart(source_part)) + new_serialization_infos.emplace(new_name, old_info); continue; } From 7cb7e138c13406f05d733323141649ae13a7f615 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 14:16:46 +0200 Subject: [PATCH 141/230] Update --- .github/workflows/master.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6996221e1aa..0fbcb95fc12 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -851,8 +851,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [ DockerHubPush ] - runs-on: [ self-hosted, builder ] + needs: [DockerHubPush] + runs-on: [self-hosted, builder] steps: - name: Set envs run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index fe7c3bba410..f898e764915 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -912,8 +912,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [ DockerHubPush, FastTest, StyleCheck ] - runs-on: [ self-hosted, builder ] + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] steps: - name: Set 
envs run: | From e957600d5c287c52f93d0f631587852ad0869035 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 5 Jul 2023 12:28:27 +0000 Subject: [PATCH 142/230] wip --- src/Parsers/ASTColumnDeclaration.cpp | 5 ++ src/Parsers/ASTColumnDeclaration.h | 1 + src/Parsers/ASTCreateQuery.h | 3 +- src/Parsers/ParserCreateQuery.cpp | 26 +++++- src/Parsers/ParserCreateQuery.h | 11 ++- .../02811_primary_key_in_columns.sql | 83 +++++++++++++++++++ 6 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02811_primary_key_in_columns.sql diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c2396708a73..12d000d5e9f 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -44,6 +44,7 @@ ASTPtr ASTColumnDeclaration::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } + if (collation) { res->collation = collation->clone(); @@ -76,6 +77,10 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : ""); } + if (primary_key_specifier) + settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") + << "PRIMARY KEY" << (settings.hilite ? hilite_none : ""); + if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 45814551db8..9d486667911 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -21,6 +21,7 @@ public: ASTPtr codec; ASTPtr ttl; ASTPtr collation; + bool primary_key_specifier = false; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 230996f610e..ae45a244a03 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -56,6 +56,7 @@ public: ASTExpressionList * constraints = nullptr; ASTExpressionList * projections = nullptr; IAST * primary_key = nullptr; + IAST * primary_key_from_columns = nullptr; String getID(char) const override { return "Columns definition"; } @@ -76,7 +77,7 @@ public: f(reinterpret_cast(&primary_key)); f(reinterpret_cast(&constraints)); f(reinterpret_cast(&projections)); - f(reinterpret_cast(&primary_key)); + f(reinterpret_cast(&primary_key_from_columns)); } }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index adf3513ba40..1941bafab0d 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -300,11 +300,21 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr constraints = std::make_shared(); ASTPtr projections = std::make_shared(); ASTPtr primary_key; + ASTPtr primary_key_from_columns; for (const auto & elem : list->children) { - if (elem->as()) + if (auto *cd = elem->as()) + { + if(cd->primary_key_specifier) + { + if(!primary_key_from_columns) + primary_key_from_columns = makeASTFunction("tuple"); + auto column_identifier = std::make_shared(cd->name); + primary_key_from_columns->children.push_back(column_identifier); + } columns->children.push_back(elem); + } else if (elem->as()) indices->children.push_back(elem); else if (elem->as()) @@ -336,6 +346,8 @@ 
bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E res->set(res->projections, projections); if (primary_key) res->set(res->primary_key, primary_key); + if (primary_key_from_columns) + res->set(res->primary_key_from_columns, primary_key_from_columns); node = res; @@ -599,6 +611,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// List of columns. if (s_lparen.ignore(pos, expected)) { + /// Columns and all table properties (indices, constraints, projections, primary_key) if (!table_properties_p.parse(pos, columns_list, expected)) return false; @@ -699,6 +712,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->storage->primary_key = query->columns_list->primary_key; } + if (query->columns_list && (query->columns_list->primary_key_from_columns)) + { + /// If engine is not set will use default one + if (!query->storage) + query->set(query->storage, std::make_shared()); + else if (query->storage->primary_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + + query->storage->primary_key = query->columns_list->primary_key_from_columns; + } + tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 5f79a4b68f6..09935e2b608 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -135,6 +135,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; ParserKeyword s_collate{"COLLATE"}; + ParserKeyword s_primary_key{"PRIMARY KEY"}; ParserExpression expr_parser; ParserStringLiteral string_literal_parser; ParserLiteral literal_parser; @@ -177,6 +178,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr codec_expression; ASTPtr 
ttl_expression; ASTPtr collation_expression; + bool primary_key_specifier = false; auto null_check_without_moving = [&]() -> bool { @@ -198,6 +200,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E && !s_ephemeral.checkWithoutMoving(pos, expected) && !s_alias.checkWithoutMoving(pos, expected) && !s_auto_increment.checkWithoutMoving(pos, expected) + && !s_primary_key.checkWithoutMoving(pos, expected) && (require_type || (!s_comment.checkWithoutMoving(pos, expected) && !s_codec.checkWithoutMoving(pos, expected)))) @@ -266,7 +269,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserDataType().parse(tmp_pos, type, tmp_expected); } } - /// This will rule out unusual expressions like *, t.* that cannot appear in DEFAULT if (default_expression && !dynamic_cast(default_expression.get())) return false; @@ -305,6 +307,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; } + if (s_primary_key.ignore(pos, expected)) + { + primary_key_specifier = true; + } + node = column_declaration; if (type) @@ -346,6 +353,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(collation_expression)); } + column_declaration->primary_key_specifier = primary_key_specifier; + return true; } diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.sql b/tests/queries/0_stateless/02811_primary_key_in_columns.sql new file mode 100644 index 00000000000..df25fdd14ab --- /dev/null +++ b/tests/queries/0_stateless/02811_primary_key_in_columns.sql @@ -0,0 +1,83 @@ +DROP TABLE IF EXISTS pk_test1; +DROP TABLE IF EXISTS pk_test2; +DROP TABLE IF EXISTS pk_test3; +DROP TABLE IF EXISTS pk_test4; +DROP TABLE IF EXISTS pk_test5; +DROP TABLE IF EXISTS pk_test6; +DROP TABLE IF EXISTS pk_test7; +DROP TABLE IF EXISTS pk_test8; +DROP TABLE IF EXISTS pk_test9; +DROP TABLE IF EXISTS pk_test10; +DROP TABLE IF EXISTS pk_test11; +DROP TABLE IF EXISTS 
pk_test12; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test13; +DROP TABLE IF EXISTS pk_test14; +DROP TABLE IF EXISTS pk_test15; +DROP TABLE IF EXISTS pk_test16; +DROP TABLE IF EXISTS pk_test17; +DROP TABLE IF EXISTS pk_test18; +DROP TABLE IF EXISTS pk_test19; +DROP TABLE IF EXISTS pk_test20; +DROP TABLE IF EXISTS pk_test21; +DROP TABLE IF EXISTS pk_test22; +DROP TABLE IF EXISTS pk_test23; + +SET default_table_engine=MergeTree; + +CREATE TABLE pk_test1 (String a PRIMARY KEY, String b, String c); +CREATE TABLE pk_test2 (String a PRIMARY KEY, String b PRIMARY KEY, String c); +CREATE TABLE pk_test3 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY); + +CREATE TABLE pk_test4 (String a, String b PRIMARY KEY, String c PRIMARY KEY); +CREATE TABLE pk_test5 (String a, String b PRIMARY KEY, String c); +CREATE TABLE pk_test6 (String a, String b, String c PRIMARY KEY); + +CREATE TABLE pk_test7 (String a PRIMARY KEY, String b, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test8 (String a PRIMARY KEY, String b PRIMARY KEY, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test9 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); + +CREATE TABLE pk_test10 (String a, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test11 (String a, String b PRIMARY KEY, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test12 (String a, String b, String c PRIMARY KEY, PRIMARY KEY (a)); + +CREATE TABLE pk_test12 (String a PRIMARY KEY, String b, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test13 (String a PRIMARY KEY, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test14 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); + +CREATE TABLE pk_test15 (String a, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test16 (String a, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test17 (String a, String b, 
String c PRIMARY KEY) PRIMARY KEY (a,b,c); + +CREATE TABLE pk_test18 (String a PRIMARY KEY, String b, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test19 (String a PRIMARY KEY, String b PRIMARY KEY, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test20 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); + +CREATE TABLE pk_test21 (String a, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test22 (String a, String b PRIMARY KEY, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test23 (String a, String b, String c PRIMARY KEY) ORDER BY (a,b,c); + +DROP TABLE IF EXISTS pk_test1; +DROP TABLE IF EXISTS pk_test2; +DROP TABLE IF EXISTS pk_test3; +DROP TABLE IF EXISTS pk_test4; +DROP TABLE IF EXISTS pk_test5; +DROP TABLE IF EXISTS pk_test6; +DROP TABLE IF EXISTS pk_test7; +DROP TABLE IF EXISTS pk_test8; +DROP TABLE IF EXISTS pk_test9; +DROP TABLE IF EXISTS pk_test10; +DROP TABLE IF EXISTS pk_test11; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test13; +DROP TABLE IF EXISTS pk_test14; +DROP TABLE IF EXISTS pk_test15; +DROP TABLE IF EXISTS pk_test16; +DROP TABLE IF EXISTS pk_test17; +DROP TABLE IF EXISTS pk_test18; +DROP TABLE IF EXISTS pk_test19; +DROP TABLE IF EXISTS pk_test20; +DROP TABLE IF EXISTS pk_test21; +DROP TABLE IF EXISTS pk_test22; +DROP TABLE IF EXISTS pk_test23; \ No newline at end of file From 8d5ddcbd3094182b44b3641f11acf6ba788faaf7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 15:40:06 +0200 Subject: [PATCH 143/230] Remove coverity --- .github/workflows/nightly.yml | 45 ----------------------------------- 1 file changed, 45 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cf61012f2bc..9de0444bd83 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -75,51 +75,6 @@ jobs: Codebrowser: needs: [DockerHubPush] uses: ./.github/workflows/woboq.yml - BuilderCoverity: - 
needs: DockerHubPush - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - BUILD_NAME=coverity - CACHES_PATH=${{runner.temp}}/../ccaches - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - TEMP_PATH=${{runner.temp}}/build_check - EOF - echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - - name: Download changed images - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - submodules: true - - name: Build - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload Coverity Analysis - if: ${{ success() || failure() }} - run: | - curl --form token="${COVERITY_TOKEN}" \ - --form email='security+coverity@clickhouse.com' \ - --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.gz" \ - --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \ - --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \ - https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: runs-on: [self-hosted, builder] env: From 1da413e64eaa092b2ab685253f4cb32a93dcc53e Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 5 Jul 2023 14:56:11 +0000 Subject: [PATCH 144/230] fix segfault when create invalid EmbeddedRocksdb table --- src/Storages/checkAndGetLiteralArgument.cpp | 10 +++++++++- .../02811_invalid_embedded_rocksdb_create.reference | 0 .../02811_invalid_embedded_rocksdb_create.sql | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 
tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference create mode 100644 tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp index 1aa942548a7..78ec1e55b64 100644 --- a/src/Storages/checkAndGetLiteralArgument.cpp +++ b/src/Storages/checkAndGetLiteralArgument.cpp @@ -12,7 +12,15 @@ namespace ErrorCodes template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) { - return checkAndGetLiteralArgument(*arg->as(), arg_name); + if (arg->as()) + return checkAndGetLiteralArgument(*arg->as(), arg_name); + + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Argument '{}' must be a literal, get {} (value: {})", + arg_name, + arg->getID(), + arg->formatForErrorMessage()); } template diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql new file mode 100644 index 00000000000..aac2652fbfa --- /dev/null +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -0,0 +1 @@ +CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} \ No newline at end of file From 8f1ed5c90de4ada3764ea6384220459359eb7950 Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 5 Jul 2023 15:04:38 +0000 Subject: [PATCH 145/230] add more check + line break --- src/Storages/checkAndGetLiteralArgument.cpp | 6 +++--- .../0_stateless/02811_invalid_embedded_rocksdb_create.sql | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp index 78ec1e55b64..5baf47fe91a 100644 --- 
a/src/Storages/checkAndGetLiteralArgument.cpp +++ b/src/Storages/checkAndGetLiteralArgument.cpp @@ -12,15 +12,15 @@ namespace ErrorCodes template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) { - if (arg->as()) + if (arg && arg->as()) return checkAndGetLiteralArgument(*arg->as(), arg_name); throw Exception( ErrorCodes::BAD_ARGUMENTS, "Argument '{}' must be a literal, get {} (value: {})", arg_name, - arg->getID(), - arg->formatForErrorMessage()); + arg ? arg->getID() : "NULL", + arg ? arg->formatForErrorMessage() : "NULL"); } template diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql index aac2652fbfa..bfe4ee0622e 100644 --- a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -1 +1 @@ -CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} \ No newline at end of file +CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} From 32e0348caa6ee34d1f631fceffbc6a93b09953d2 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:12:56 +0200 Subject: [PATCH 146/230] Revert "Publish changes" This reverts commit ed6bfd66fee9aa4c2e06eaf25ca81d02e09f075e. 
--- docker/packager/binary/build.sh | 4 ---- docker/packager/packager | 1 - 2 files changed, 5 deletions(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 08a9b07f3ce..c0803c74147 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -15,10 +15,6 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 - - if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then - tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > /output/source_sub.tar.gz - fi fi # Uncomment to debug ccache. Don't put ccache log in /output right away, or it diff --git a/docker/packager/packager b/docker/packager/packager index 42dc52aa37f..1b3df858cd2 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -168,7 +168,6 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) result.append("EXTRACT_TOOLCHAIN_DARWIN=1") - result.append("EXPORT_SOURCES_WITH_SUBMODULES=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") From bf190381f5b6fa068948330f54ae9ee583c1ea80 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 5 Jul 2023 17:03:18 +0000 Subject: [PATCH 147/230] addJoinedBlock -> addBlockToJoin --- src/Interpreters/ConcurrentHashJoin.cpp | 4 ++-- src/Interpreters/ConcurrentHashJoin.h | 6 +++--- src/Interpreters/DirectJoin.cpp | 2 +- src/Interpreters/DirectJoin.h | 4 ++-- src/Interpreters/FullSortingMergeJoin.h | 4 ++-- src/Interpreters/GraceHashJoin.cpp | 12 ++++++------ src/Interpreters/GraceHashJoin.h | 8 
++++---- src/Interpreters/HashJoin.cpp | 8 ++++---- src/Interpreters/HashJoin.h | 6 +++--- src/Interpreters/IJoin.h | 6 +++--- src/Interpreters/JoinSwitcher.cpp | 8 ++++---- src/Interpreters/JoinSwitcher.h | 2 +- src/Interpreters/MergeJoin.cpp | 2 +- src/Interpreters/MergeJoin.h | 2 +- src/Processors/Transforms/JoiningTransform.cpp | 2 +- src/Storages/StorageJoin.cpp | 4 ++-- 16 files changed, 40 insertions(+), 40 deletions(-) diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index fc24f0ae029..1a8e0ad96fa 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -49,7 +49,7 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptrgetOnlyClause().key_names_right, right_block); @@ -77,7 +77,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_li if (!lock.owns_lock()) continue; - bool limit_exceeded = !hash_join->data->addJoinedBlock(dispatched_block, check_limits); + bool limit_exceeded = !hash_join->data->addBlockToJoin(dispatched_block, check_limits); dispatched_block = {}; blocks_left--; diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 5e53f9845aa..1283879971d 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -16,13 +16,13 @@ namespace DB { /** - * Can run addJoinedBlock() parallelly to speedup the join process. On test, it almose linear speedup by + * Can run addBlockToJoin() parallelly to speedup the join process. On test, it almose linear speedup by * the degree of parallelism. * * The default HashJoin is not thread safe for inserting right table's rows and run it in a single thread. When * the right table is large, the join process is too slow. * - * We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks + * We create multiple HashJoin instances here. 
In addBlockToJoin(), one input block is split into multiple blocks * corresponding to the HashJoin instances by hashing every row on the join keys. And make a guarantee that every HashJoin * instance is written by only one thread. * @@ -37,7 +37,7 @@ public: ~ConcurrentHashJoin() override = default; const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block & block, std::shared_ptr & not_processed) override; void setTotals(const Block & block) override; diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp index cfefd7c5a91..431f216436d 100644 --- a/src/Interpreters/DirectJoin.cpp +++ b/src/Interpreters/DirectJoin.cpp @@ -103,7 +103,7 @@ DirectKeyValueJoin::DirectKeyValueJoin( right_sample_block_with_storage_column_names = right_sample_block_with_storage_column_names_; } -bool DirectKeyValueJoin::addJoinedBlock(const Block &, bool) +bool DirectKeyValueJoin::addBlockToJoin(const Block &, bool) { throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); } diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h index 644b66a9d99..e55ac278705 100644 --- a/src/Interpreters/DirectJoin.h +++ b/src/Interpreters/DirectJoin.h @@ -32,10 +32,10 @@ public: virtual const TableJoin & getTableJoin() const override { return *table_join; } - virtual bool addJoinedBlock(const Block &, bool) override; + virtual bool addBlockToJoin(const Block &, bool) override; virtual void checkTypesOfKeys(const Block &) const override; - /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). + /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin). 
/// Could be called from different threads in parallel. virtual void joinBlock(Block & block, std::shared_ptr &) override; diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index 7318d1d24a1..a6b53a51c04 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -30,9 +30,9 @@ public: const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & /* block */, bool /* check_limits */) override + bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override { - throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addJoinedBlock should not be called"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addBlockToJoin should not be called"); } static bool isSupported(const std::shared_ptr & table_join) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4218a8ea4e1..f455622c4c8 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -310,13 +310,13 @@ bool GraceHashJoin::isSupported(const std::shared_ptr & table_join) GraceHashJoin::~GraceHashJoin() = default; -bool GraceHashJoin::addJoinedBlock(const Block & block, bool /*check_limits*/) +bool GraceHashJoin::addBlockToJoin(const Block & block, bool /*check_limits*/) { if (current_bucket == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "GraceHashJoin is not initialized"); Block materialized = materializeBlock(block); - addJoinedBlockImpl(std::move(materialized)); + addBlockToJoinImpl(std::move(materialized)); return true; } @@ -596,7 +596,7 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() while (Block block = right_reader.read()) { num_rows += block.rows(); - addJoinedBlockImpl(std::move(block)); + addBlockToJoinImpl(std::move(block)); } LOG_TRACE(log, "Loaded bucket {} with {}(/{}) rows", @@ -621,7 +621,7 @@ Block GraceHashJoin::prepareRightBlock(const 
Block & block) return HashJoin::prepareRightBlock(block, hash_join_sample_block); } -void GraceHashJoin::addJoinedBlockImpl(Block block) +void GraceHashJoin::addBlockToJoinImpl(Block block) { block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); @@ -646,7 +646,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (!hash_join) hash_join = makeInMemoryJoin(); - hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + hash_join->addBlockToJoin(current_block, /* check_limits = */ false); if (!hasMemoryOverflow(hash_join)) return; @@ -677,7 +677,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) hash_join = makeInMemoryJoin(); if (current_block.rows() > 0) - hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + hash_join->addBlockToJoin(current_block, /* check_limits = */ false); } } diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index b8d83f4cad0..8224f1f1a4a 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -23,11 +23,11 @@ class HashJoin; * * The joining algorithm consists of three stages: * - * 1) During the first stage we accumulate blocks of the right table via @addJoinedBlock. + * 1) During the first stage we accumulate blocks of the right table via @addBlockToJoin. * Each input block is split into multiple buckets based on the hash of the row join keys. * The first bucket is added to the in-memory HashJoin, and the remaining buckets are written to disk for further processing. * When the size of HashJoin exceeds the limits, we double the number of buckets. - * There can be multiple threads calling addJoinedBlock, just like @ConcurrentHashJoin. + * There can be multiple threads calling addBlockToJoin, just like @ConcurrentHashJoin. * * 2) At the second stage we process left table blocks via @joinBlock. * Again, each input block is split into multiple buckets by hash. 
@@ -65,7 +65,7 @@ public: void initialize(const Block & sample_block) override; - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block & block, std::shared_ptr & not_processed) override; @@ -94,7 +94,7 @@ private: InMemoryJoinPtr makeInMemoryJoin(); /// Add right table block to the @join. Calls @rehash on overflow. - void addJoinedBlockImpl(Block block); + void addBlockToJoinImpl(Block block); /// Check that join satisfies limits on rows/bytes in table_join. bool hasMemoryOverflow(size_t total_rows, size_t total_bytes) const; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6fe2b8464f5..548039f257a 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -79,8 +79,8 @@ namespace JoinStuff { assert(flags[nullptr].size() <= size); need_flags = true; - // For one disjunct clause case, we don't need to reinit each time we call addJoinedBlock. - // and there is no value inserted in this JoinUsedFlags before addJoinedBlock finish. + // For one disjunct clause case, we don't need to reinit each time we call addBlockToJoin. + // and there is no value inserted in this JoinUsedFlags before addBlockToJoin finish. // So we reinit only when the hash table is rehashed to a larger size. 
if (flags.empty() || flags[nullptr].size() < size) [[unlikely]] { @@ -729,7 +729,7 @@ Block HashJoin::prepareRightBlock(const Block & block) const return prepareRightBlock(block, savedBlockSample()); } -bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) +bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) { if (!data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); @@ -781,7 +781,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) size_t total_bytes = 0; { if (storage_join_lock) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addJoinedBlock called when HashJoin locked to prevent updates"); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates"); data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks.emplace_back(std::move(block_to_save)); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 50eda4482bd..f30bbc3a46c 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -155,11 +155,11 @@ public: /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. */ - bool addJoinedBlock(const Block & source_block_, bool check_limits) override; + bool addBlockToJoin(const Block & source_block_, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; - /** Join data from the map (that was previously built by calls to addJoinedBlock) to the block with data from "left" table. + /** Join data from the map (that was previously built by calls to addBlockToJoin) to the block with data from "left" table. * Could be called from different threads in parallel. 
*/ void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; @@ -406,7 +406,7 @@ private: Poco::Logger * log; /// Should be set via setLock to protect hash table from modification from StorageJoin - /// If set HashJoin instance is not available for modification (addJoinedBlock) + /// If set HashJoin instance is not available for modification (addBlockToJoin) TableLockHolder storage_join_lock = nullptr; void dataMapInit(MapsVariant &); diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 83067b0eab7..97b119bd795 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -52,7 +52,7 @@ public: /// Add block of data from right hand of JOIN. /// @returns false, if some limit was exceeded and you should not insert more data. - virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; /// NOLINT + virtual bool addBlockToJoin(const Block & block, bool check_limits = true) = 0; /// NOLINT /* Some initialization may be required before joinBlock() call. * It's better to done in in constructor, but left block exact structure is not known at that moment. @@ -62,7 +62,7 @@ public: virtual void checkTypesOfKeys(const Block & block) const = 0; - /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). + /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin). /// Could be called from different threads in parallel. virtual void joinBlock(Block & block, std::shared_ptr & not_processed) = 0; @@ -79,7 +79,7 @@ public: /// Returns true if no data to join with. virtual bool alwaysReturnsEmptySet() const = 0; - /// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock. + /// StorageJoin/Dictionary is already filled. No need to call addBlockToJoin. /// Different query plan is used for such joins. 
virtual bool isFilled() const { return pipelineType() == JoinPipelineType::FilledRight; } virtual JoinPipelineType pipelineType() const { return JoinPipelineType::FillRightFirst; } diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 15702784d74..5ea347549c1 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -19,16 +19,16 @@ JoinSwitcher::JoinSwitcher(std::shared_ptr table_join_, const Block & limits.max_bytes = table_join->defaultMaxBytes(); } -bool JoinSwitcher::addJoinedBlock(const Block & block, bool) +bool JoinSwitcher::addBlockToJoin(const Block & block, bool) { std::lock_guard lock(switch_mutex); if (switched) - return join->addJoinedBlock(block); + return join->addBlockToJoin(block); /// HashJoin with external limits check - join->addJoinedBlock(block, false); + join->addBlockToJoin(block, false); size_t rows = join->getTotalRowCount(); size_t bytes = join->getTotalByteCount(); @@ -48,7 +48,7 @@ bool JoinSwitcher::switchJoin() bool success = true; for (const Block & saved_block : right_blocks) - success = success && join->addJoinedBlock(saved_block); + success = success && join->addBlockToJoin(saved_block); switched = true; return success; diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index eec4787037d..fb5066b2d04 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -23,7 +23,7 @@ public: /// Add block of data from right hand of JOIN into current join object. /// If join-in-memory memory limit exceeded switches to join-on-disk and continue with it. 
/// @returns false, if join-on-disk disk limit exceeded - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override { diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index d31510c2fb5..ceef1371f16 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -669,7 +669,7 @@ Block MergeJoin::modifyRightBlock(const Block & src_block) const return block; } -bool MergeJoin::addJoinedBlock(const Block & src_block, bool) +bool MergeJoin::addBlockToJoin(const Block & src_block, bool) { Block block = modifyRightBlock(src_block); diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 8b5d884a0e6..03a661c5b8a 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -23,7 +23,7 @@ public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block &, ExtraBlockPtr & not_processed) override; diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index bba8ec6fa16..49b90d04b81 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -305,7 +305,7 @@ void FillingRightJoinSideTransform::work() if (for_totals) join->setTotals(block); else - stop_reading = !join->addJoinedBlock(block); + stop_reading = !join->addBlockToJoin(block); set_totals = for_totals; } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index a238e9ef26c..640706aae17 100644 --- a/src/Storages/StorageJoin.cpp +++ 
b/src/Storages/StorageJoin.cpp @@ -146,7 +146,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) Block block; while (executor.pull(block)) { - new_data->addJoinedBlock(block, true); + new_data->addBlockToJoin(block, true); if (persistent) backup_stream.write(block); } @@ -257,7 +257,7 @@ void StorageJoin::insertBlock(const Block & block, ContextPtr context) if (!holder) throw Exception(ErrorCodes::DEADLOCK_AVOIDED, "StorageJoin: cannot insert data because current query tries to read from this storage"); - join->addJoinedBlock(block_to_insert, true); + join->addBlockToJoin(block_to_insert, true); } size_t StorageJoin::getSize(ContextPtr context) const From f7640ff5733822a9c6f4e119f6ff2ed7027a885d Mon Sep 17 00:00:00 2001 From: Feng Kaiyu Date: Thu, 6 Jul 2023 01:27:20 +0800 Subject: [PATCH 148/230] fix: correct exception message on policies comparison --- src/Disks/StoragePolicy.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..71922e297df 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "New storage policy {} shall contain volumes of old one ({})", + backQuote(new_storage_policy->getName()), + backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +314,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "New storage policy {} shall contain disks of old one ({})", + backQuote(new_storage_policy->getName()), + backQuote(name)); } } From b60a1c53d638b5c10727d3e5c0e6d5b5b8d5725a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 17:43:09 +0000 Subject: [PATCH 149/230] Fix oldest part fetching --- tests/integration/test_multiple_disks/test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index c0fbe39196d..fa79a9baa90 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -717,9 +717,10 @@ def test_background_move(start_cluster, name, engine): node1.query(f"SYSTEM STOP MERGES {name}") + first_part = None for i in range(5): data = [] # 5MB in total - for i in range(5): + for _ in range(5): data.append(get_random_string(1024 * 1024)) # 1MB row # small jbod size is 40MB, so lets insert 5MB batch 5 times node1.query_with_retry( @@ -728,7 +729,11 @@ def test_background_move(start_cluster, name, engine): ) ) - first_part = get_oldest_part(node1, name) + # we are doing moves in parallel so we need to fetch the name of first part before we add new parts + if i == 0: + first_part = get_oldest_part(node1, name) + + assert first_part is not None used_disks = get_used_disks_for_table(node1, name) From ce8b0cae822f7e049eba7e8967122890510a82c5 Mon Sep 17 00:00:00 2001 From: lcjh <120989324@qq.com> Date: Thu, 6 Jul 2023 02:14:48 +0800 Subject: [PATCH 150/230] remove duplicate condition --- src/Functions/FunctionUnixTimestamp64.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h index 58a23f7266e..a2065465501 100644 --- 
a/src/Functions/FunctionUnixTimestamp64.h +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -155,7 +155,6 @@ public: if (!((executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) - || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) || (executeType(result_column, arguments, input_rows_count)) From 44791af7102079b8a3db6a5a2fbe5fbaa8eae3bf Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 5 Jul 2023 22:54:22 +0200 Subject: [PATCH 151/230] stop merges properly for replicated tables --- tests/integration/test_multiple_disks/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index fa79a9baa90..4a934447345 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -711,7 +711,7 @@ def test_background_move(start_cluster, name, engine): s1 String ) ENGINE = {engine} ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external' + SETTINGS storage_policy='moving_jbod_with_external, max_replicated_merges_in_queue=0' """ ) @@ -784,7 +784,7 @@ def test_start_stop_moves(start_cluster, name, engine): s1 String ) ENGINE = {engine} ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external' + SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0 """ ) From 5a3aadacde7e82d47ff550601191186c2eab9abb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 23:40:37 +0200 Subject: [PATCH 152/230] Fix error --- tests/ci/ci_config.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 1777180a76e..ea7d112c73e 100644 --- 
a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -179,10 +179,9 @@ CI_CONFIG = { "sanitizer": "", "package_type": "binary", "static_binary_name": "riscv64", - "bundled": "bundled", - "libraries": "static", "tidy": "disable", "with_coverage": False, + "comment": "", }, }, "builds_report_config": { From 98da25f1d3f4014fd13d1f53aa3ccee6da21d9f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 01:17:41 +0200 Subject: [PATCH 153/230] Fix build --- cmake/target.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/target.cmake b/cmake/target.cmake index ea4c206fc4f..0791da87bf0 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -44,6 +44,8 @@ if (CMAKE_CROSSCOMPILING) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF CACHE INTERNAL "") set (ENABLE_MYSQL OFF CACHE INTERNAL "") + # It might be ok, but we need to update 'sysroot' + set (ENABLE_RUST OFF CACHE INTERNAL "") elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") From 698c49cd51f406d0a9e619b4c7d971857f1fb59b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 02:19:03 +0300 Subject: [PATCH 154/230] Update 02811_invalid_embedded_rocksdb_create.sql --- .../0_stateless/02811_invalid_embedded_rocksdb_create.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql index bfe4ee0622e..a87ac5e0de0 100644 --- a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -1 +1,2 @@ +-- Tags: no-fasttest CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} From 75d051dd5554022ee7d9c215543c5ffad5c3df63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 01:49:53 +0200 Subject: [PATCH 155/230] Remove useless packages 
--- docker/test/sqllogic/run.sh | 4 ++-- docker/test/stateless/Dockerfile | 1 - docker/test/stress/Dockerfile | 3 --- docker/test/upgrade/Dockerfile | 3 --- docker/test/util/Dockerfile | 1 - docs/zh/development/build.md | 7 ------- .../0_stateless/02439_merge_selecting_partitions.sql | 1 - 7 files changed, 2 insertions(+), 18 deletions(-) diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index 8d0252e3c98..444252837a3 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -92,8 +92,8 @@ sudo clickhouse stop ||: for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done -grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: -pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & +rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: +zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compressed (FIXME: remove once only github actions will be left) rm /var/log/clickhouse-server/clickhouse-server.log diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 32996140521..e1e84c427ba 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -33,7 +33,6 @@ RUN apt-get update -y \ qemu-user-static \ sqlite3 \ sudo \ - telnet \ tree \ unixodbc \ wget \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index e9712f430fd..eddeb04758b 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -8,8 +8,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ tzdata \ - fakeroot \ - debhelper \ parallel \ expect \ python3 \ @@ -20,7 +18,6 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet \ brotli \ && apt-get clean diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile index 
8e5890b81a0..9152230af1c 100644 --- a/docker/test/upgrade/Dockerfile +++ b/docker/test/upgrade/Dockerfile @@ -8,8 +8,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ tzdata \ - fakeroot \ - debhelper \ parallel \ expect \ python3 \ @@ -20,7 +18,6 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet \ brotli \ && apt-get clean diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 85e888f1df7..6a4c6aa3057 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -44,7 +44,6 @@ RUN apt-get update \ clang-${LLVM_VERSION} \ clang-tidy-${LLVM_VERSION} \ cmake \ - fakeroot \ gdb \ git \ gperf \ diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md index d76f4b1577c..bb25755a615 100644 --- a/docs/zh/development/build.md +++ b/docs/zh/development/build.md @@ -3,13 +3,6 @@ slug: /zh/development/build --- # 如何构建 ClickHouse 发布包 {#ru-he-gou-jian-clickhouse-fa-bu-bao} -## 安装 Git 和 Pbuilder {#an-zhuang-git-he-pbuilder} - -``` bash -sudo apt-get update -sudo apt-get install git pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring -``` - ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma} ``` bash diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql index 88ce2834d6b..bcfcaa2acd3 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -1,4 +1,3 @@ - drop table if exists rmt; create table rmt (n int, m int) engine=ReplicatedMergeTree('/test/02439/{shard}/{database}', '{replica}') partition by n order by n; From db14b2c54fbd42d1c8123a15d87382fe00938a6a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 02:16:38 +0200 Subject: [PATCH 156/230] Remove useless logs --- src/Interpreters/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c52dab722c9..694226af6b0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -322,8 +322,8 @@ static std::tuple executeQueryImpl( /// This does not have impact on the final span logs, because these internal queries are issued by external queries, /// we still have enough span logs for the execution of external queries. std::shared_ptr query_span = internal ? nullptr : std::make_shared("query"); - if (query_span) - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); + if (query_span && query_span->trace_id != UUID{}) + LOG_TRACE(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); auto query_start_time = std::chrono::system_clock::now(); From 5416b7b6df8104440d9d74cbdc68fd0505012654 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:04:58 +0200 Subject: [PATCH 157/230] Fix incorrect log level = warning --- programs/server/Server.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..686c3b90dd6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1146,7 +1146,16 @@ try size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); - if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) + if (merges_mutations_memory_usage_soft_limit == 0) + { + merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; + LOG_INFO(log, "Setting 
merges_mutations_memory_usage_soft_limit was set to {}" + " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), + formatReadableSizeWithBinarySuffix(memory_amount), + server_settings_.merges_mutations_memory_usage_to_ram_ratio); + } + else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) { merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" From 64d5a85f6e731d9e8baba170aa7441555c030545 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:16:06 +0200 Subject: [PATCH 158/230] Fix test_replicated_table_attach --- tests/integration/test_replicated_table_attach/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_table_attach/test.py b/tests/integration/test_replicated_table_attach/test.py index 2d209ddaf79..dee2be3fcf7 100644 --- a/tests/integration/test_replicated_table_attach/test.py +++ b/tests/integration/test_replicated_table_attach/test.py @@ -54,7 +54,7 @@ def test_startup_with_small_bg_pool_partitioned(started_cluster): assert_values() with PartitionManager() as pm: pm.drop_instance_zk_connections(node) - node.restart_clickhouse(stop_start_wait_sec=20) + node.restart_clickhouse(stop_start_wait_sec=300) assert_values() # check that we activate it in the end From e2c9f86f39e83b128d0fc82628bdae2ab0b8080b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:31:10 +0200 Subject: [PATCH 159/230] Better usability of a test --- tests/queries/0_stateless/02125_many_mutations.sh | 2 ++ tests/queries/0_stateless/02125_many_mutations_2.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index b42d5bb15d3..54948fa1048 
100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL +$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" $CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" $CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" $CLICKHOUSE_CLIENT -q "system stop merges many_mutations" @@ -49,3 +50,4 @@ $CLICKHOUSE_CLIENT -q "system start merges many_mutations" $CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 $CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" $CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" +$CLICKHOUSE_CLIENT -q "drop table many_mutations" diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index e5e3070a944..0351538b210 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL +$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" $CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" $CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" $CLICKHOUSE_CLIENT -q "system stop merges many_mutations" @@ -51,3 
+52,4 @@ $CLICKHOUSE_CLIENT -q "system flush logs" $CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" $CLICKHOUSE_CLIENT -q "select count() from many_mutations" $CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" +$CLICKHOUSE_CLIENT -q "drop table many_mutations" From 38c163b0662249b4da83e8b812662bf5b6d1a27a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:43:59 +0200 Subject: [PATCH 160/230] Improve test --- .../0_stateless/02125_many_mutations.sh | 32 +++++++++---------- .../0_stateless/02125_many_mutations_2.sh | 32 +++++++++++-------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index 54948fa1048..5a139e8b01d 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -7,19 +7,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL -$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" -$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" -$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" - -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" +$CLICKHOUSE_CLIENT --multiquery -q " +drop table if exists many_mutations; +create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1; +insert into 
many_mutations values (0, 0), (1, 1); +system stop merges many_mutations; +select x, y from many_mutations order by x; +" job() { - for _ in {1..1000} - do - $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" - done + yes "alter table many_mutations update y = y + 1 where 1;" | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery } job & @@ -45,9 +43,11 @@ job & wait -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" -$CLICKHOUSE_CLIENT -q "drop table many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +system start merges many_mutations; +optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +select x, y from many_mutations order by x; +drop table many_mutations; +" diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index 0351538b210..5b779c1b276 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -7,10 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL -$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = 
MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" -$CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" -$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +drop table if exists many_mutations; +create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1; +insert into many_mutations select number, number + 1 from numbers(2000); +system stop merges many_mutations; +" $CLICKHOUSE_CLIENT -q "select count() from many_mutations" @@ -18,8 +20,8 @@ job() { for i in {1..1000} do - $CLICKHOUSE_CLIENT -q "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync=0" - done + echo "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync = 0;" + done | $CLICKHOUSE_CLIENT --multiquery } job & @@ -45,11 +47,13 @@ job & wait -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 -$CLICKHOUSE_CLIENT -q "system flush logs" -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select count() from many_mutations" -$CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" -$CLICKHOUSE_CLIENT -q "drop table many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +system start merges many_mutations; +optimize table 
many_mutations final SETTINGS optimize_throw_if_noop = 1; +system flush logs; +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +select count() from many_mutations; +select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9; +drop table many_mutations; +" From d59f68b6009467e891b96e0725ec308aad236c63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 04:55:46 +0200 Subject: [PATCH 161/230] Remove useless code --- src/Access/Common/AccessType.h | 1 - src/Common/SymbolIndex.cpp | 1 - src/Interpreters/InterpreterSystemQuery.cpp | 15 --------------- src/Parsers/ASTSystemQuery.h | 1 - .../0_stateless/01271_show_privileges.reference | 1 - .../02117_show_create_table_system.reference | 6 +++--- 6 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index f65a77c1d6a..c06bceb87e3 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -157,7 +157,6 @@ enum class AccessType M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \ M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \ - M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_FUNCTION, "SYSTEM RELOAD FUNCTIONS, RELOAD FUNCTION, RELOAD FUNCTIONS", GLOBAL, SYSTEM_RELOAD) \ diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index f1cace5017c..b4ae16670d8 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -9,7 +9,6 @@ #include -//#include #include #include diff --git 
a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..c74ff062471 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -470,16 +470,6 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_RELOAD_USERS); system_context->getAccessControl().reload(AccessControl::ReloadMode::ALL); break; - case Type::RELOAD_SYMBOLS: - { -#if defined(__ELF__) && !defined(OS_FREEBSD) - getContext()->checkAccess(AccessType::SYSTEM_RELOAD_SYMBOLS); - SymbolIndex::reload(); - break; -#else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RELOAD SYMBOLS is not supported on current platform"); -#endif - } case Type::STOP_MERGES: startStopAction(ActionLocks::PartsMerge, false); break; @@ -1056,11 +1046,6 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_RELOAD_USERS); break; } - case Type::RELOAD_SYMBOLS: - { - required_access.emplace_back(AccessType::SYSTEM_RELOAD_SYMBOLS); - break; - } case Type::STOP_MERGES: case Type::START_MERGES: { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..528fbdce2c2 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -56,7 +56,6 @@ public: RELOAD_EMBEDDED_DICTIONARIES, RELOAD_CONFIG, RELOAD_USERS, - RELOAD_SYMBOLS, RESTART_DISK, STOP_MERGES, START_MERGES, diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 9e6249bfcb3..f3c07cf11a7 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -108,7 +108,6 @@ SYSTEM DROP S3 CLIENT CACHE ['SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLO SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM SYSTEM RELOAD CONFIG ['RELOAD CONFIG'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD 
USERS ['RELOAD USERS'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD SYMBOLS ['RELOAD SYMBOLS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELOAD DICTIONARIES'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 3834b05601f..c7aded81ac6 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER 
DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 
'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 
36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD 
EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -582,10 +582,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW 
POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 
151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' 
= 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 
'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED 
COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM 
SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE 
DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 
'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From e8718e04cb2cfed00365f6e75c2c4e5bf2baa925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 05:58:05 +0300 Subject: [PATCH 162/230] Update --- src/Common/SymbolIndex.cpp | 7 ------- src/Common/SymbolIndex.h | 1 - 2 files changed, 8 deletions(-) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index b4ae16670d8..4c7f3827125 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -560,13 +560,6 @@ MultiVersion::Version SymbolIndex::instance() return instanceImpl().get(); } -void SymbolIndex::reload() -{ - instanceImpl().set(std::unique_ptr(new SymbolIndex)); - /// Also drop stacktrace cache. 
- StackTrace::dropCache(); -} - } #endif diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 47162331946..773f59b7914 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -24,7 +24,6 @@ protected: public: static MultiVersion::Version instance(); - static void reload(); struct Symbol { From af0de3d614c41ce24ad631b1e12328efb4dd444b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Jul 2023 06:17:11 +0000 Subject: [PATCH 163/230] Small fixes --- tests/integration/test_multiple_disks/test.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 4a934447345..5561d63840b 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -711,7 +711,7 @@ def test_background_move(start_cluster, name, engine): s1 String ) ENGINE = {engine} ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external, max_replicated_merges_in_queue=0' + SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0 """ ) @@ -735,8 +735,6 @@ def test_background_move(start_cluster, name, engine): assert first_part is not None - used_disks = get_used_disks_for_table(node1, name) - retry = 20 i = 0 # multiple moves can be assigned in parallel so we can move later parts before the oldest @@ -745,9 +743,6 @@ def test_background_move(start_cluster, name, engine): time.sleep(0.5) i += 1 - used_disks = get_used_disks_for_table(node1, name) - assert sum(1 for x in used_disks if x == "jbod1") <= 2 - # first (oldest) part was moved to external assert get_disk_for_part(node1, name, first_part) == "external" @@ -861,9 +856,6 @@ def test_start_stop_moves(start_cluster, name, engine): # first (oldest) part moved to external assert get_disk_for_part(node1, name, first_part) == "external" - used_disks = get_used_disks_for_table(node1, name) - assert sum(1 for x in 
used_disks if x == "jbod1") <= 2 - node1.query(f"SYSTEM START MERGES {name}") finally: node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") From 085f7caccffa20717ac6d96e13a5e8baae84db98 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Jul 2023 06:30:47 +0000 Subject: [PATCH 164/230] Move config changes after configure --- docker/test/upgrade/run.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 82a88272df9..b8061309342 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -67,6 +67,13 @@ start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log +# Start server from previous release +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +# Previous version may not be ready for fault injections +export ZOOKEEPER_FAULT_INJECTION=0 +configure + # force_sync=false doesn't work correctly on some older versions sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ | sed "s|false|true|" \ @@ -81,13 +88,6 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -# Start server from previous release -# Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 -# Previous version may not be ready for fault injections -export ZOOKEEPER_FAULT_INJECTION=0 -configure - # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml From 24e77083b38fbfdbec0d5a6fa8da65cb6a33a602 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:50:44 +0000 Subject: [PATCH 165/230] Commit tests --- 
src/Parsers/ParserCreateQuery.cpp | 3 +- .../02811_primary_key_in_columns.reference | 0 .../02811_primary_key_in_columns.sql | 50 +++++++++---------- 3 files changed, 27 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/02811_primary_key_in_columns.reference diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1941bafab0d..60e15cb92f4 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -311,7 +311,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E if(!primary_key_from_columns) primary_key_from_columns = makeASTFunction("tuple"); auto column_identifier = std::make_shared(cd->name); - primary_key_from_columns->children.push_back(column_identifier); + primary_key_from_columns->children[0]->as()->children.push_back(column_identifier); } columns->children.push_back(elem); } @@ -710,6 +710,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); query->storage->primary_key = query->columns_list->primary_key; + } if (query->columns_list && (query->columns_list->primary_key_from_columns)) diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.reference b/tests/queries/0_stateless/02811_primary_key_in_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.sql b/tests/queries/0_stateless/02811_primary_key_in_columns.sql index df25fdd14ab..0519f4c820b 100644 --- a/tests/queries/0_stateless/02811_primary_key_in_columns.sql +++ b/tests/queries/0_stateless/02811_primary_key_in_columns.sql @@ -23,39 +23,39 @@ DROP TABLE IF EXISTS pk_test21; DROP TABLE IF EXISTS pk_test22; DROP TABLE IF EXISTS pk_test23; -SET default_table_engine=MergeTree; +SET default_table_engine='MergeTree'; -CREATE TABLE pk_test1 (String a PRIMARY KEY, String b, String 
c); -CREATE TABLE pk_test2 (String a PRIMARY KEY, String b PRIMARY KEY, String c); -CREATE TABLE pk_test3 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY); +CREATE TABLE pk_test1 (a String PRIMARY KEY, b String, c String); +CREATE TABLE pk_test2 (a String PRIMARY KEY, b String PRIMARY KEY, c String); +CREATE TABLE pk_test3 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY); -CREATE TABLE pk_test4 (String a, String b PRIMARY KEY, String c PRIMARY KEY); -CREATE TABLE pk_test5 (String a, String b PRIMARY KEY, String c); -CREATE TABLE pk_test6 (String a, String b, String c PRIMARY KEY); +CREATE TABLE pk_test4 (a String, b String PRIMARY KEY, c String PRIMARY KEY); +CREATE TABLE pk_test5 (a String, b String PRIMARY KEY, c String); +CREATE TABLE pk_test6 (a String, b String, c String PRIMARY KEY); -CREATE TABLE pk_test7 (String a PRIMARY KEY, String b, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test8 (String a PRIMARY KEY, String b PRIMARY KEY, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test9 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test7 (a String PRIMARY KEY, b String, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test8 (a String PRIMARY KEY, b String PRIMARY KEY, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test9 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test10 (String a, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); -CREATE TABLE pk_test11 (String a, String b PRIMARY KEY, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test12 (String a, String b, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test10 (a String, b String PRIMARY KEY, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test11 (a String, b String PRIMARY KEY, c String, PRIMARY KEY (a)); -- { 
clientError BAD_ARGUMENTS } +CREATE TABLE pk_test12 (a String, b String, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test12 (String a PRIMARY KEY, String b, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test13 (String a PRIMARY KEY, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test14 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test12 (a String PRIMARY KEY, b String, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test13 (a String PRIMARY KEY, b String PRIMARY KEY, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test14 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test15 (String a, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test16 (String a, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test17 (String a, String b, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test15 (a String, b String PRIMARY KEY, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test16 (a String, b String PRIMARY KEY, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test17 (a String, b String, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test18 (String a PRIMARY KEY, String b, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test19 (String a PRIMARY KEY, String b PRIMARY KEY, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test20 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test18 (a String PRIMARY KEY, b String, c String) ORDER BY (a,b,c); +CREATE TABLE pk_test19 (a String PRIMARY KEY, b String PRIMARY KEY, c String) ORDER BY (a,b,c); +CREATE TABLE pk_test20 (a 
String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY) ORDER BY (a,b,c); -CREATE TABLE pk_test21 (String a, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); -CREATE TABLE pk_test22 (String a, String b PRIMARY KEY, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test23 (String a, String b, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test21 (a String, b String PRIMARY KEY, c String PRIMARY KEY) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } +CREATE TABLE pk_test22 (a String, b String PRIMARY KEY, c String) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } +CREATE TABLE pk_test23 (a String, b String, c String PRIMARY KEY) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS pk_test1; DROP TABLE IF EXISTS pk_test2; From 12ebb30781e8427a1e797464c3bd4675787c87e9 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:34:34 +0000 Subject: [PATCH 166/230] style --- src/Parsers/ParserCreateQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 60e15cb92f4..c4c02ab7417 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -306,9 +306,9 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E { if (auto *cd = elem->as()) { - if(cd->primary_key_specifier) + if (cd->primary_key_specifier) { - if(!primary_key_from_columns) + if (!primary_key_from_columns) primary_key_from_columns = makeASTFunction("tuple"); auto column_identifier = std::make_shared(cd->name); primary_key_from_columns->children[0]->as()->children.push_back(column_identifier); From c7ccf23a24a7fb2bb1245b76fc9169649cd474c3 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:44:06 +0000 Subject: [PATCH 167/230] Update CREATE TABLE docs --- .../mergetree-family/mergetree.md | 50 
+++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 67043ef1062..4f506126682 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -37,8 +37,8 @@ The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [TTL expr1] [CODEC(codec1)] [[NOT] NULL|PRIMARY KEY], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [TTL expr2] [CODEC(codec2)] [[NOT] NULL|PRIMARY KEY], ... INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1], INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], @@ -439,41 +439,41 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - `random_seed` — The seed for Bloom filter hash functions. -Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows: +Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. 
Query statements are as follows: ```sql -CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] -AS -(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2)); - -CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] -AS -(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); - -CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] -AS -(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions); - -CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] -AS +CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2)); + +CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2)))); + +CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster] +AS +(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions); + +CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster] +AS (number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions)))) -``` +``` To use those functions,we need to specify two parameter at least. 
-For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries: - +For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries: + ```sql --- estimate number of bits in the filter -SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes; +SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes; ┌─size_of_bloom_filter_in_bytes─┐ │ 10304 │ └───────────────────────────────┘ - + --- estimate number of hash functions SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions - + ┌─number_of_hash_functions─┐ │ 13 │ └──────────────────────────┘ @@ -991,7 +991,7 @@ use a local disk to cache data from a table stored at a URL. Neither the cache d nor the web storage is configured in the ClickHouse configuration files; both are configured in the CREATE/ATTACH query settings. -In the settings highlighted below notice that the disk of `type=web` is nested within +In the settings highlighted below notice that the disk of `type=web` is nested within the disk of `type=cache`. ```sql @@ -1308,7 +1308,7 @@ configuration file. 
In this sample configuration: - the disk is of type `web` - the data is hosted at `http://nginx:80/test1/` -- a cache on local storage is used +- a cache on local storage is used ```xml From c23e29d6aa836980337683800c6c2b029cfb7c40 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:27:56 +0200 Subject: [PATCH 168/230] don't account session's memory in thread/user mem tracker --- src/Common/MemoryTrackerSwitcher.h | 42 ++++++++++++++++++++ src/IO/HTTPCommon.cpp | 4 ++ src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.h | 35 +++------------- src/Server/InterserverIOHTTPHandler.cpp | 1 + 5 files changed, 54 insertions(+), 30 deletions(-) create mode 100644 src/Common/MemoryTrackerSwitcher.h diff --git a/src/Common/MemoryTrackerSwitcher.h b/src/Common/MemoryTrackerSwitcher.h new file mode 100644 index 00000000000..0fefcbb280a --- /dev/null +++ b/src/Common/MemoryTrackerSwitcher.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +struct MemoryTrackerSwitcher +{ + explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker) + { + if (!current_thread) + throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized"); + + auto * thread_tracker = CurrentThread::getMemoryTracker(); + prev_untracked_memory = current_thread->untracked_memory; + prev_memory_tracker_parent = thread_tracker->getParent(); + + current_thread->untracked_memory = 0; + thread_tracker->setParent(new_tracker); + } + + ~MemoryTrackerSwitcher() + { + CurrentThread::flushUntrackedMemory(); + auto * thread_tracker = CurrentThread::getMemoryTracker(); + + current_thread->untracked_memory = prev_untracked_memory; + thread_tracker->setParent(prev_memory_tracker_parent); + } + + MemoryTracker * prev_memory_tracker_parent = nullptr; + Int64 prev_untracked_memory = 0; +}; + +} diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 
f3e2064c8bf..1731b4022ea 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,9 @@ namespace ObjectPtr allocObject() override { + /// Pool is global, we shouldn't attribute this memory to query/user. + MemoryTrackerSwitcher switcher{&total_memory_tracker}; + auto session = makeHTTPSessionImpl(host, port, https, true, resolve_host); if (!proxy_host.empty()) { diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dc2310cfebf..e6417de53b4 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -125,7 +125,7 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep // Entries data must be destroyed in context of user who runs async insert. // Each entry in the list may correspond to a different user, // so we need to switch current thread's MemoryTracker. 
- UserMemoryTrackerSwitcher switcher(user_memory_tracker); + MemoryTrackerSwitcher switcher(user_memory_tracker); bytes = ""; } diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index bc60c86d067..f18db69a7bb 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -1,10 +1,12 @@ #pragma once -#include -#include -#include #include +#include #include +#include +#include +#include + #include namespace DB @@ -60,31 +62,6 @@ private: UInt128 calculateHash() const; }; - struct UserMemoryTrackerSwitcher - { - explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker) - { - auto * thread_tracker = CurrentThread::getMemoryTracker(); - prev_untracked_memory = current_thread->untracked_memory; - prev_memory_tracker_parent = thread_tracker->getParent(); - - current_thread->untracked_memory = 0; - thread_tracker->setParent(new_tracker); - } - - ~UserMemoryTrackerSwitcher() - { - CurrentThread::flushUntrackedMemory(); - auto * thread_tracker = CurrentThread::getMemoryTracker(); - - current_thread->untracked_memory = prev_untracked_memory; - thread_tracker->setParent(prev_memory_tracker_parent); - } - - MemoryTracker * prev_memory_tracker_parent; - Int64 prev_untracked_memory; - }; - struct InsertData { struct Entry @@ -114,7 +91,7 @@ private: // so we need to switch current thread's MemoryTracker parent on each iteration. 
while (it != entries.end()) { - UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker); + MemoryTrackerSwitcher switcher((*it)->user_memory_tracker); it = entries.erase(it); } } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index ea71d954cc0..9741592868a 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -80,6 +80,7 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { setThreadName("IntersrvHandler"); + ThreadStatus thread_status; /// In order to work keep-alive. if (request.getVersion() == HTTPServerRequest::HTTP_1_1) From aec720563612e3d7faa09bcb2c4b2cc4e5e8935c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 3 Jul 2023 23:11:32 +0200 Subject: [PATCH 169/230] rework pool usage --- src/IO/HTTPCommon.cpp | 44 ++++++++----- src/IO/HTTPCommon.h | 12 ++++ src/IO/ReadBufferFromS3.cpp | 29 ++++++--- src/IO/ReadWriteBufferFromHTTP.cpp | 65 ++++++++----------- .../Formats/Impl/AvroRowInputFormat.cpp | 22 +++---- 5 files changed, 95 insertions(+), 77 deletions(-) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 1731b4022ea..2f5e0a172a0 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -41,6 +42,7 @@ namespace ErrorCodes extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; extern const int UNSUPPORTED_URI_SCHEME; + extern const int LOGICAL_ERROR; } @@ -271,27 +273,17 @@ namespace auto retry_timeout = timeouts.connection_timeout.totalMicroseconds(); auto session = pool_ptr->second->get(retry_timeout); - /// We store exception messages in session data. - /// Poco HTTPSession also stores exception, but it can be removed at any time. 
const auto & session_data = session->sessionData(); - if (!session_data.empty()) + if (session_data.empty() || !Poco::AnyCast(&session_data)) { - auto msg = Poco::AnyCast(session_data); - if (!msg.empty()) - { - LOG_TRACE((&Poco::Logger::get("HTTPCommon")), "Failed communicating with {} with error '{}' will try to reconnect session", host, msg); + session->reset(); - if (resolve_host) - { - updateHostIfIpChanged(session, DNSResolver::instance().resolveHost(host).toString()); - } - } - /// Reset the message, once it has been printed, - /// otherwise you will get report for failed parts on and on, - /// even for different tables (since they uses the same session). - session->attachSessionData({}); + if (resolve_host) + updateHostIfIpChanged(session, DNSResolver::instance().resolveHost(host).toString()); } + session->attachSessionData({}); + setTimeouts(*session, timeouts); return session; @@ -388,4 +380,24 @@ Exception HTTPException::makeExceptionMessage( uri, static_cast(http_status), reason, body); } +void markSessionForReuse(Poco::Net::HTTPSession & session) +{ + const auto & session_data = session.sessionData(); + if (!session_data.empty() && !Poco::AnyCast(&session_data)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Data of an unexpected type ({}) is attached to the session", session_data.type().name()); + + session.attachSessionData(HTTPSessionReuseTag{}); +} + +void markSessionForReuse(HTTPSessionPtr session) +{ + markSessionForReuse(*session); +} + +void markSessionForReuse(PooledHTTPSessionPtr session) +{ + markSessionForReuse(static_cast(*session)); +} + } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index db8fc2a2a40..4733f366c8a 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -55,6 +55,18 @@ private: using PooledHTTPSessionPtr = PoolBase::Entry; // SingleEndpointHTTPSessionPool::Entry using HTTPSessionPtr = std::shared_ptr; +/// If a session have this tag attached, it will be reused without calling `reset()` on it. 
+/// All pooled sessions don't have this tag attached after being taken from a pool. +/// If the request and the response were fully written/read, the client code should add this tag +/// explicitly by calling `markSessionForReuse()`. +struct HTTPSessionReuseTag +{ +}; + +void markSessionForReuse(HTTPSessionPtr session); +void markSessionForReuse(PooledHTTPSessionPtr session); + + void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index fdbe1a4ba57..5c562d32fbc 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,3 +1,4 @@ +#include #include #include "config.h" @@ -35,31 +36,41 @@ namespace ProfileEvents namespace { -void resetSession(Aws::S3::Model::GetObjectResult & read_result) +DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_result) { if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) - { - auto & session - = static_cast(*static_cast(session_aware_stream->getSession())); - session.reset(); - } + return static_cast(session_aware_stream->getSession()); else if (!dynamic_cast *>(&read_result.GetBody())) - { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); + return {}; +} + +void resetSession(Aws::S3::Model::GetObjectResult & read_result) +{ + if (auto session = getSession(read_result); !session.isNull()) + { + auto & http_session = static_cast(*session); + http_session.reset(); } } void resetSessionIfNeeded(bool read_all_range_successfully, std::optional & read_result) { - if (!read_all_range_successfully && read_result) + if (!read_result) + return; + + if (!read_all_range_successfully) { /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete /// operation will hang until 
GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. resetSession(*read_result); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); } - else + else if (auto session = getSession(*read_result); !session.isNull()) + { + DB::markSessionForReuse(session); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); + } } } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index cf1159bfb4b..b834c17ab6c 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -1,5 +1,7 @@ #include "ReadWriteBufferFromHTTP.h" +#include + namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; @@ -146,30 +148,20 @@ std::istream * ReadWriteBufferFromHTTPBase::callImpl( LOG_TRACE(log, "Sending request to {}", uri_.toString()); auto sess = current_session->getSession(); - try - { - auto & stream_out = sess->sendRequest(request); + auto & stream_out = sess->sendRequest(request); - if (out_stream_callback) - out_stream_callback(stream_out); + if (out_stream_callback) + out_stream_callback(stream_out); - auto result_istr = receiveResponse(*sess, request, response, true); - response.getCookies(cookies); + auto result_istr = receiveResponse(*sess, request, response, true); + response.getCookies(cookies); - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (!for_object_info) - content_encoding = response.get("Content-Encoding", ""); + /// we can fetch object info while the request is being processed + /// and we don't want to override any context used by it + if (!for_object_info) + content_encoding = response.get("Content-Encoding", ""); - return result_istr; - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session 
host - sess->attachSessionData(e.message()); - throw; - } + return result_istr; } template @@ -429,23 +421,10 @@ void ReadWriteBufferFromHTTPBase::initialize() if (!read_range.end && response.hasContentLength()) file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0); - try - { - impl = std::make_unique(*istr, buffer_size); + impl = std::make_unique(*istr, buffer_size); - if (use_external_buffer) - { - setupExternalBuffer(); - } - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - auto sess = session->getSession(); - sess->attachSessionData(e.message()); - throw; - } + if (use_external_buffer) + setupExternalBuffer(); } template @@ -460,7 +439,11 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() if ((read_range.end && getOffset() > read_range.end.value()) || (file_info && file_info->file_size && getOffset() >= file_info->file_size.value())) + { + /// Response was fully read. + markSessionForReuse(session->getSession()); return false; + } if (impl) { @@ -582,7 +565,11 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() std::rethrow_exception(exception); if (!result) + { + /// Eof is reached, i.e response was fully read. + markSessionForReuse(session->getSession()); return false; + } internal_buffer = impl->buffer(); working_buffer = internal_buffer; @@ -635,12 +622,14 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si bool cancelled; size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); + if (!cancelled) + /// Response was fully read. 
+ markSessionForReuse(sess); + return r; } catch (const Poco::Exception & e) { - sess->attachSessionData(e.message()); - LOG_ERROR( log, "HTTP request (positioned) to `{}` with range [{}, {}) failed at try {}/{}: {}", diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1ec7491658e..4cd73cb23b5 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -935,23 +935,17 @@ private: request.setHost(url.getHost()); auto session = makePooledHTTPSession(url, timeouts, 1); - std::istream * response_body{}; - try - { - session->sendRequest(request); + session->sendRequest(request); + + Poco::Net::HTTPResponse response; + std::istream * response_body = receiveResponse(*session, request, response, false); - Poco::Net::HTTPResponse response; - response_body = receiveResponse(*session, request, response, false); - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - session->attachSessionData(e.message()); - throw; - } Poco::JSON::Parser parser; auto json_body = parser.parse(*response_body).extract(); + + /// Response was fully read. + markSessionForReuse(session); + auto schema = json_body->getValue("schema"); LOG_TRACE((&Poco::Logger::get("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); return avro::compileJsonSchemaFromString(schema); From 5a6957d95e46861f39bdb1c39e442951b1e26d47 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 13:02:03 +0000 Subject: [PATCH 170/230] Disable ThinLTO on non-Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-compiling on Linux for Mac failed with CMake parameters -DCMAKE_BUILD_TYPE=None -DENABLE_CLICKHOUSE_SELF_EXTRACTING=1 -DENABLE_TESTS=0 (see below). 
This happened e.g. in #51243. The problem was that ThinLTO enabled/disabled depends on ENABLE_TESTS (see the top-level CMakeLists.txt). If ENABLE_TESTS=0 then ThinLTO is activated. On Linux, building/linking works with or without ThinLTO but on Mac building/linking the self-extracting compressor binary doesn’t work if ThinLTO is on. This is quite weird, as a workaround restrict ThinLTO to Linux. ------- [185/187] Linking CXX static library base/glibc-compatibility/libglibc-compatibility.a [186/187] Linking CXX static library contrib/zstd-cmake/lib_zstd.a [187/187] Linking CXX executable utils/self-extracting-executable/pre_compressor -- Configuring done -- Generating done -- Build files have been written to: /home/ubuntu/repo/ch4/build [0/2] Re-checking globbed directories... [108/108] Linking CXX executable utils/self-extracting-executable/pre_compressor FAILED: utils/self-extracting-executable/pre_compressor : && /usr/bin/clang++-16 --target=x86_64-apple-darwin -std=c++20 -fdiagnostics-color=always -Xclang -fuse-ctor-homing -Wno-enum-constexpr-conversion -fsized-deallocation -gdwarf-aranges -pipe -mssse3 -msse4.1 -msse4.2 -mpclmul -mpopcnt -fasynchronous-unwind-tables -ffile-prefix-map=/home/ubuntu/repo/ch4=. 
-falign-functions=32 -mbranches-within-32B-boundaries -stdlib=libc++ -fdiagnostics-absolute-paths -fstrict -vtable-pointers -Wall -Wextra -Wframe-larger-than=65536 -Weverything -Wpedantic -Wno-zero-length-array -Wno-c++98-compat-pedantic -Wno-c++98-compat -Wno-c++20-compat -Wno-sign-conversion -Wno-implicit-int-conversion -Wno-implicit-int-float-conversion -Wno-ctad-maybe-unsupported -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-double-promotion -Wno-exit-time-destructors -Wno-float-equal -Wn o-global-constructors -Wno-missing-prototypes -Wno-missing-variable-declarations -Wno-padded -Wno-switch-enum -Wno-undefined-func-template -Wno-unused-template -Wno-vla -Wno-weak-template-vtables -Wno-weak-vtables -Wno-thread-safety-negative -Wno-enum-constexpr-conversion -Wno-unsafe-buffer-usage -O2 -g -DNDEBUG -O3 -g -gdwarf-4 -flto=thin -fwhole-program-vtables -isysroot /home/ubuntu/repo/ch4/cmake/darwin/.. /toolchain/darwin-x86_64 -mmacosx-version-min=10.15 -Wl,-headerpad_max_install_names --ld-path=/home/ubuntu/cctools/bin/x86_64-apple-darwin-ld -rdynamic -Wl,-U,_inside_main -flto=thin -fwhole-program-vtables utils/self-extracting-executable/CMakeFiles/pre_compressor.dir/compressor.cpp.o -o utils/self-extracting-executable/pre_compressor contrib/zstd-cmake/lib_zstd.a contrib/libcxx-cmake/libcxx.a contrib/lib cxxabi-cmake/libcxxabi.a -nodefaultlibs -lc -lm -lpthread -ldl && : clang: warning: argument unused during compilation: '-stdlib=libc++' [-Wunused-command-line-argument] ld: warning: ignoring file utils/self-extracting-executable/CMakeFiles/pre_compressor.dir/compressor.cpp.o, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0xDE 0xC0 0x17 0x0B 0x00 0x00 0x00 0x00 0x14 0x00 0x00 0x00 0x88 0x3E 0x03 0x00 ) ld: warning: ignoring file contrib/zstd-cmake/lib_zstd.a, building for macOS-x86_64 but attempting to link with file built for macOS-x86_64 ld: warning: ignoring file 
contrib/libcxxabi-cmake/libcxxabi.a, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0x21 0x3C 0x61 0x72 0x63 0x68 0x3E 0x0A 0x23 0x31 0x2F 0x31 0x32 0x20 0x20 0x20 ) ld: warning: ignoring file contrib/libcxx-cmake/libcxx.a, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0x21 0x3C 0x61 0x72 0x63 0x68 0x3E 0x0A 0x23 0x31 0x2F 0x31 0x32 0x20 0x20 0x20 ) Undefined symbols for architecture x86_64: "_main", referenced from: implicit entry/start for main executable ld: symbol(s) not found for architecture x86_64 clang: error: linker command failed with exit code 1 (use -v to see invocation) ninja: build stopped: subcommand failed. --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6ed75bb29..06ee98b5ee1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -344,9 +344,9 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - if (NOT ENABLE_TESTS AND NOT SANITIZE) + if (NOT ENABLE_TESTS AND NOT SANITIZE AND OS_LINUX) # https://clang.llvm.org/docs/ThinLTO.html - # Applies to clang only. + # Applies to clang and linux only. # Disabled when building with tests or sanitizers. option(ENABLE_THINLTO "Clang-specific link time optimization" ON) endif() From 6bbaade4a63524c4c1c4376e18d8fa1f3e3914a9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 6 Jul 2023 13:15:38 +0200 Subject: [PATCH 171/230] Update sccache, do not fail on connection error --- docker/test/util/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 85e888f1df7..b255a2cc23d 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -94,7 +94,10 @@ RUN mkdir /tmp/ccache \ && rm -rf /tmp/ccache ARG TARGETARCH -ARG SCCACHE_VERSION=v0.4.1 +ARG SCCACHE_VERSION=v0.5.4 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ amd64) rarch=x86_64 ;; \ From 7644f0b37c88cd924f20ecec4acc599e50491423 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:44:06 +0000 Subject: [PATCH 172/230] Cosmetics: move code around --- src/IO/VarInt.h | 282 +++++++++++++++++++++++------------------------- 1 file changed, 132 insertions(+), 150 deletions(-) diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index f6441391c8f..a88347d68eb 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -12,24 +12,77 @@ namespace DB /// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding. 
-/// Write UInt64 in variable length format (base128) -void writeVarUInt(UInt64 x, std::ostream & ostr); -void writeVarUInt(UInt64 x, WriteBuffer & ostr); -char * writeVarUInt(UInt64 x, char * ostr); - -/// Read UInt64, written in variable length format (base128) -void readVarUInt(UInt64 & x, std::istream & istr); -void readVarUInt(UInt64 & x, ReadBuffer & istr); -const char * readVarUInt(UInt64 & x, const char * istr, size_t size); - -/// Get the length of an variable-length-encoded integer -size_t getLengthOfVarUInt(UInt64 x); -size_t getLengthOfVarInt(Int64 x); - [[noreturn]] void throwReadAfterEOF(); [[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x); -/// Write Int64 in variable length format (base128) + +/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under +/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the +/// same limitation, others support the full 1<<64 range, e.g. 
clickhouse-driver (Python)) +constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; + +inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + ostr.nextIfAtEnd(); + *ostr.position() = byte; + ++ostr.position(); + + x >>= 7; + if (!x) + return; + } +} + +inline void writeVarUInt(UInt64 x, std::ostream & ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + ostr.put(byte); + + x >>= 7; + if (!x) + return; + } +} + +inline char * writeVarUInt(UInt64 x, char * ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + *ostr = byte; + ++ostr; + + x >>= 7; + if (!x) + return ostr; + } + + return ostr; +} + template inline void writeVarInt(Int64 x, Out & ostr) { @@ -41,8 +94,71 @@ inline char * writeVarInt(Int64 x, char * ostr) return writeVarUInt(static_cast((x << 1) ^ (x >> 63)), ostr); } +namespace impl +{ + +template +inline void readVarUInt(UInt64 & x, ReadBuffer & istr) +{ + x = 0; + for (size_t i = 0; i < 9; ++i) + { + if constexpr (!fast) + if (istr.eof()) [[unlikely]] + throwReadAfterEOF(); + + UInt64 byte = *istr.position(); + ++istr.position(); + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return; + } +} + +} + +inline void readVarUInt(UInt64 & x, ReadBuffer & istr) +{ + if (istr.buffer().end() - istr.position() >= 9) + return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); +} + +inline void readVarUInt(UInt64 & x, std::istream & istr) +{ + x = 0; + for (size_t i = 0; i < 9; ++i) + { + UInt64 byte = istr.get(); + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return; + } +} + +inline 
const char * readVarUInt(UInt64 & x, const char * istr, size_t size) +{ + const char * end = istr + size; + + x = 0; + for (size_t i = 0; i < 9; ++i) + { + if (istr == end) [[unlikely]] + throwReadAfterEOF(); + + UInt64 byte = *istr; + ++istr; + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return istr; + } + + return istr; +} -/// Read Int64, written in variable length format (base128) template inline void readVarInt(Int64 & x, In & istr) { @@ -57,9 +173,6 @@ inline const char * readVarInt(Int64 & x, const char * istr, size_t size) return res; } - -/// For [U]Int32, [U]Int16, size_t. - inline void readVarUInt(UInt32 & x, ReadBuffer & istr) { UInt64 tmp; @@ -97,137 +210,6 @@ inline void readVarUInt(T & x, ReadBuffer & istr) x = tmp; } -template -inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr) -{ - x = 0; - for (size_t i = 0; i < 9; ++i) - { - if constexpr (!fast) - if (istr.eof()) [[unlikely]] - throwReadAfterEOF(); - - UInt64 byte = *istr.position(); - ++istr.position(); - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return; - } -} - -inline void readVarUInt(UInt64 & x, ReadBuffer & istr) -{ - if (istr.buffer().end() - istr.position() >= 9) - return readVarUIntImpl(x, istr); - return readVarUIntImpl(x, istr); -} - - -inline void readVarUInt(UInt64 & x, std::istream & istr) -{ - x = 0; - for (size_t i = 0; i < 9; ++i) - { - UInt64 byte = istr.get(); - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return; - } -} - -inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) -{ - const char * end = istr + size; - - x = 0; - for (size_t i = 0; i < 9; ++i) - { - if (istr == end) [[unlikely]] - throwReadAfterEOF(); - - UInt64 byte = *istr; - ++istr; - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return istr; - } - - return istr; -} - -/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under -/// encoding/decoding). 
This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the -/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python)) -constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; - -inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - ostr.nextIfAtEnd(); - *ostr.position() = byte; - ++ostr.position(); - - x >>= 7; - if (!x) - return; - } -} - - -inline void writeVarUInt(UInt64 x, std::ostream & ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - ostr.put(byte); - - x >>= 7; - if (!x) - return; - } -} - - -inline char * writeVarUInt(UInt64 x, char * ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - *ostr = byte; - ++ostr; - - x >>= 7; - if (!x) - return ostr; - } - - return ostr; -} - - inline size_t getLengthOfVarUInt(UInt64 x) { return x < (1ULL << 7) ? 
1 From 3f744c1e14ba7350c2dab4a8ccf145c26762f0c3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:47:40 +0000 Subject: [PATCH 173/230] Cosmetics: rename template parameter --- src/IO/VarInt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index a88347d68eb..9099b5e7f6a 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -97,13 +97,13 @@ inline char * writeVarInt(Int64 x, char * ostr) namespace impl { -template +template inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { x = 0; for (size_t i = 0; i < 9; ++i) { - if constexpr (!fast) + if constexpr (check_eof) if (istr.eof()) [[unlikely]] throwReadAfterEOF(); @@ -121,8 +121,8 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { if (istr.buffer().end() - istr.position() >= 9) - return impl::readVarUInt(x, istr); - return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); } inline void readVarUInt(UInt64 & x, std::istream & istr) From abf36065b7bbddeba2b80f76ad966a9167852089 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Jul 2023 17:24:06 +0200 Subject: [PATCH 174/230] fix --- .../ReplicatedMergeTreePartCheckThread.cpp | 89 ++++++++++--------- .../ReplicatedMergeTreePartCheckThread.h | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 17 +++- .../__init__.py | 0 .../configs/testkeeper.xml | 6 -- .../test.py | 65 -------------- .../02254_projection_broken_part.reference | 6 ++ .../02254_projection_broken_part.sh | 44 +++++++++ 8 files changed, 115 insertions(+), 116 deletions(-) delete mode 100644 tests/integration/test_projection_report_broken_part/__init__.py delete mode 100644 tests/integration/test_projection_report_broken_part/configs/testkeeper.xml delete mode 100644 tests/integration/test_projection_report_broken_part/test.py create mode 100644 
tests/queries/0_stateless/02254_projection_broken_part.reference create mode 100755 tests/queries/0_stateless/02254_projection_broken_part.sh diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 1cc3736bd2e..ffe3f883f80 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -63,6 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t if (parts_set.contains(name)) return; + LOG_TRACE(log, "Enqueueing {} for check after after {}s", name, delay_to_check_seconds); parts_queue.emplace_back(name, time(nullptr) + delay_to_check_seconds); parts_set.insert(name); task->schedule(); @@ -423,7 +424,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St } -CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name) +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after) { LOG_INFO(log, "Checking part {}", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); @@ -438,7 +439,11 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p break; case ReplicatedCheckResult::RecheckLater: - enqueuePart(part_name, result.recheck_after); + /// NOTE We cannot enqueue it from the check thread itself + if (recheck_after) + *recheck_after = result.recheck_after; + else + enqueuePart(part_name, result.recheck_after); break; case ReplicatedCheckResult::DetachUnexpected: @@ -471,10 +476,22 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). 
- /// Either all replicas having the part are not active, or the part is lost forever. + /// Either all replicas having the part are not active... bool found_something = searchForMissingPartOnOtherReplicas(part_name); - if (!found_something) - onPartIsLostForever(part_name); + if (found_something) + break; + + /// ... or the part is lost forever + bool handled_lost_part = onPartIsLostForever(part_name); + if (handled_lost_part) + break; + + /// We failed to create empty part, need retry + constexpr time_t retry_after_seconds = 30; + if (recheck_after) + *recheck_after = retry_after_seconds; + else + enqueuePart(part_name, retry_after_seconds); break; } @@ -483,7 +500,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p return result.status; } -void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) +bool ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) { auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); if (lost_part_info.level != 0 || lost_part_info.mutation != 0) @@ -499,7 +516,7 @@ void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part for (const String & source_part_name : source_parts) enqueuePart(source_part_name); - return; + return true; } } @@ -512,13 +529,11 @@ void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part */ LOG_ERROR(log, "Part {} is lost forever.", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); + return true; } - else - { - LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); - constexpr time_t retry_after_seconds = 30; - enqueuePart(part_name, retry_after_seconds); - } + + LOG_WARNING(log, "Cannot create empty part {} instead of lost. 
Will retry later", part_name); + return false; } @@ -533,42 +548,29 @@ void ReplicatedMergeTreePartCheckThread::run() /// Take part from the queue for verification. PartsToCheckQueue::iterator selected = parts_queue.end(); /// end from std::list is not get invalidated - time_t min_check_time = std::numeric_limits::max(); { std::lock_guard lock(parts_mutex); - if (parts_queue.empty()) + if (parts_queue.empty() && !parts_set.empty()) { - if (!parts_set.empty()) - { - parts_set.clear(); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty parts_set with empty parts_queue. This is a bug."); - } + parts_set.clear(); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty parts_set with empty parts_queue. This is a bug."); } - else - { - for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it) - { - if (it->second <= current_time) - { - selected = it; - break; - } - if (it->second < min_check_time) - { - min_check_time = it->second; - selected = it; - } - } - } + selected = std::find_if(parts_queue.begin(), parts_queue.end(), [current_time](const auto & elem) + { + return elem.second <= current_time; + }); + if (selected == parts_queue.end()) + return; + + /// Move selected part to the end of the queue + parts_queue.splice(parts_queue.end(), parts_queue, selected); } - if (selected == parts_queue.end()) - return; - - checkPartAndFix(selected->first); + std::optional recheck_after; + checkPartAndFix(selected->first, &recheck_after); if (need_stop) return; @@ -581,6 +583,11 @@ void ReplicatedMergeTreePartCheckThread::run() { throw Exception(ErrorCodes::LOGICAL_ERROR, "Someone erased checking part from parts_queue. 
This is a bug."); } + else if (recheck_after.has_value()) + { + LOG_TRACE(log, "Will recheck part {} after after {}s", selected->first, *recheck_after); + selected->second = time(nullptr) + *recheck_after; + } else { parts_set.erase(selected->first); @@ -596,7 +603,7 @@ void ReplicatedMergeTreePartCheckThread::run() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::Error::ZSESSIONEXPIRED) + if (Coordination::isHardwareError(e.code)) return; task->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 0a8fbc75c05..fc76cbad4ed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -65,7 +65,7 @@ public: size_t size() const; /// Check part by name - CheckResult checkPartAndFix(const String & part_name); + CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr); ReplicatedCheckResult checkPartImpl(const String & part_name); @@ -77,7 +77,7 @@ public: private: void run(); - void onPartIsLostForever(const String & part_name); + bool onPartIsLostForever(const String & part_name); std::pair findLocalPart(const String & part_name); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ff319e47946..e8176ac1d5f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3366,6 +3366,10 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt { disable_reason = fmt::format("Not executing fetch of part {} because we still have broken part with that name. 
" "Waiting for the broken part to be removed first.", entry.new_part_name); + + constexpr time_t min_interval_to_wakeup_cleanup_s = 30; + if (entry.last_postpone_time + min_interval_to_wakeup_cleanup_s < time(nullptr)) + const_cast(this)->cleanup_thread.wakeup(); return false; } } @@ -3753,11 +3757,13 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n DataPartPtr broken_part; auto outdate_broken_part = [this, &broken_part]() { - if (broken_part) + if (!broken_part) return; DataPartsLock lock = lockParts(); if (broken_part->getState() == DataPartState::Active) removePartsFromWorkingSet(NO_TRANSACTION_RAW, {broken_part}, true, &lock); + broken_part.reset(); + cleanup_thread.wakeup(); }; /// We don't know exactly what happened to broken part @@ -3767,6 +3773,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n auto partition_range = getDataPartsVectorInPartitionForInternalUsage({MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}, broken_part_info.partition_id); + Strings detached_parts; for (const auto & part : partition_range) { if (!broken_part_info.contains(part->info)) @@ -3784,7 +3791,9 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n { part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); } + detached_parts.push_back(part->name); } + LOG_WARNING(log, "Detached {} parts covered by broken part {}: {}", detached_parts.size(), part_name, fmt::join(detached_parts, ", ")); ThreadFuzzer::maybeInjectSleep(); ThreadFuzzer::maybeInjectMemoryLimitException(); @@ -3873,10 +3882,14 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n zkutil::KeeperMultiException::check(rc, ops, results); + String path_created = dynamic_cast(*results.back()).path_created; + log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1); + LOG_DEBUG(log, "Created entry {} to fetch missing part {}", 
log_entry->znode_name, part_name); + queue.insert(zookeeper, log_entry); + /// Make the part outdated after creating the log entry. /// Otherwise, if we failed to create the entry, cleanup thread could remove the part from ZooKeeper (leading to diverged replicas) outdate_broken_part(); - queue_updating_task->schedule(); return; } } diff --git a/tests/integration/test_projection_report_broken_part/__init__.py b/tests/integration/test_projection_report_broken_part/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml b/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml deleted file mode 100644 index 617371b13fa..00000000000 --- a/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - testkeeper - - diff --git a/tests/integration/test_projection_report_broken_part/test.py b/tests/integration/test_projection_report_broken_part/test.py deleted file mode 100644 index f376adf4f1a..00000000000 --- a/tests/integration/test_projection_report_broken_part/test.py +++ /dev/null @@ -1,65 +0,0 @@ -# pylint: disable=unused-argument -# pylint: disable=redefined-outer-name -# pylint: disable=line-too-long - -import pytest -import time - -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=[ - "configs/testkeeper.xml", - ], -) - - -@pytest.fixture(scope="module", autouse=True) -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_projection_broken_part(): - node.query( - """ - create table test_projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/clickhouse-tables/test_projection_broken_parts', 'r1') - order by a settings index_granularity = 1; 
- - create table test_projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) - engine ReplicatedMergeTree('/clickhouse-tables/test_projection_broken_parts', 'r2') - order by a settings index_granularity = 1; - - insert into test_projection_broken_parts_1 values (1, 1), (1, 2), (1, 3); - - system sync replica test_projection_broken_parts_2; - """ - ) - - # break projection part - node.exec_in_container( - [ - "bash", - "-c", - "rm /var/lib/clickhouse/data/default/test_projection_broken_parts_1/all_0_0_0/ab.proj/data.bin", - ] - ) - - expected_error = "No such file or directory" - assert expected_error in node.query_and_get_error( - "select sum(b) from test_projection_broken_parts_1 group by a" - ) - - time.sleep(2) - - assert ( - int(node.query("select sum(b) from test_projection_broken_parts_1 group by a")) - == 6 - ) diff --git a/tests/queries/0_stateless/02254_projection_broken_part.reference b/tests/queries/0_stateless/02254_projection_broken_part.reference new file mode 100644 index 00000000000..68538fd31ea --- /dev/null +++ b/tests/queries/0_stateless/02254_projection_broken_part.reference @@ -0,0 +1,6 @@ +1 1 1 all_0_0_0 +1 1 2 all_0_0_0 +1 1 3 all_0_0_0 +2 6 +0 +5 6 diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh new file mode 100755 index 00000000000..d276c67f8de --- /dev/null +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" + +$CLICKHOUSE_CLIENT -q "create table projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) + engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') + order by a settings index_granularity = 1;" + +$CLICKHOUSE_CLIENT -q "create table projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) + engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') + order by a settings index_granularity = 1;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into projection_broken_parts_1 values (1, 1), (1, 2), (1, 3);" +$CLICKHOUSE_CLIENT -q "system sync replica projection_broken_parts_2;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from projection_broken_parts_2 order by b;" +$CLICKHOUSE_CLIENT -q "select 2, sum(b) from projection_broken_parts_2 group by a;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='projection_broken_parts_1' and name='all_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/ab.proj/data.bin" + +$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a;" 2>/dev/null + +num_tries=0 +while ! 
$CLICKHOUSE_CLIENT -q "select 4, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null; do + sleep 1; + num_tries=$((num_tries+1)) + if [ $num_tries -eq 60 ]; then + break + fi +done + +$CLICKHOUSE_CLIENT -q "system sync replica projection_broken_parts_1;" +$CLICKHOUSE_CLIENT -q "select 5, sum(b) from projection_broken_parts_1 group by a;" + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" From 63b9c1ac0670947b49a916b5b6e47cab1dd1d3d0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 6 Jul 2023 18:58:13 +0200 Subject: [PATCH 175/230] add test --- src/Common/ProfileEvents.cpp | 2 ++ src/IO/ReadWriteBufferFromHTTP.cpp | 6 ++++ ...ing_from_s3_with_connection_pool.reference | 1 + ...89_reading_from_s3_with_connection_pool.sh | 32 ++++++++++++++++++- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c9030070bf2..3bee12731aa 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -371,6 +371,8 @@ The server successfully detected this situation and will download merged part fr M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \ M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \ \ + M(ReadWriteBufferFromHTTPPreservedSessions, "Number of HTTP sessions that were preserved in ReadWriteBufferFromHTTP.") \ + \ M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ M(WriteBufferFromS3RequestsErrors, "Number of exceptions while writing to S3.") \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index b834c17ab6c..6d1c0f7aafa 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -5,6 +5,7 @@ namespace 
ProfileEvents { extern const Event ReadBufferSeekCancelConnection; +extern const Event ReadWriteBufferFromHTTPPreservedSessions; } namespace DB @@ -442,6 +443,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() { /// Response was fully read. markSessionForReuse(session->getSession()); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); return false; } @@ -568,6 +570,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() { /// Eof is reached, i.e response was fully read. markSessionForReuse(session->getSession()); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); return false; } @@ -623,8 +626,11 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); if (!cancelled) + { /// Response was fully read. markSessionForReuse(sess); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); + } return r; } diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh index 7a8b94a10a8..ce90157d004 100755 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-random-settings +# Tags: no-fasttest, no-random-settings, no-replicated-database CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ 
-27,3 +27,33 @@ WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query_id='$query_id'; " + + +# Test connection pool in ReadWriteBufferFromHTTP + +query_id=$(${CLICKHOUSE_CLIENT} -nq " +create table mut (n int, m int, k int) engine=ReplicatedMergeTree('/test/02441/{database}/mut', '1') order by n; +set insert_keeper_fault_injection_probability=0; +insert into mut values (1, 2, 3), (10, 20, 30); + +system stop merges mut; +alter table mut delete where n = 10; + +select queryID() from( + -- a funny way to wait for a MUTATE_PART to be assigned + select sleepEachRow(2) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( + 'select 1 where ''MUTATE_PART'' not in (select type from system.replication_queue where database=''' || currentDatabase() || ''' and table=''mut'')' + ), 'LineAsString', 's String') + -- queryID() will be returned for each row, since the query above doesn't return anything we need to return a fake row + union all + select 1 +) limit 1 settings max_threads=1; +" 2>&1) +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -nm --query " +SELECT ProfileEvents['ReadWriteBufferFromHTTPPreservedSessions'] > 0 +FROM system.query_log +WHERE type = 'QueryFinish' + AND current_database = currentDatabase() + AND query_id='$query_id'; +" From 58793816a73b7b17eb72c35f0266276bc40507b4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Jul 2023 19:04:34 +0200 Subject: [PATCH 176/230] fix paranoid check --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 3 ++ src/Storages/StorageReplicatedMergeTree.cpp | 15 ++++--- src/Storages/StorageReplicatedMergeTree.h | 2 + .../02254_projection_broken_part.sh | 4 +- ...2255_broken_parts_chain_on_start.reference | 8 ++++ .../02255_broken_parts_chain_on_start.sh | 43 +++++++++++++++++++ 7 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 
tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference create mode 100755 tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4dc3583c706..b7fde55880e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2651,7 +2651,7 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) { removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); - LOG_DEBUG(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); + LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); old_name.clear(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 792843cbe18..07f46c07466 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -218,6 +218,9 @@ void ReplicatedMergeTreeQueue::createLogEntriesToFetchBrokenParts() for (const auto & broken_part_name : broken_parts) storage.removePartAndEnqueueFetch(broken_part_name, /* storage_init = */true); + Strings parts_in_zk = storage.getZooKeeper()->getChildren(replica_path + "/parts"); + storage.paranoidCheckForCoveredPartsInZooKeeperOnStart(parts_in_zk, {}); + std::lock_guard lock(state_mutex); /// broken_parts_to_enqueue_fetches_on_loading can be assigned only once on table startup, /// so actually no race conditions are possible diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e8176ac1d5f..2da18f69baf 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1260,8 +1260,7 @@ static time_t 
tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin return res; } -static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicatedMergeTree * storage, const Strings & parts_in_zk, - MergeTreeDataFormatVersion format_version, Poco::Logger * log) +void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const { #ifdef ABORT_ON_LOGICAL_ERROR constexpr bool paranoid_check_for_covered_parts_default = true; @@ -1275,15 +1274,15 @@ static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicat return; /// FIXME https://github.com/ClickHouse/ClickHouse/issues/51182 - if (storage->getSettings()->use_metadata_cache) + if (getSettings()->use_metadata_cache) return; ActiveDataPartSet active_set(format_version); for (const auto & part_name : parts_in_zk) active_set.add(part_name); - const auto disks = storage->getStoragePolicy()->getDisks(); - auto path = storage->getRelativeDataPath(); + const auto disks = getStoragePolicy()->getDisks(); + auto path = getRelativeDataPath(); for (const auto & part_name : parts_in_zk) { @@ -1296,6 +1295,9 @@ static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicat if (disk->exists(fs::path(path) / part_name)) found = true; + if (!found) + found = std::find(parts_to_fetch.begin(), parts_to_fetch.end(), part_name) != parts_to_fetch.end(); + if (!found) { LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " @@ -1310,7 +1312,6 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) auto zookeeper = getZooKeeper(); Strings expected_parts_vec = zookeeper->getChildren(fs::path(replica_path) / "parts"); - paranoidCheckForCoveredPartsInZooKeeperOnStart(this, expected_parts_vec, format_version, log); /// Parts in ZK. 
NameSet expected_parts(expected_parts_vec.begin(), expected_parts_vec.end()); @@ -1345,6 +1346,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) if (!getActiveContainingPart(missing_name)) parts_to_fetch.push_back(missing_name); + paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); + /** To check the adequacy, for the parts that are in the FS, but not in ZK, we will only consider not the most recent parts. * Because unexpected new parts usually arise only because they did not have time to enroll in ZK with a rough restart of the server. * It also occurs from deduplicated parts that did not have time to retire. diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index bdd3f0da5bf..72a022fce26 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -584,6 +584,8 @@ private: void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & part_name) override; + void paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const; + /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. 
void removePartAndEnqueueFetch(const String & part_name, bool storage_init); diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh index d276c67f8de..6ba5093f234 100755 --- a/tests/queries/0_stateless/02254_projection_broken_part.sh +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -9,11 +9,11 @@ $CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" $CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" $CLICKHOUSE_CLIENT -q "create table projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') + engine = ReplicatedMergeTree('/test/02254/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') order by a settings index_granularity = 1;" $CLICKHOUSE_CLIENT -q "create table projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') + engine = ReplicatedMergeTree('/test/02254/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') order by a settings index_granularity = 1;" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into projection_broken_parts_1 values (1, 1), (1, 2), (1, 3);" diff --git a/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference new file mode 100644 index 00000000000..d55cb5baf93 --- /dev/null +++ b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference @@ -0,0 +1,8 @@ +1 1 10 all_0_0_0_1 +1 1 20 all_0_0_0_1 +1 1 30 all_0_0_0_1 +0 +0 +1 1 10 all_0_0_0_1 +1 1 20 all_0_0_0_1 +1 1 30 all_0_0_0_1 diff --git a/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh new file mode 100755 index 
00000000000..de260937b9c --- /dev/null +++ b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists rmt2 sync;" + +$CLICKHOUSE_CLIENT -q "create table rmt1 (a int, b int) + engine = ReplicatedMergeTree('/test/02255/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') order by a settings old_parts_lifetime=100500;" + +$CLICKHOUSE_CLIENT -q "create table rmt2 (a int, b int) + engine = ReplicatedMergeTree('/test/02255/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') order by a settings old_parts_lifetime=100500;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt1 values (1, 1), (1, 2), (1, 3);" +$CLICKHOUSE_CLIENT -q "alter table rmt1 update b = b*10 where 1 settings mutations_sync=1" +$CLICKHOUSE_CLIENT -q "system sync replica rmt2;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt2 order by b;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/data.bin" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_0_0_1'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/data.bin" + +$CLICKHOUSE_CLIENT -q "detach table rmt1 sync" +$CLICKHOUSE_CLIENT -q "attach table rmt1" 2>/dev/null + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt1 order 
by b;" + +$CLICKHOUSE_CLIENT -q "truncate table rmt1" + +$CLICKHOUSE_CLIENT -q "SELECT table, lost_part_count FROM system.replicas WHERE database=currentDatabase() AND lost_part_count!=0"; + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" From ec5e26a017c39eb4d76a1b07e4083cc53a225a5d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 Jul 2023 21:08:53 +0200 Subject: [PATCH 177/230] Pin rust nightly (to make it stable) Because of using Rust nightly, and without #49601 the Rust toolchain is very unstable, and can be frequently failed. So let's ping particular version. Also I've looked and it seems that Rust archives stores this archive without any TTL, since there is even a version for 2015 year. Follow-up for: #50541 Signed-off-by: Azat Khuzhin --- docker/packager/binary/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e824161a688..897bcd24d04 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -49,8 +49,8 @@ ENV CARGO_HOME=/rust/cargo ENV PATH="/rust/cargo/bin:${PATH}" RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ chmod 777 -R /rust && \ - rustup toolchain install nightly && \ - rustup default nightly && \ + rustup toolchain install nightly-2023-07-04 && \ + rustup default nightly-2023-07-04 && \ rustup component add rust-src && \ rustup target add aarch64-unknown-linux-gnu && \ rustup target add x86_64-apple-darwin && \ From c1fa38ea8ed98123a780f2a35c41b8eaf85e2ec0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 22:22:36 +0200 Subject: [PATCH 178/230] Add RISC-V 64 to the universal installer --- docs/_includes/install/universal.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 
1699be138c8..5d4571aed9e 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -33,6 +33,9 @@ then elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" + elif [ "${ARCH}" = "riscv64" ] + then + DIR="riscv64" fi elif [ "${OS}" = "FreeBSD" ] then From 271297823ae6abe82908220d1a540fbf0113f4d8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:56:05 +0000 Subject: [PATCH 179/230] Allow var-int encoded 64-bit integers with MSB=1 Resolves: #51486 Until now, it was illegal to encode 64-bit (unsigned) integers with MSB=1, i.e. values > (1ULL<<63) - 1, as var-int. In more detail, the var-int code used by ClickHouse server and client spent at most 9 bytes per value such that 9 * 7 = 63 bits could be encoded. Some 3rd party clients (e.g. Rust clickhouse-rs) had the same limitation, whereas other clients understand the full range (Python clickhouse-driver). PRs #47608 and #48628 added sanity checks as asserts or exceptions during var-int encoding on the server side. This was considered okay as such huge integers so far occurred only during testing (usually fuzzing) but not in practice. Issue #51486 is a new fuzzing issue where the exception thrown from the sanity check led to a half-baked progress packet and as a result, a logical error / server crash. The only fix which is not another bandaid is to allow the full range in var-int coding. Clients will have to allow the full range too, a note will be added to the changelog. (the alternative was to create another protocol version but as var-int is used all over the place this was considered infeasible) Review note: this is the relevant commit. 
--- src/IO/VarInt.cpp | 9 --- src/IO/VarInt.h | 65 ++++++++----------- src/Server/TCPHandler.cpp | 7 +- .../0_stateless/02812_large_varints.reference | 0 .../0_stateless/02812_large_varints.sql | 4 ++ 5 files changed, 34 insertions(+), 51 deletions(-) create mode 100644 tests/queries/0_stateless/02812_large_varints.reference create mode 100644 tests/queries/0_stateless/02812_large_varints.sql diff --git a/src/IO/VarInt.cpp b/src/IO/VarInt.cpp index ca4b95fcb60..a4b249b01d7 100644 --- a/src/IO/VarInt.cpp +++ b/src/IO/VarInt.cpp @@ -6,7 +6,6 @@ namespace DB namespace ErrorCodes { extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int BAD_ARGUMENTS; } void throwReadAfterEOF() @@ -14,12 +13,4 @@ void throwReadAfterEOF() throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof"); } -void throwValueTooLargeForVarIntEncoding(UInt64 x) -{ - /// Under practical circumstances, we should virtually never end up here but AST Fuzzer manages to create superlarge input integers - /// which trigger this exception. Intentionally not throwing LOGICAL_ERROR or calling abort() or [ch]assert(false), so AST Fuzzer - /// can swallow the exception and continue to run. - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is too large for VarInt encoding", x); -} - } diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 9099b5e7f6a..2a2743e3407 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -13,73 +13,59 @@ namespace DB /// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding. [[noreturn]] void throwReadAfterEOF(); -[[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x); -/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under -/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. 
clickhouse-rs (Rust), have the -/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python)) -constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; - inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; + uint8_t byte = 0x80 | (x & 0x7F); ostr.nextIfAtEnd(); *ostr.position() = byte; ++ostr.position(); x >>= 7; - if (!x) - return; } + + uint8_t final_byte = static_cast(x); + + ostr.nextIfAtEnd(); + *ostr.position() = final_byte; + ++ostr.position(); } inline void writeVarUInt(UInt64 x, std::ostream & ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - + uint8_t byte = 0x80 | (x & 0x7F); ostr.put(byte); x >>= 7; - if (!x) - return; } + + uint8_t final_byte = static_cast(x); + ostr.put(final_byte); } inline char * writeVarUInt(UInt64 x, char * ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; + uint8_t byte = 0x80 | (x & 0x7F); *ostr = byte; ++ostr; x >>= 7; - if (!x) - return ostr; } + uint8_t final_byte = static_cast(x); + + *ostr = final_byte; + ++ostr; + return ostr; } @@ -101,7 +87,7 @@ template inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { if constexpr (check_eof) if (istr.eof()) [[unlikely]] @@ -120,7 +106,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { - if (istr.buffer().end() - istr.position() >= 9) + if (istr.buffer().end() - istr.position() >= 10) return impl::readVarUInt(x, istr); 
return impl::readVarUInt(x, istr); } @@ -128,7 +114,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, std::istream & istr) { x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { UInt64 byte = istr.get(); x |= (byte & 0x7F) << (7 * i); @@ -143,7 +129,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) const char * end = istr + size; x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { if (istr == end) [[unlikely]] throwReadAfterEOF(); @@ -220,7 +206,8 @@ inline size_t getLengthOfVarUInt(UInt64 x) : (x < (1ULL << 42) ? 6 : (x < (1ULL << 49) ? 7 : (x < (1ULL << 56) ? 8 - : 9))))))); + : (x < (1ULL << 63) ? 9 + : 10)))))))); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 4d9fb47c893..36566832ebc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1905,17 +1905,18 @@ void TCPHandler::sendData(const Block & block) { initBlockOutput(block); - auto prev_bytes_written_out = out->count(); - auto prev_bytes_written_compressed_out = state.maybe_compressed_out->count(); + size_t prev_bytes_written_out = out->count(); + size_t prev_bytes_written_compressed_out = state.maybe_compressed_out->count(); try { /// For testing hedged requests if (unknown_packet_in_send_data) { + constexpr UInt64 marker = (1ULL<<63) - 1; --unknown_packet_in_send_data; if (unknown_packet_in_send_data == 0) - writeVarUInt(VAR_UINT_MAX, *out); + writeVarUInt(marker, *out); } writeVarUInt(Protocol::Server::Data, *out); diff --git a/tests/queries/0_stateless/02812_large_varints.reference b/tests/queries/0_stateless/02812_large_varints.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02812_large_varints.sql b/tests/queries/0_stateless/02812_large_varints.sql new file mode 100644 index 00000000000..cfbebb7292e --- /dev/null +++ b/tests/queries/0_stateless/02812_large_varints.sql @@ -0,0 
+1,4 @@ +-- 64-bit integers with MSB set (i.e. values > (1ULL<<63) - 1) could for historical/compat reasons not be serialized as var-ints (issue #51486). +-- These two queries internally produce such big values, run them to be sure no bad things happen. +SELECT topKWeightedState(65535)(now(), -2) FORMAT Null; +SELECT number FROM numbers(toUInt64(-1)) limit 10 Format Null; From fc19e74ba9084e66a7ff43565ef80a78dda65570 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 01:12:17 +0200 Subject: [PATCH 180/230] fix deadlock on DatabaseCatalog shutdown --- src/Interpreters/DatabaseCatalog.cpp | 11 ++++++++++- src/Interpreters/DatabaseCatalog.h | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cb2f6e3b3d..dc1861b3bd8 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -56,6 +56,7 @@ namespace ErrorCodes extern const int DATABASE_ACCESS_DENIED; extern const int LOGICAL_ERROR; extern const int HAVE_DEPENDENT_OBJECTS; + extern const int UNFINISHED; } TemporaryTableHolder::TemporaryTableHolder(ContextPtr context_, const TemporaryTableHolder::Creator & creator, const ASTPtr & query) @@ -196,6 +197,9 @@ void DatabaseCatalog::startupBackgroundCleanup() void DatabaseCatalog::shutdownImpl() { + is_shutting_down = true; + wait_table_finally_dropped.notify_all(); + if (cleanup_task) (*cleanup_task)->deactivate(); @@ -1160,8 +1164,13 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) std::unique_lock lock{tables_marked_dropped_mutex}; wait_table_finally_dropped.wait(lock, [&]() TSA_REQUIRES(tables_marked_dropped_mutex) -> bool { - return !tables_marked_dropped_ids.contains(uuid); + return !tables_marked_dropped_ids.contains(uuid) || is_shutting_down; }); + + /// TSA doesn't support unique_lock + if (TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid)) + throw 
Exception(ErrorCodes::UNFINISHED, "Did not finish dropping the table with UUID {} because the server is shutting down, " + "will finish after restart", uuid); } void DatabaseCatalog::addDependencies( diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 258ea2dee7c..d502505027f 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -308,6 +308,8 @@ private: Poco::Logger * log; + std::atomic_bool is_shutting_down = false; + /// Do not allow simultaneous execution of DDL requests on the same table. /// database name -> database guard -> (table name mutex, counter), /// counter: how many threads are running a query on the table at the same time From 3ec617b1840e7a64761c0e45926719a6d41363c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:31:52 +0200 Subject: [PATCH 181/230] Fix build --- programs/keeper-converter/KeeperConverter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index a049e6bc2b3..20448aafa2f 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -42,7 +42,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) { auto keeper_context = std::make_shared(true); keeper_context->setDigestEnabled(true); - keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as(), 0)); + keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); From 48eb30de513f3561eef6cd8be661023438405e0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:41:36 +0200 Subject: [PATCH 182/230] Fix build --- src/Coordination/KeeperContext.cpp | 6 +++--- 
src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..408344ee67f 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path, 0); + return std::make_shared("LocalLogDisk", path); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path, 0); + return std::make_shared("LocalStateFileDisk", path); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..6df149bbfbe 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } void setSnapshotDirectory(const std::string & path) { - 
keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..51a09b676dc 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From c47b32b17a59202f4b21f5cff09898d41d436925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:52:18 +0200 Subject: [PATCH 183/230] Fix build --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- 
utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 408344ee67f..3c3c0500540 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path); + return std::make_shared("LocalLogDisk", path, 0); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path); + return std::make_shared("LocalSnapshotDisk", path, 0); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path); + return std::make_shared("LocalStateFileDisk", path, 0); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 6df149bbfbe..0f60c960b8b 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); + 
keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 51a09b676dc..5a6fd15d72c 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From 685f2949b75fad05bf1959931b626b73cdab55e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 02:53:13 +0300 Subject: [PATCH 184/230] Revert "Fix build" --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 
insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..408344ee67f 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path, 0); + return std::make_shared("LocalLogDisk", path); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path, 0); + return std::make_shared("LocalStateFileDisk", path); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..6df149bbfbe 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } void 
setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..51a09b676dc 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From d30be39224f94618393c9502961632422b6676f5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:28:33 +0200 Subject: [PATCH 185/230] Fix flaky test 00175_partition_by_ignore and move it to correct location --- .../00175_partition_by_ignore.reference | 0 .../{1_stateful => 0_stateless}/00175_partition_by_ignore.sql | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/queries/{1_stateful => 
0_stateless}/00175_partition_by_ignore.reference (100%) rename tests/queries/{1_stateful => 0_stateless}/00175_partition_by_ignore.sql (90%) diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference b/tests/queries/0_stateless/00175_partition_by_ignore.reference similarity index 100% rename from tests/queries/1_stateful/00175_partition_by_ignore.reference rename to tests/queries/0_stateless/00175_partition_by_ignore.reference diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.sql b/tests/queries/0_stateless/00175_partition_by_ignore.sql similarity index 90% rename from tests/queries/1_stateful/00175_partition_by_ignore.sql rename to tests/queries/0_stateless/00175_partition_by_ignore.sql index 737d1b59fe3..19d63c82a87 100644 --- a/tests/queries/1_stateful/00175_partition_by_ignore.sql +++ b/tests/queries/0_stateless/00175_partition_by_ignore.sql @@ -2,7 +2,7 @@ SELECT '-- check that partition key with ignore works correctly'; DROP TABLE IF EXISTS partition_by_ignore SYNC; -CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple(); +CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO partition_by_ignore SELECT toDateTime('2022-08-03 00:00:00') + toIntervalDay(number), toDateTime('2022-08-04 00:00:00') + toIntervalDay(number) FROM numbers(60); EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; From f8ac899c3fefb1268a5197dc4d85c2ee1eb174ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:49:50 +0200 Subject: [PATCH 186/230] Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool --- tests/queries/0_stateless/02360_send_logs_level_colors.sh | 6 +++--- 1 
file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02360_send_logs_level_colors.sh b/tests/queries/0_stateless/02360_send_logs_level_colors.sh index 0585e779815..a9b7d4dd3c1 100755 --- a/tests/queries/0_stateless/02360_send_logs_level_colors.sh +++ b/tests/queries/0_stateless/02360_send_logs_level_colors.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace + # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh file_name="${CLICKHOUSE_TMP}/res_${CLICKHOUSE_DATABASE}.log" -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') # Run query via expect to make isatty() return true function run() @@ -20,8 +21,7 @@ spawn bash -c "$command" expect 1 EOF - file "$file_name" | grep -o "ASCII text" - file "$file_name" | grep -o "with escape sequences" + rg -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" } run "$CLICKHOUSE_CLIENT -q 'SELECT 1' 2>$file_name" From f0cc90a7fb0dcf75725e0f4e437828cbb4465143 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:07:03 +0200 Subject: [PATCH 187/230] Revert "Merge pull request #51822 from kssenii/minor-changes" This reverts commit 5ac85f4fa888b4cca9d433b98505d52777281c6e, reversing changes made to 376c903da9502fb2efce180178d96c14a664f298. 
--- src/Interpreters/FilesystemCacheLog.h | 11 ++++++++++- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.h | 4 ++-- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 0d088a922e0..d6dd00e5463 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,7 +11,16 @@ namespace DB { - +/// +/// -------- Column --------- Type ------ +/// | event_date | DateTime | +/// | event_time | UInt64 | +/// | query_id | String | +/// | remote_file_path | String | +/// | segment_range | Tuple | +/// | read_type | String | +/// ------------------------------------- +/// struct FilesystemCacheLogElement { enum class CacheType diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1c2eb66923e..e1ff8676bc7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,15 +370,15 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.key_to_drop.empty()) + if (query.delete_key.empty()) { cache->removeAllReleasable(); } else { - auto key = FileCacheKey::fromKeyString(query.key_to_drop); - if (query.offset_to_drop.has_value()) - cache->removeFileSegment(key, query.offset_to_drop.value()); + auto key = FileCacheKey::fromKeyString(query.delete_key); + if (query.delete_offset.has_value()) + cache->removeFileSegment(key, query.delete_offset.value()); else cache->removeKey(key); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 22244a7075c..9c5e7bff61e 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -212,11 +212,11 @@ void ASTSystemQuery::formatImpl(const 
FormatSettings & settings, FormatState &, if (!filesystem_cache_name.empty()) { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!key_to_drop.empty()) + if (!delete_key.empty()) { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; - if (offset_to_drop.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << offset_to_drop.value(); + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; + if (delete_offset.has_value()) + settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 6c81162f103..ebc3e9cd430 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string key_to_drop; - std::optional offset_to_drop; + std::string delete_key; + std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 09c86876b48..ef71e994d56 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -409,9 +409,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->filesystem_cache_name = ast->as()->value.safeGet(); if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) { - res->key_to_drop = ast->as()->name(); + res->delete_key = ast->as()->name(); if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->offset_to_drop = ast->as()->value.safeGet(); + res->delete_offset = ast->as()->value.safeGet(); } } if (!parseQueryWithOnCluster(res, pos, expected)) From 7cece62d26d01621f2cd9e8cc8b6b7a68d808dd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:07:19 +0200 Subject: [PATCH 188/230] Revert 
"Merge pull request #51547 from kssenii/more-flexible-drop-cache" This reverts commit 2ce7bcaa3d5fb36a11ae0211eabd5a89c2a8c5de, reversing changes made to e897207cd5402307295fb3dcf5c8650d5e0a4668. --- .../IO/CachedOnDiskReadBufferFromFile.cpp | 13 ++-- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 - src/Interpreters/Cache/FileCache.cpp | 34 +++++----- src/Interpreters/Cache/FileCache.h | 12 +--- src/Interpreters/Cache/FileCacheKey.cpp | 5 -- src/Interpreters/Cache/FileCacheKey.h | 2 - src/Interpreters/Cache/Metadata.cpp | 26 +------ src/Interpreters/Cache/Metadata.h | 8 +-- src/Interpreters/FilesystemCacheLog.cpp | 4 -- src/Interpreters/FilesystemCacheLog.h | 2 - src/Interpreters/InterpreterSystemQuery.cpp | 13 +--- src/Parsers/ASTSystemQuery.cpp | 8 --- src/Parsers/ASTSystemQuery.h | 2 - src/Parsers/ParserSystemQuery.cpp | 8 --- ...2808_filesystem_cache_drop_query.reference | 4 -- .../02808_filesystem_cache_drop_query.sh | 67 ------------------- 17 files changed, 30 insertions(+), 182 deletions(-) delete mode 100644 tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference delete mode 100755 tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 15b6a9211de..81aa29639ac 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -74,22 +74,19 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( } void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( - const FileSegment & file_segment, CachedOnDiskReadBufferFromFile::ReadType type) + const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type) { if (!cache_log) return; - const auto range = file_segment.range(); FilesystemCacheLogElement elem { .event_time = 
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .query_id = query_id, .source_file_path = source_file_path, - .file_segment_range = { range.left, range.right }, + .file_segment_range = { file_segment_range.left, file_segment_range.right }, .requested_range = { first_offset, read_until_position }, - .file_segment_key = file_segment.key().toString(), - .file_segment_offset = file_segment.offset(), - .file_segment_size = range.size(), + .file_segment_size = file_segment_range.size(), .read_from_cache_attempted = true, .read_buffer_id = current_buffer_id, .profile_counters = std::make_shared( @@ -498,7 +495,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() auto completed_range = current_file_segment->range(); if (cache_log) - appendFilesystemCacheLog(*current_file_segment, read_type); + appendFilesystemCacheLog(completed_range, read_type); chassert(file_offset_of_buffer_end > completed_range.right); @@ -521,7 +518,7 @@ CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile() { if (cache_log && file_segments && !file_segments->empty()) { - appendFilesystemCacheLog(file_segments->front(), read_type); + appendFilesystemCacheLog(file_segments->front().range(), read_type); } } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 36cf8a54183..b4e7701de75 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -90,7 +90,7 @@ private: bool completeFileSegmentAndGetNext(); - void appendFilesystemCacheLog(const FileSegment & file_segment, ReadType read_type); + void appendFilesystemCacheLog(const FileSegment::Range & file_segment_range, ReadType read_type); bool writeCache(char * data, size_t size, size_t offset, FileSegment & file_segment); diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 2cd90731f1d..16c1def7b11 100644 --- 
a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -90,8 +90,6 @@ void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() .source_file_path = current_object.remote_path, .file_segment_range = { 0, current_object.bytes_size }, .cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE, - .file_segment_key = {}, - .file_segment_offset = {}, .file_segment_size = current_object.bytes_size, .read_from_cache_attempted = false, }; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index de8ae33433a..91d1c63e832 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -806,13 +806,6 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) return true; } -void FileCache::removeKey(const Key & key) -{ - assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); - locked_key->removeAll(); -} - void FileCache::removeKeyIfExists(const Key & key) { assertInitialized(); @@ -825,14 +818,7 @@ void FileCache::removeKeyIfExists(const Key & key) /// But if we have multiple replicated zero-copy tables on the same server /// it became possible to start removing something from cache when it is used /// by other "zero-copy" tables. That is why it's not an error. 
- locked_key->removeAll(/* if_releasable */true); -} - -void FileCache::removeFileSegment(const Key & key, size_t offset) -{ - assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); - locked_key->removeFileSegment(offset); + locked_key->removeAllReleasable(); } void FileCache::removePathIfExists(const String & path) @@ -844,12 +830,22 @@ void FileCache::removeAllReleasable() { assertInitialized(); - metadata.iterate([](LockedKey & locked_key) { locked_key.removeAll(/* if_releasable */true); }); + auto lock = lockCache(); + + main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { + if (segment_metadata->releasable()) + { + auto file_segment = segment_metadata->file_segment; + locked_key.removeFileSegment(file_segment->offset(), file_segment->lock()); + return PriorityIterationResult::REMOVE_AND_CONTINUE; + } + return PriorityIterationResult::CONTINUE; + }, lock); if (stash) { /// Remove all access information. 
- auto lock = lockCache(); stash->records.clear(); stash->queue->removeAll(lock); } @@ -919,7 +915,7 @@ void FileCache::loadMetadata() continue; } - const auto key = Key::fromKeyString(key_directory.filename().string()); + const auto key = Key(unhexUInt(key_directory.filename().string().data())); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true); for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it) @@ -1074,7 +1070,7 @@ FileSegmentsHolderPtr FileCache::getSnapshot() FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key) { FileSegments file_segments; - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata()) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); return std::make_unique(std::move(file_segments)); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2e6a5094758..0e3b17baa2f 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -83,19 +83,13 @@ public: FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); - /// Remove file segment by `key` and `offset`. Throws if file segment does not exist. - void removeFileSegment(const Key & key, size_t offset); - - /// Remove files by `key`. Throws if key does not exist. - void removeKey(const Key & key); - - /// Remove files by `key`. + /// Remove files by `key`. Removes files which might be used at the moment. void removeKeyIfExists(const Key & key); - /// Removes files by `path`. + /// Removes files by `path`. Removes files which might be used at the moment. 
void removePathIfExists(const String & path); - /// Remove files by `key`. + /// Remove files by `key`. Will not remove files which are used at the moment. void removeAllReleasable(); std::vector tryGetCachePaths(const Key & key); diff --git a/src/Interpreters/Cache/FileCacheKey.cpp b/src/Interpreters/Cache/FileCacheKey.cpp index 772fcd600bf..f97cdc058aa 100644 --- a/src/Interpreters/Cache/FileCacheKey.cpp +++ b/src/Interpreters/Cache/FileCacheKey.cpp @@ -28,9 +28,4 @@ FileCacheKey FileCacheKey::random() return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } -FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str) -{ - return FileCacheKey(unhexUInt(key_str.data())); -} - } diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index e788cd5e7cd..bab8359732c 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -21,8 +21,6 @@ struct FileCacheKey static FileCacheKey random(); bool operator==(const FileCacheKey & other) const { return key == other.key; } - - static FileCacheKey fromKeyString(const std::string & key_str); }; using FileCacheKeyAndOffset = std::pair; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 0a2d58432e4..bfaa00eac2c 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -25,7 +25,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_) @@ -192,8 +191,6 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( if (it == end()) { if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); else if (key_not_found_policy == 
KeyNotFoundPolicy::RETURN_NULL) return nullptr; @@ -218,8 +215,6 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( return locked_metadata; if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) @@ -563,11 +558,11 @@ bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const return file_segment_metadata->file_segment.use_count() == 2; } -void LockedKey::removeAll(bool if_releasable) +void LockedKey::removeAllReleasable() { for (auto it = key_metadata->begin(); it != key_metadata->end();) { - if (if_releasable && !it->second->releasable()) + if (!it->second->releasable()) { ++it; continue; @@ -588,32 +583,17 @@ void LockedKey::removeAll(bool if_releasable) } } -KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset) -{ - auto it = key_metadata->find(offset); - if (it == key_metadata->end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset); - - auto file_segment = it->second->file_segment; - return removeFileSegmentImpl(it, file_segment->lock()); -} - KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); - return removeFileSegmentImpl(it, segment_lock); -} - -KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock) -{ auto file_segment = it->second->file_segment; LOG_DEBUG( key_metadata->log, "Remove from cache. 
Key: {}, offset: {}, size: {}", - getKey(), file_segment->offset(), file_segment->reserved_size); + getKey(), offset, file_segment->reserved_size); chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 42d74338e12..503c19f4150 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -87,7 +87,7 @@ struct CacheMetadata : public std::unordered_map, { public: using Key = FileCacheKey; - using IterateCacheMetadataFunc = std::function; + using IterateCacheMetadataFunc = std::function; explicit CacheMetadata(const std::string & path_); @@ -106,7 +106,6 @@ public: enum class KeyNotFoundPolicy { THROW, - THROW_LOGICAL, CREATE_EMPTY, RETURN_NULL, }; @@ -170,10 +169,9 @@ struct LockedKey : private boost::noncopyable std::shared_ptr getKeyMetadata() const { return key_metadata; } std::shared_ptr getKeyMetadata() { return key_metadata; } - void removeAll(bool if_releasable = true); + void removeAllReleasable(); KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &); - KeyMetadata::iterator removeFileSegment(size_t offset); void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); @@ -190,8 +188,6 @@ struct LockedKey : private boost::noncopyable std::string toString() const; private: - KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &); - const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. 
}; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index b660db064d1..17f0fda71ec 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,8 +40,6 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, - {"key", std::make_shared()}, - {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -62,8 +60,6 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); - columns[i++]->insert(file_segment_key); - columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index d6dd00e5463..1b22d561c51 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -39,8 +39,6 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; - std::string file_segment_key; - size_t file_segment_offset; size_t file_segment_size; bool read_from_cache_attempted; String read_buffer_id; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..f2d011b12d1 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,18 +370,7 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = 
FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.delete_key.empty()) - { - cache->removeAllReleasable(); - } - else - { - auto key = FileCacheKey::fromKeyString(query.delete_key); - if (query.delete_offset.has_value()) - cache->removeFileSegment(key, query.delete_offset.value()); - else - cache->removeKey(key); - } + cache->removeAllReleasable(); } break; } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9c5e7bff61e..a91449ff035 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -210,15 +210,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) - { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!delete_key.empty()) - { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; - if (delete_offset.has_value()) - settings.ostr << (settings.hilite ? 
hilite_none : "") << " OFFSET " << delete_offset.value(); - } - } } else if (type == Type::UNFREEZE) { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..ca4802d9a9b 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,6 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string delete_key; - std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ef71e994d56..48dbe60e241 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -405,15 +405,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserLiteral path_parser; ASTPtr ast; if (path_parser.parse(pos, ast, expected)) - { res->filesystem_cache_name = ast->as()->value.safeGet(); - if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) - { - res->delete_key = ast->as()->name(); - if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->delete_offset = ast->as()->value.safeGet(); - } - } if (!parseQueryWithOnCluster(res, pos, expected)) return false; break; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference deleted file mode 100644 index d80fc78e03d..00000000000 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference +++ /dev/null @@ -1,4 +0,0 @@ -1 -0 -1 -0 diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh deleted file mode 100755 index 9d987d0ebf2..00000000000 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings - -# set -x - -CUR_DIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - -disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" -$CLICKHOUSE_CLIENT -nm --query """ -DROP TABLE IF EXISTS test; -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); - -INSERT INTO test SELECT 1, 'test'; -""" - -query_id=$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q " system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key OFFSET $offset; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""" - -query_id=$RANDOM$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q " system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key'; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM 
system.filesystem_cache WHERE key = '$key'; -""" From d0ad416e352f39e20b034c5ee1b51cb9efdc6aec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:41:47 +0200 Subject: [PATCH 189/230] Fix flaky test detach_attach_partition_race --- .../0_stateless/01164_detach_attach_partition_race.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 7640b9dddf2..3aec4c3445d 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -2,9 +2,12 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT -q "drop table if exists mt" + $CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000" $CLICKHOUSE_CLIENT -q "insert into mt values (1)" $CLICKHOUSE_CLIENT -q "insert into mt values (2)" @@ -13,7 +16,9 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)" function thread_insert() { while true; do - $CLICKHOUSE_CLIENT -q "insert into mt values (rand())"; + # It might be the case that the threads are terminated and exited, but some children didn't and they are still sending queries when we are dropping tables. + # That's why the "Table doesn't exist" error is allowed, while other errors don't. 
+ $CLICKHOUSE_CLIENT -q "insert into mt values (rand())" 2>&1 | tr -d '\n' | rg -v "Table .+ doesn't exist"; done } From 2246e86159824f9e658ca28ecb796295a1b8585c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 05:30:32 +0200 Subject: [PATCH 190/230] Fix error in subquery operators --- .../AggregateFunctionMinMaxAny.h | 48 +++++++++++-------- .../02812_subquery_operators.reference | 6 +++ .../0_stateless/02812_subquery_operators.sql | 6 +++ 3 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02812_subquery_operators.reference create mode 100644 tests/queries/0_stateless/02812_subquery_operators.sql diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 5312df32459..6bfa6895a5c 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -51,7 +51,8 @@ private: T value = T{}; public: - static constexpr bool is_nullable = false; + static constexpr bool result_is_nullable = false; + static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; bool has() const @@ -501,7 +502,8 @@ private: char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero. public: - static constexpr bool is_nullable = false; + static constexpr bool result_is_nullable = false; + static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; bool has() const @@ -769,7 +771,7 @@ static_assert( /// For any other value types. 
-template +template struct SingleValueDataGeneric { private: @@ -779,12 +781,13 @@ private: bool has_value = false; public: - static constexpr bool is_nullable = IS_NULLABLE; + static constexpr bool result_is_nullable = RESULT_IS_NULLABLE; + static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE; static constexpr bool is_any = false; bool has() const { - if constexpr (is_nullable) + if constexpr (result_is_nullable) return has_value; return !value.isNull(); } @@ -820,14 +823,14 @@ public: void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); - if constexpr (is_nullable) + if constexpr (result_is_nullable) has_value = true; } void change(const Self & to, Arena *) { value = to.value; - if constexpr (is_nullable) + if constexpr (result_is_nullable) has_value = true; } @@ -844,7 +847,7 @@ public: bool changeFirstTime(const Self & to, Arena * arena) { - if (!has() && (is_nullable || to.has())) + if (!has() && (result_is_nullable || to.has())) { change(to, arena); return true; @@ -879,7 +882,7 @@ public: } else { - if constexpr (is_nullable) + if constexpr (result_is_nullable) { Field new_value; column.get(row_num, new_value); @@ -910,7 +913,7 @@ public: { if (!to.has()) return false; - if constexpr (is_nullable) + if constexpr (result_is_nullable) { if (!has()) { @@ -945,7 +948,7 @@ public: } else { - if constexpr (is_nullable) + if constexpr (result_is_nullable) { Field new_value; column.get(row_num, new_value); @@ -975,7 +978,7 @@ public: { if (!to.has()) return false; - if constexpr (is_nullable) + if constexpr (result_is_nullable) { if (!value.isNull() && (to.value.isNull() || value < to.value)) { @@ -1138,13 +1141,20 @@ struct AggregateFunctionAnyLastData : Data #endif }; + +/** The aggregate function 'singleValueOrNull' is used to implement subquery operators, + * such as x = ALL (SELECT ...) + * It checks if there is only one unique non-NULL value in the data. + * If there is only one unique value - returns it. 
+ * If there are zero or at least two distinct values - returns NULL. + */ template struct AggregateFunctionSingleValueOrNullData : Data { - static constexpr bool is_nullable = true; - using Self = AggregateFunctionSingleValueOrNullData; + static constexpr bool result_is_nullable = true; + bool first_value = true; bool is_null = false; @@ -1166,7 +1176,7 @@ struct AggregateFunctionSingleValueOrNullData : Data if (!to.has()) return; - if (first_value) + if (first_value && !to.first_value) { first_value = false; this->change(to, arena); @@ -1311,7 +1321,7 @@ public: static DataTypePtr createResultType(const DataTypePtr & type_) { - if constexpr (Data::is_nullable) + if constexpr (Data::result_is_nullable) return makeNullable(type_); return type_; } @@ -1431,13 +1441,13 @@ public: } AggregateFunctionPtr getOwnNullAdapter( - const AggregateFunctionPtr & nested_function, + const AggregateFunctionPtr & original_function, const DataTypes & /*arguments*/, const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const override { - if (Data::is_nullable) - return nested_function; + if (Data::result_is_nullable && !Data::should_skip_null_arguments) + return original_function; return nullptr; } diff --git a/tests/queries/0_stateless/02812_subquery_operators.reference b/tests/queries/0_stateless/02812_subquery_operators.reference new file mode 100644 index 00000000000..aed0a046f99 --- /dev/null +++ b/tests/queries/0_stateless/02812_subquery_operators.reference @@ -0,0 +1,6 @@ + +Hello +Hello +123 +1 + ['\0'] [] \0 [''] diff --git a/tests/queries/0_stateless/02812_subquery_operators.sql b/tests/queries/0_stateless/02812_subquery_operators.sql new file mode 100644 index 00000000000..b0638b43e89 --- /dev/null +++ b/tests/queries/0_stateless/02812_subquery_operators.sql @@ -0,0 +1,6 @@ +SELECT singleValueOrNull(toNullable('')); +SELECT singleValueOrNull(toNullable('Hello')); +SELECT singleValueOrNull((SELECT 'Hello')); +SELECT 
singleValueOrNull(toNullable(123)); +SELECT '' = ALL (SELECT toNullable('')); +SELECT '', ['\0'], [], singleValueOrNull(( SELECT '\0' ) ), ['']; From 4c44c1f6ea422356bbed589aa5053fcd08139cb6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 06:32:42 +0000 Subject: [PATCH 191/230] Wait inside the function --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index c4b026c6aba..d8153967e9a 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -32,6 +32,8 @@ function insert3() ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done + + wait } function select1() From fb2affcae31afa1558706592860cc8f32e44ecde Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 08:37:08 +0000 Subject: [PATCH 192/230] Dump all rules --- tests/integration/helpers/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 60b46926589..fe3a858b867 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -161,7 +161,7 @@ class _NetworkManager: self._exec_run(cmd, privileged=True) def dump_rules(self): - cmd = ["iptables", "-L", "DOCKER-USER"] + cmd = ["iptables", "-L"] return self._exec_run(cmd, privileged=True) @staticmethod From 23bd23802fc160a34e09db83c87fda53ef645e19 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 7 Jul 2023 12:26:15 +0300 Subject: [PATCH 193/230] CacheDictionary request only unique keys from source --- src/Dictionaries/CacheDictionary.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.cpp 
b/src/Dictionaries/CacheDictionary.cpp index c5c88a9f142..e27e25ea7c4 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -552,13 +552,14 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr Date: Fri, 7 Jul 2023 09:58:35 +0000 Subject: [PATCH 194/230] Skip parallel keepermap test --- tests/integration/parallel_skip.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index e9089fcde73..d060218456a 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -66,5 +66,7 @@ "test_server_reload/test.py::test_remove_http_port", "test_server_reload/test.py::test_remove_mysql_port", "test_server_reload/test.py::test_remove_postgresql_port", - "test_server_reload/test.py::test_remove_tcp_port" + "test_server_reload/test.py::test_remove_tcp_port", + + "test_keeper_map/test.py::test_keeper_map_without_zk" ] From 2ada80aa109991f664f6a9495eddcc013215b94d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 13:32:39 +0300 Subject: [PATCH 195/230] Update 02360_send_logs_level_colors.sh --- tests/queries/0_stateless/02360_send_logs_level_colors.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02360_send_logs_level_colors.sh b/tests/queries/0_stateless/02360_send_logs_level_colors.sh index a9b7d4dd3c1..127c94c88e2 100755 --- a/tests/queries/0_stateless/02360_send_logs_level_colors.sh +++ b/tests/queries/0_stateless/02360_send_logs_level_colors.sh @@ -21,7 +21,7 @@ spawn bash -c "$command" expect 1 EOF - rg -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" + grep -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" } run "$CLICKHOUSE_CLIENT -q 'SELECT 1' 2>$file_name" From 602392bb6206590e0d24df05eabf69a970767756 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: 
Fri, 7 Jul 2023 12:37:16 +0200 Subject: [PATCH 196/230] Print short fault info only from safe fields --- src/Daemon/BaseDaemon.cpp | 83 ++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..f766880bd34 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -310,6 +310,56 @@ private: { ThreadStatus thread_status; + /// First log those fields that are safe to access and that should not cause new fault. + /// That way we will have some duplicated info in the log but we don't loose important info + /// in case of double fault. + + std::string signal_description = "Unknown signal"; + + /// Some of these are not really signals, but our own indications on failure reason. + if (sig == StdTerminate) + signal_description = "std::terminate"; + else if (sig == SanitizerTrap) + signal_description = "sanitizer trap"; + else if (sig >= 0) + signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + + String error_message; + + if (sig != SanitizerTrap) + error_message = signalToErrorMessage(sig, info, *context); + else + error_message = "Sanitizer trap."; + + LOG_FATAL(log, "########## Short fault info ############"); + + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, + thread_num, signal_description, sig); + + LOG_FATAL(log, fmt::runtime(error_message)); + + String bare_stacktrace_str; + if (stack_trace.getSize()) + { + /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. + /// NOTE: This still require memory allocations and mutex lock inside logger. + /// BTW we can also print it to stderr using write syscalls. 
+ + WriteBufferFromOwnString bare_stacktrace; + writeString("Stack trace:", bare_stacktrace); + for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) + { + writeChar(' ', bare_stacktrace); + writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); + } + + LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + bare_stacktrace_str = bare_stacktrace.str(); + } + + /// Now try to access potentially unsafe data in thread_ptr. + String query_id; String query; @@ -326,16 +376,6 @@ private: } } - std::string signal_description = "Unknown signal"; - - /// Some of these are not really signals, but our own indications on failure reason. - if (sig == StdTerminate) - signal_description = "std::terminate"; - else if (sig == SanitizerTrap) - signal_description = "sanitizer trap"; - else if (sig >= 0) - signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context - LOG_FATAL(log, "########################################"); if (query_id.empty()) @@ -351,30 +391,11 @@ private: thread_num, query_id, query, signal_description, sig); } - String error_message; - - if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, *context); - else - error_message = "Sanitizer trap."; - LOG_FATAL(log, fmt::runtime(error_message)); - if (stack_trace.getSize()) + if (!bare_stacktrace_str.empty()) { - /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. - /// NOTE: This still require memory allocations and mutex lock inside logger. - /// BTW we can also print it to stderr using write syscalls. 
- - WriteBufferFromOwnString bare_stacktrace; - writeString("Stack trace:", bare_stacktrace); - for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - { - writeChar(' ', bare_stacktrace); - writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); - } - - LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + LOG_FATAL(log, fmt::runtime(bare_stacktrace_str)); } /// Write symbolized stack trace line by line for better grep-ability. From 50bda59a0d226b108ab1521ae6499d35bab01ad0 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:05:42 +0000 Subject: [PATCH 197/230] Fix typo --- .../test_s3_zero_copy_replication/test.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index bc13c127610..2a4e0eece08 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -48,7 +48,7 @@ def get_large_objects_count(cluster, size=100, folder="data"): return counter -def check_objects_exisis(cluster, object_list, folder="data"): +def check_objects_exist(cluster, object_list, folder="data"): minio = cluster.minio_client for obj in object_list: if obj: @@ -466,7 +466,7 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): assert objects01 == objects02 - check_objects_exisis(cluster, objects01) + check_objects_exist(cluster, objects01) node1.query("TRUNCATE TABLE unfreeze_test") node2.query("SYSTEM SYNC REPLICA unfreeze_test", timeout=30) @@ -477,12 +477,12 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): assert objects01 == objects11 assert objects01 == objects12 - check_objects_exisis(cluster, objects11) + check_objects_exist(cluster, objects11) node1.query(f"{unfreeze_query_template} 'freeze_backup1'") wait_mutations(node1, 
"unfreeze_test", 10) - check_objects_exisis(cluster, objects12) + check_objects_exist(cluster, objects12) node2.query(f"{unfreeze_query_template} 'freeze_backup2'") wait_mutations(node2, "unfreeze_test", 10) @@ -540,8 +540,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) - check_objects_exisis(cluster, objects2) + check_objects_exist(cluster, objects1) + check_objects_exist(cluster, objects2) node2.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", @@ -551,8 +551,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) - check_objects_exisis(cluster, objects2) + check_objects_exist(cluster, objects1) + check_objects_exist(cluster, objects2) node1.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", @@ -562,7 +562,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) check_objects_not_exisis(cluster, objects_diff) node1.query( @@ -573,7 +573,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) node2.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '0'", @@ -682,7 +682,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): wait_for_active_parts(node2, 4, "zero_copy_mutation") objects1 = node1.get_table_objects("zero_copy_mutation") - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) 
node1.query( """ @@ -710,7 +710,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): nodeY = node2 objectsY = nodeY.get_table_objects("zero_copy_mutation") - check_objects_exisis(cluster, objectsY) + check_objects_exist(cluster, objectsY) nodeX.query( """ @@ -745,7 +745,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): """ ) - check_objects_exisis(cluster, objectsY) + check_objects_exist(cluster, objectsY) nodeY.query( """ From d439db31397e8576a6e49e209bf069612ef9d2f5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:10:55 +0200 Subject: [PATCH 198/230] Print just signal number first, and only then get its description --- src/Daemon/BaseDaemon.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f766880bd34..422f6ffb63f 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -314,6 +314,11 @@ private: /// That way we will have some duplicated info in the log but we don't loose important info /// in case of double fault. + LOG_FATAL(log, "########## Short fault info ############"); + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, + thread_num, sig); + std::string signal_description = "Unknown signal"; /// Some of these are not really signals, but our own indications on failure reason. 
@@ -324,6 +329,8 @@ private: else if (sig >= 0) signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + LOG_FATAL(log, "Signal description: {}", signal_description); + String error_message; if (sig != SanitizerTrap) @@ -331,12 +338,6 @@ private: else error_message = "Sanitizer trap."; - LOG_FATAL(log, "########## Short fault info ############"); - - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, - thread_num, signal_description, sig); - LOG_FATAL(log, fmt::runtime(error_message)); String bare_stacktrace_str; From 6d798e0bde13416488409718fd2db6191dde1197 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 10:16:36 +0000 Subject: [PATCH 199/230] Better check for current_thread --- src/Common/ThreadStatus.cpp | 9 +++++---- src/Common/ThreadStatus.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 7a602afe7e7..b39ea7e8ea8 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -199,13 +199,14 @@ ThreadStatus::~ThreadStatus() if (deleter) deleter(); + chassert(!check_current_thread_on_destruction || current_thread == this); + /// Only change current_thread if it's currently being used by this ThreadStatus /// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread - if (check_current_thread_on_destruction) - { - assert(current_thread == this); + if (current_thread == this) current_thread = nullptr; - } + else if (check_current_thread_on_destruction) + LOG_ERROR(log, "current_thread contains invalid address"); } void ThreadStatus::updatePerformanceCounters() diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 7c8dbdb68bd..aa1e3eea6e5 100644 --- a/src/Common/ThreadStatus.h +++ 
b/src/Common/ThreadStatus.h @@ -224,7 +224,7 @@ private: Poco::Logger * log = nullptr; - bool check_current_thread_on_destruction; + [[maybe_unused]] bool check_current_thread_on_destruction; public: explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); From d9d0e9062a4f30775b1b0d32121fef3da1ea33bf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 11:41:01 +0000 Subject: [PATCH 200/230] Remove maybe_unused --- src/Common/ThreadStatus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index aa1e3eea6e5..7c8dbdb68bd 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -224,7 +224,7 @@ private: Poco::Logger * log = nullptr; - [[maybe_unused]] bool check_current_thread_on_destruction; + bool check_current_thread_on_destruction; public: explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); From 36e52efc3e7602e43628246562b2db70ca85e765 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 11:57:12 +0000 Subject: [PATCH 201/230] Remove timeout --- .../01164_detach_attach_partition_race.sh | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 3aec4c3445d..e645cb5aae7 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -2,7 +2,6 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh @@ -15,16 +14,16 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)" function thread_insert() { - while true; do - # It might be the case that the threads are terminated and exited, but some children didn't and they are still sending queries when we are dropping tables. - # That's why the "Table doesn't exist" error is allowed, while other errors don't. - $CLICKHOUSE_CLIENT -q "insert into mt values (rand())" 2>&1 | tr -d '\n' | rg -v "Table .+ doesn't exist"; + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -q "insert into mt values (rand())"; done } function thread_detach_attach() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT -q "alter table mt detach partition id 'all'"; $CLICKHOUSE_CLIENT -q "alter table mt attach partition id 'all'"; done @@ -32,7 +31,8 @@ function thread_detach_attach() function thread_drop_detached() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do $CLICKHOUSE_CLIENT --allow_drop_detached 1 -q "alter table mt drop detached partition id 'all'"; done } @@ -43,10 +43,10 @@ export -f thread_drop_detached; TIMEOUT=10 -timeout $TIMEOUT bash -c thread_insert & -timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null & -timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null & -timeout $TIMEOUT bash -c thread_drop_detached 2> /dev/null & +thread_insert $TIMEOUT & +thread_detach_attach $TIMEOUT 2> /dev/null & +thread_detach_attach $TIMEOUT 2> /dev/null & +thread_drop_detached $TIMEOUT 2> /dev/null & wait From 1e0d97c282b1415aed77dd7198ab244a84c7aea9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Jul 2023 12:19:10 +0000 Subject: [PATCH 202/230] Do not remove inputs after ActionsDAG::merge --- src/Interpreters/ActionsDAG.cpp | 2 +- .../0_stateless/02812_bug_with_unused_join_columns.reference | 0 
.../queries/0_stateless/02812_bug_with_unused_join_columns.sql | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference create mode 100644 tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 906875dd314..46c14c503e4 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1366,7 +1366,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) first.mergeInplace(std::move(second)); /// Drop unused inputs and, probably, some actions. - first.removeUnusedActions(); + first.removeUnusedActions(false); return std::make_shared(std::move(first)); } diff --git a/tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql new file mode 100644 index 00000000000..6c801b5b73e --- /dev/null +++ b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql @@ -0,0 +1 @@ +SELECT concat(func.name, comb.name) AS x FROM system.functions AS func JOIN system.aggregate_function_combinators AS comb using name WHERE is_aggregate settings allow_experimental_analyzer=1; From fa7fe5277c99c036ff488997aab46b36c6901610 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Jul 2023 12:25:13 +0000 Subject: [PATCH 203/230] Better comment. 
--- src/Interpreters/ActionsDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 46c14c503e4..2f9fc7e5746 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1365,7 +1365,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second) { first.mergeInplace(std::move(second)); - /// Drop unused inputs and, probably, some actions. + /// Some actions could become unused. Do not drop inputs to preserve the header. first.removeUnusedActions(false); return std::make_shared(std::move(first)); From ee33000fc24367166ebf56772b0be4ca0ee25192 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 7 Jul 2023 14:08:54 +0000 Subject: [PATCH 204/230] Fixing tests. --- src/Interpreters/ActionsDAG.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 2f9fc7e5746..e68e2580231 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -465,8 +465,12 @@ void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_re void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_constant_folding) { std::unordered_set visited_nodes; + std::unordered_set used_inputs; std::stack stack; + for (const auto * input : inputs) + used_inputs.insert(input); + for (const auto * node : outputs) { visited_nodes.insert(node); @@ -484,7 +488,7 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_consta stack.push(&node); } - if (node.type == ActionType::INPUT && !allow_remove_inputs) + if (node.type == ActionType::INPUT && !allow_remove_inputs && used_inputs.contains(&node)) visited_nodes.insert(&node); } From e08f140d62988cd0340ec75f441891a2c01539c3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 17:32:10 +0300 Subject: [PATCH 205/230] Update 
02254_projection_broken_part.sh --- tests/queries/0_stateless/02254_projection_broken_part.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh index 6ba5093f234..3521d1d9d16 100755 --- a/tests/queries/0_stateless/02254_projection_broken_part.sh +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -26,7 +26,7 @@ path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLI $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit rm -f "$path/ab.proj/data.bin" -$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a;" 2>/dev/null +$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null num_tries=0 while ! $CLICKHOUSE_CLIENT -q "select 4, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null; do From 05b7da78130b21367b69a2cc22a319be11de8207 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Fri, 7 Jul 2023 10:32:44 -0400 Subject: [PATCH 206/230] add doc note for MongoDB Atlas --- docs/en/engines/table-engines/integrations/mongodb.md | 9 +++++++++ docs/en/sql-reference/table-functions/mongodb.md | 8 ++++++++ 2 files changed, 17 insertions(+) diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index 912f81573db..f87e8da8b5b 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -33,6 +33,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name - `options` — MongoDB connection string options (optional parameter). 
+:::tip +If you are using the MongoDB Atlas cloud offering please add these options: + +``` +'connectTimeoutMS=10000&ssl=true&authSource=admin' +``` + +::: + ## Usage Example {#usage-example} Create a table in ClickHouse which allows to read data from MongoDB collection: diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md index aad60a7003c..a483414c0d4 100644 --- a/docs/en/sql-reference/table-functions/mongodb.md +++ b/docs/en/sql-reference/table-functions/mongodb.md @@ -30,6 +30,14 @@ mongodb(host:port, database, collection, user, password, structure [, options]) - `options` - MongoDB connection string options (optional parameter). +:::tip +If you are using the MongoDB Atlas cloud offering please add these options: + +``` +'connectTimeoutMS=10000&ssl=true&authSource=admin' +``` + +::: **Returned Value** From 0bd16d47be2c2040ab1d6787e953b0c4154ee0a1 Mon Sep 17 00:00:00 2001 From: Slach Date: Fri, 7 Jul 2023 19:44:20 +0500 Subject: [PATCH 207/230] fix documentation inconsistency about additional_tables_filter during reproduce https://github.com/ClickHouse/ClickHouse/issues/51948 Signed-off-by: Slach --- docs/en/operations/settings/settings.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 5f6cf98646b..195a9e26b53 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -17,7 +17,8 @@ Default value: 0.
**Example** ``` sql -insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +SELECT * FROM table_1; ``` ```response ┌─x─┬─y────┐ @@ -30,7 +31,7 @@ insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); ```sql SELECT * FROM table_1 -SETTINGS additional_table_filters = (('table_1', 'x != 2')) +SETTINGS additional_table_filters = {'table_1': 'x != 2'} ``` ```response ┌─x─┬─y────┐ @@ -50,7 +51,8 @@ Default value: `''`. **Example** ``` sql -insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +SELECT * FROM table_1; ``` ```response ┌─x─┬─y────┐ From 50ea0855bf622ede96cb9726d5010d03c8dbebf4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 17:47:09 +0300 Subject: [PATCH 208/230] Update 02439_merge_selecting_partitions.sql (#51862) * Update 02439_merge_selecting_partitions.sql * Update 02439_merge_selecting_partitions.reference * Update 02439_merge_selecting_partitions.reference * fix --- .../0_stateless/02439_merge_selecting_partitions.reference | 1 - .../0_stateless/02439_merge_selecting_partitions.sql | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference index e836994b3aa..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference @@ -1 +0,0 @@ -/test/02439/s1/default/block_numbers/123 diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql index 88ce2834d6b..3d0c0af84d5 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql +++
b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -22,7 +22,9 @@ select sleepEachRow(3) as higher_probability_of_reproducing_the_issue format Nul system flush logs; -- it should not list unneeded partitions where we cannot merge anything -select distinct path from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' - and op_num in ('List', 'SimpleList', 'FilteredList') and path not like '%/block_numbers/1'; +select * from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%' + and op_num in ('List', 'SimpleList', 'FilteredList') + and path not like '%/block_numbers/1' and path not like '%/block_numbers/123' + and event_time >= now() - interval 1 minute; drop table rmt; From 8266067e1a650453968f278f64e20bd4addc7aa2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 7 Jul 2023 19:09:55 +0300 Subject: [PATCH 209/230] Fixed style check --- src/Dictionaries/CacheDictionary.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index e27e25ea7c4..3011151ef00 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -549,12 +549,12 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr Date: Fri, 7 Jul 2023 18:39:20 +0200 Subject: [PATCH 210/230] comments for the tests --- ...nal_block_structure_mismatch_bug.reference | 1 - ...791_final_block_structure_mismatch_bug.sql | 38 ++++++++----------- 2 files changed, 15 insertions(+), 24 deletions(-) diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference index ca810c46a2d..a8401b1cae8 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -7,4 +7,3 @@ 
1 2 3 -2 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql index a82e43d81f4..394e3bff87b 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -17,10 +17,18 @@ INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); SELECT count(*) FROM test_block_mismatch FINAL; +optimize table test_block_mismatch final; +system stop merges test_block_mismatch; + INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +-- one lonely part in 2023-02-02 partition and 3 parts in 2023-01-01 partition. +-- lonely part will not be processed by PartsSplitter and 2023-01-01's parts will be - previously this led to the `Block structure mismatch in Pipe::unitePipes` exception. 
SELECT count(*) FROM test_block_mismatch FINAL; + +-- variations of the test above with slightly modified table definitions + CREATE TABLE test_block_mismatch_sk1 ( a UInt32, @@ -39,10 +47,14 @@ INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12') INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk1 FINAL; +optimize table test_block_mismatch_sk1 final; +system stop merges test_block_mismatch_sk1; + INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + CREATE TABLE test_block_mismatch_sk2 ( a UInt32, @@ -61,29 +73,9 @@ INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12') INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk2 FINAL; +optimize table test_block_mismatch_sk2 final; +system stop merges test_block_mismatch_sk2; + INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); SELECT count(*) FROM test_block_mismatch_sk2 FINAL; - -CREATE TABLE test_block_mismatch_magic_row_dist -( - a UInt32, - b DateTime -) -ENGINE = ReplacingMergeTree -PARTITION BY toYYYYMM(b) -ORDER BY (toDate(b), a); - -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); - -optimize table test_block_mismatch_magic_row_dist final; - -system stop merges test_block_mismatch_magic_row_dist; - -INSERT INTO 
test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); -INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); - -SELECT count(*) FROM test_block_mismatch_magic_row_dist FINAL; From 93b76c93210bccfda6d6b2413bf07cf48c4f9fa3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 16:40:47 +0200 Subject: [PATCH 211/230] better logs on shutdown --- base/base/getThreadId.cpp | 41 +++++++++++++++++----------- base/base/getThreadId.h | 2 ++ src/Daemon/BaseDaemon.cpp | 1 + src/Interpreters/Context.cpp | 41 ++++++++++++++++------------ src/Interpreters/DatabaseCatalog.cpp | 2 ++ 5 files changed, 53 insertions(+), 34 deletions(-) diff --git a/base/base/getThreadId.cpp b/base/base/getThreadId.cpp index b6c22bb8856..a42d79c5698 100644 --- a/base/base/getThreadId.cpp +++ b/base/base/getThreadId.cpp @@ -15,25 +15,34 @@ static thread_local uint64_t current_tid = 0; + +static void setCurrentThreadId() +{ +#if defined(OS_ANDROID) + current_tid = gettid(); +#elif defined(OS_LINUX) + current_tid = static_cast(syscall(SYS_gettid)); /// This call is always successful. - man gettid +#elif defined(OS_FREEBSD) + current_tid = pthread_getthreadid_np(); +#elif defined(OS_SUNOS) + // On Solaris-derived systems, this returns the ID of the LWP, analogous + // to a thread. + current_tid = static_cast(pthread_self()); +#else + if (0 != pthread_threadid_np(nullptr, &current_tid)) + throw std::logic_error("pthread_threadid_np returned error"); +#endif +} + uint64_t getThreadId() { if (!current_tid) - { -#if defined(OS_ANDROID) - current_tid = gettid(); -#elif defined(OS_LINUX) - current_tid = static_cast(syscall(SYS_gettid)); /// This call is always successful. - man gettid -#elif defined(OS_FREEBSD) - current_tid = pthread_getthreadid_np(); -#elif defined(OS_SUNOS) - // On Solaris-derived systems, this returns the ID of the LWP, analogous - // to a thread.
- current_tid = static_cast(pthread_self()); -#else - if (0 != pthread_threadid_np(nullptr, &current_tid)) - throw std::logic_error("pthread_threadid_np returned error"); -#endif - } + setCurrentThreadId(); return current_tid; } + +void updateCurrentThreadIdAfterFork() +{ + setCurrentThreadId(); +} diff --git a/base/base/getThreadId.h b/base/base/getThreadId.h index a1b5ff5f3e8..f90c76029e1 100644 --- a/base/base/getThreadId.h +++ b/base/base/getThreadId.h @@ -3,3 +3,5 @@ /// Obtain thread id from OS. The value is cached in thread local variable. uint64_t getThreadId(); + +void updateCurrentThreadIdAfterFork(); diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..d63e9976437 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -1101,6 +1101,7 @@ void BaseDaemon::setupWatchdog() if (0 == pid) { + updateCurrentThreadIdAfterFork(); logger().information("Forked a child process to watch"); #if defined(OS_LINUX) if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL)) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7482450d529..7b3d419cce4 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -176,6 +176,15 @@ namespace ErrorCodes extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; } +#define SHUTDOWN(log, desc, ptr, method) do \ +{ \ + if (ptr) \ + { \ + LOG_DEBUG(log, "Shutting down " desc); \ + ptr->method; \ + } \ +} while (false) \ + /** Set of known objects (environment), that could be used in query. * Shared (global) part. Order of members (especially, order of destruction) is very important. @@ -479,35 +488,29 @@ struct ContextSharedPart : boost::noncopyable /// Stop periodic reloading of the configuration files. /// This must be done first because otherwise the reloading may pass a changed config /// to some destroyed parts of ContextSharedPart.
- if (external_dictionaries_loader) - external_dictionaries_loader->enablePeriodicUpdates(false); - if (external_user_defined_executable_functions_loader) - external_user_defined_executable_functions_loader->enablePeriodicUpdates(false); - if (user_defined_sql_objects_loader) - user_defined_sql_objects_loader->stopWatching(); + SHUTDOWN(log, "dictionaries loader", external_dictionaries_loader, enablePeriodicUpdates(false)); + SHUTDOWN(log, "UDFs loader", external_user_defined_executable_functions_loader, enablePeriodicUpdates(false)); + SHUTDOWN(log, "another UDFs loader", user_defined_sql_objects_loader, stopWatching()); + + LOG_TRACE(log, "Shutting down named sessions"); Session::shutdownNamedSessions(); /// Waiting for current backups/restores to be finished. This must be done before `DatabaseCatalog::shutdown()`. - if (backups_worker) - backups_worker->shutdown(); + SHUTDOWN(log, "backups worker", backups_worker, shutdown()); /** After system_logs have been shut down it is guaranteed that no system table gets created or written to. * Note that part changes at shutdown won't be logged to part log. */ - if (system_logs) - system_logs->shutdown(); + SHUTDOWN(log, "system logs", system_logs, shutdown()); + LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); - if (merge_mutate_executor) - merge_mutate_executor->wait(); - if (fetch_executor) - fetch_executor->wait(); - if (moves_executor) - moves_executor->wait(); - if (common_executor) - common_executor->wait(); + SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); + SHUTDOWN(log, "fetches executor", fetch_executor, wait()); + SHUTDOWN(log, "moves executor", moves_executor, wait()); + SHUTDOWN(log, "common executor", common_executor, wait()); TransactionLog::shutdownIfAny(); @@ -533,10 +536,12 @@ struct ContextSharedPart : boost::noncopyable /// DDLWorker should be deleted without lock, cause its internal thread can /// take it as well, which will cause deadlock. 
+ LOG_TRACE(log, "Shutting down DDLWorker"); delete_ddl_worker.reset(); /// Background operations in cache use background schedule pool. /// Deactivate them before destructing it. + LOG_TRACE(log, "Shutting down caches"); const auto & caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache] : caches) cache->cache->deactivateBackgroundOperations(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cb2f6e3b3d..271330bc64a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -227,9 +227,11 @@ void DatabaseCatalog::shutdownImpl() databases_with_delayed_shutdown.push_back(database.second); continue; } + LOG_TRACE(log, "Shutting down database {}", database.first); database.second->shutdown(); } + LOG_TRACE(log, "Shutting down system databases"); for (auto & database : databases_with_delayed_shutdown) { database->shutdown(); From a96874850ec0faaf049cce01feee6c4a572d7961 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 00:55:57 +0200 Subject: [PATCH 212/230] Revert "Merge pull request #48115 from save-my-heart/throw_non_parametric_function" This reverts commit 5f930aeb2619bda8f27f3cfc6ba01ffaf48c3d64, reversing changes made to 35572321a14d617cfd110a48d8d3416615bd75c9. 
--- .../UserDefined/UserDefinedSQLFunctionVisitor.cpp | 7 ------- src/Interpreters/ActionsVisitor.cpp | 7 ------- .../0_stateless/02701_non_parametric_function.reference | 0 .../0_stateless/02701_non_parametric_function.sql | 9 --------- 4 files changed, 23 deletions(-) delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.reference delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.sql diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp index 597e4efe35e..360d1cdf76c 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp @@ -20,7 +20,6 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast) @@ -139,12 +138,6 @@ ASTPtr UserDefinedSQLFunctionVisitor::tryToReplaceFunction(const ASTFunction & f if (!user_defined_function) return nullptr; - /// All UDFs are not parametric for now. 
- if (function.parameters) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function.name); - } - const auto & function_arguments_list = function.children.at(0)->as(); auto & function_arguments = function_arguments_list->children; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 10502b7e66d..01f2d4cf22e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -78,7 +78,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -1106,12 +1105,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } - /// Normal functions are not parametric for now. - if (node.parameters) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", node.name); - } - Names argument_names; DataTypes argument_types; bool arguments_present = true; diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql deleted file mode 100644 index 5261fa7b082..00000000000 --- a/tests/queries/0_stateless/02701_non_parametric_function.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Tags: no-parallel - -SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } - -CREATE FUNCTION IF NOT EXISTS sum_udf as (x, y) -> (x + y); - -SELECT sum_udf(1)(1, 2); -- { serverError 309 } - -DROP FUNCTION IF EXISTS sum_udf; From f4696d762cb3e15878b99c51bcad9ee15a8972c1 Mon 
Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 00:56:42 +0200 Subject: [PATCH 213/230] Revert "Merge pull request #49419 from ClickHouse/fix-function-parameter-exception" This reverts commit b921476a3be536b17b967391cefab3888c0c96b2, reversing changes made to 7896d307379bc813665fa5b11d08c202ea67f4fb. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 15 --------------- tests/analyzer_tech_debt.txt | 1 + 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 163092f1b7f..da8933aabaa 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -116,7 +116,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. 
@@ -4897,11 +4896,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_expression_untyped->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage()); - if (!parameters.empty()) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_node.formatASTForErrorMessage()); - } - auto lambda_expression_clone = lambda_expression_untyped->clone(); IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); @@ -5018,12 +5012,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); - bool is_executable_udf = false; if (!function) function = FunctionFactory::instance().tryGet(function_name, scope.context); - else - is_executable_udf = true; if (!function) { @@ -5074,12 +5065,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi return result_projection_names; } - /// Executable UDFs may have parameters. They are checked in UserDefinedExecutableFunctionFactory. - if (!parameters.empty() && !is_executable_udf) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_name); - } - /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function. * Then each lambda arguments are initialized with columns, where column source is lambda. * This information is important for later steps of query processing. 
diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index f7cc13dd2e2..f838a19940a 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -111,6 +111,7 @@ 00917_multiple_joins_denny_crane 00725_join_on_bug_1 00636_partition_key_parts_pruning +00261_storage_aliases_and_array_join 01825_type_json_multiple_files 01281_group_by_limit_memory_tracking 02723_zookeeper_name From 19072c9b475fef191dfd18929cc81c25e8115026 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 02:03:23 +0300 Subject: [PATCH 214/230] Correct example about parametric executable UDFs. --- docs/en/sql-reference/functions/udf.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md index 9c6b1b0c66b..51734beed03 100644 --- a/docs/en/sql-reference/functions/udf.md +++ b/docs/en/sql-reference/functions/udf.md @@ -171,12 +171,13 @@ Result: └──────────────────────────────┘ ``` -Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). +Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). It also requires the `execute_direct` option (to ensure no shell argument expansion vulnerability). File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml executable + true test_function_parameter_python String From 6990f078a0bf87f23d478e83c51001b7cb0d4b8a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Fri, 7 Jul 2023 19:19:30 -0400 Subject: [PATCH 215/230] cleaner way --- src/Daemon/BaseDaemon.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index a75aac7a08e..af2d355d335 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -154,10 +154,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - if (Exception::enable_job_stack_trace) - writeVectorBinary(Exception::thread_frame_pointers, out); - else - writeVarUInt(0, out); + writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector{}, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); From 9144a2dbb2a17af72304267edfe5a81ee7daa0b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 05:23:13 +0200 Subject: [PATCH 216/230] Fix unrelated messages from LSan in clickhouse-client --- tests/clickhouse-test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4860ce0fac9..95470f77987 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -57,6 +57,8 @@ MESSAGES_TO_RETRY = [ "ConnectionPoolWithFailover: Connection failed at try", "DB::Exception: New table appeared in database being dropped or detached. 
Try again", "is already started to be removing by another replica right now", + # This is from LSan, and it indicates its own internal problem: + "Unable to get registers from thread", ] MAX_RETRIES = 3 From c828db572078bb68bbcd20c6850073030d4addac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 05:57:23 +0200 Subject: [PATCH 217/230] Allow OOM in AST Fuzzer with Sanitizers --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index d2c8de7a211..5cda0831a84 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -291,7 +291,7 @@ quit if [ "$server_died" == 1 ] then # The server has died. - if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*' server.log > description.txt + if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt then echo "Lost connection to server. See the logs." 
> description.txt fi From 1bdcd29da2bfc4cab02a0db5dedeb7d0515ac49c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:02:38 +0200 Subject: [PATCH 218/230] Disable one test under Analyzer --- tests/analyzer_tech_debt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index f7cc13dd2e2..a10f72e743a 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -128,3 +128,4 @@ 02784_parallel_replicas_automatic_disabling 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long +00992_system_parts_race_condition_zookeeper_long From adbd85b975aba4618ddf2a934422559410eeea48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:26:44 +0200 Subject: [PATCH 219/230] Fix Docker --- tests/integration/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 749f4aa1cde..5933883f7b0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -42,6 +42,13 @@ def cleanup_environment(): logging.debug(f"Docker ps before start:{r.stdout}") else: logging.debug(f"No running containers") + + logging.debug("Pruning Docker networks") + run_and_check( + ["docker network prune"], + shell=True, + nothrow=True, + ) except Exception as e: logging.exception(f"cleanup_environment:{str(e)}") pass From cdbf279b65cca972ce63dd7fd835d2b46359f7f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:46:28 +0200 Subject: [PATCH 220/230] Fix test 01825_type_json_from_map --- tests/queries/0_stateless/01825_type_json_from_map.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_type_json_from_map.sql b/tests/queries/0_stateless/01825_type_json_from_map.sql index 2480aca1667..51e60843a1a 100644 --- a/tests/queries/0_stateless/01825_type_json_from_map.sql +++ 
b/tests/queries/0_stateless/01825_type_json_from_map.sql @@ -1,4 +1,5 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-random-merge-tree-settings +-- For example, it is 4 times slower with --merge_max_block_size=5967 --index_granularity=55 --min_bytes_for_wide_part=847510133 DROP TABLE IF EXISTS t_json; DROP TABLE IF EXISTS t_map; From 0b0caec9c435aaf0df3e01ef64bf06397d11f2ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 07:51:17 +0300 Subject: [PATCH 221/230] Update Context.cpp --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7b3d419cce4..8df8723123f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -181,7 +181,7 @@ namespace ErrorCodes if (ptr) \ { \ LOG_DEBUG(log, "Shutting down " desc); \ - ptr->method; \ + (ptr)->method; \ } \ } while (false) \ From 4de02c243816f907643eefbbe4743861660b6d99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 07:04:33 +0200 Subject: [PATCH 222/230] Fix test 02354_distributed_with_external_aggregation_memory_usage --- ...distributed_with_external_aggregation_memory_usage.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 548660e36b1..c8ec40bb0a7 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,5 +1,7 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; + create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple(); insert into 
t_2354_dist_with_external_aggr select number, toString(number) as s, toFixedString(s, 100) from numbers_mt(5e7); @@ -15,8 +17,12 @@ set max_bytes_before_external_group_by = '2G', -- whole aggregation state of local aggregation uncompressed is 5.8G -- it is hard to provide an accurate estimation for memory usage, so 4G is just the actual value taken from the logs + delta +-- also avoid using localhost, so the queries will go over separate connections +-- (otherwise the memory usage for merge will be counted together with the localhost query) select a, b, c, sum(a) as s -from remote('127.0.0.{1,2}', currentDatabase(), t_2354_dist_with_external_aggr) +from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null settings max_memory_usage = '4Gi'; + +DROP TABLE t_2354_dist_with_external_aggr; From df31034820c245030b16fddd7b9b3e06c07b0d51 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 20:29:59 +0200 Subject: [PATCH 223/230] rollback merge tasks on exception --- src/Storages/MergeTree/IExecutableTask.h | 11 +++++++---- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergeFromLogEntryTask.h | 2 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 19 ++++++++++++++++--- .../MergeTree/MergePlainMergeTreeTask.h | 5 +++-- .../MergeTree/MergeTreeBackgroundExecutor.cpp | 17 ++++++++++------- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.h | 2 +- .../MergeTree/MutatePlainMergeTreeTask.cpp | 4 ++-- .../MergeTree/MutatePlainMergeTreeTask.h | 5 +++-- src/Storages/MergeTree/MutateTask.cpp | 15 +++++++++------ .../ReplicatedMergeMutateTaskBase.cpp | 2 +- .../MergeTree/ReplicatedMergeMutateTaskBase.h | 3 ++- .../MergeTree/tests/gtest_executor.cpp | 10 ++++++---- src/Storages/StorageMergeTree.cpp | 2 +- 15 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/Storages/MergeTree/IExecutableTask.h b/src/Storages/MergeTree/IExecutableTask.h index 
d0c2d4a840e..738056e0ea0 100644 --- a/src/Storages/MergeTree/IExecutableTask.h +++ b/src/Storages/MergeTree/IExecutableTask.h @@ -32,8 +32,9 @@ public: using TaskResultCallback = std::function; virtual bool executeStep() = 0; virtual void onCompleted() = 0; - virtual StorageID getStorageID() = 0; - virtual Priority getPriority() = 0; + virtual StorageID getStorageID() const = 0; + virtual String getQueryId() const = 0; + virtual Priority getPriority() const = 0; virtual ~IExecutableTask() = default; }; @@ -63,12 +64,14 @@ public: } void onCompleted() override { job_result_callback(!res); } - StorageID getStorageID() override { return id; } - Priority getPriority() override + StorageID getStorageID() const override { return id; } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "getPriority() method is not supported by LambdaAdapter"); } + String getQueryId() const override { return id.getShortName() + "::lambda"; } + private: bool res = false; std::function job_to_execute; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 17582e7df98..9f54c554c85 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -287,7 +287,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); - task_context->setCurrentQueryId(""); + task_context->setCurrentQueryId(getQueryId()); /// Add merge to list merge_mutate_entry = storage.getContext()->getMergeList().insert( diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.h b/src/Storages/MergeTree/MergeFromLogEntryTask.h index 62908f79fb4..16e69a568ba 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.h +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.h @@ -24,7 +24,7 @@ public: StorageReplicatedMergeTree & storage_, 
IExecutableTask::TaskResultCallback & task_result_callback_); - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } protected: /// Both return false if we can't execute merge. diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 9302bdf11de..3f5753a0c95 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include namespace DB @@ -16,7 +18,7 @@ namespace ErrorCodes } -StorageID MergePlainMergeTreeTask::getStorageID() +StorageID MergePlainMergeTreeTask::getStorageID() const { return storage.getStorageID(); } @@ -77,7 +79,6 @@ bool MergePlainMergeTreeTask::executeStep() throw Exception(ErrorCodes::LOGICAL_ERROR, "Task with state SUCCESS mustn't be executed again"); } } - return false; } @@ -145,16 +146,28 @@ void MergePlainMergeTreeTask::finish() storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction); transaction.commit(); + ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); + write_part_log({}); storage.incrementMergedPartsProfileEvent(new_part->getType()); transfer_profile_counters_to_initial_query(); + + if (auto txn_ = txn_holder.getTransaction()) + { + /// Explicitly commit the transaction if we own it (it's a background merge, not OPTIMIZE) + TransactionLog::instance().commitTransaction(txn_, /* throw_on_unknown_status */ false); + ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); + } + } ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); context->makeQueryContext(); - auto queryId = storage.getStorageID().getShortName() + "::" + future_part->name; + auto queryId = getQueryId(); 
context->setCurrentQueryId(queryId); return context; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 95df8c90c9b..5cc9c0e50d3 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -39,8 +39,9 @@ public: bool executeStep() override; void onCompleted() override; - StorageID getStorageID() override; - Priority getPriority() override { return priority; } + StorageID getStorageID() const override; + Priority getPriority() const override { return priority; } + String getQueryId() const override { return getStorageID().getShortName() + "::" + merge_mutate_entry->future_part->name; } void setCurrentTransaction(MergeTreeTransactionHolder && txn_holder_, MergeTreeTransactionPtr && txn_) { diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index d4f8d1140a2..6eab4337162 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -136,7 +136,7 @@ bool MergeTreeBackgroundExecutor::trySchedule(ExecutableTaskPtr task) return true; } -void printExceptionWithRespectToAbort(Poco::Logger * log) +void printExceptionWithRespectToAbort(Poco::Logger * log, const String & query_id) { std::exception_ptr ex = std::current_exception(); @@ -155,14 +155,14 @@ void printExceptionWithRespectToAbort(Poco::Logger * log) if (e.code() == ErrorCodes::ABORTED) LOG_DEBUG(log, getExceptionMessageAndPattern(e, /* with_stacktrace */ false)); else - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, "Exception while executing background task {" + query_id + "}"); }); } catch (...) 
{ NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, "Exception while executing background task {" + query_id + "}"); }); } } @@ -239,7 +239,9 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) has_tasks.notify_one(); }; - auto release_task = [this, &erase_from_active, &on_task_done](TaskRuntimeDataPtr && item_) + String query_id; + + auto release_task = [this, &erase_from_active, &on_task_done, &query_id](TaskRuntimeDataPtr && item_) { std::lock_guard guard(mutex); @@ -256,7 +258,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); } on_task_done(std::move(item_)); @@ -267,11 +269,12 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) try { ALLOW_ALLOCATIONS_IN_SCOPE; + item->task->getQueryId(); need_execute_again = item->task->executeStep(); } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); /// Release the task with exception context. /// An exception context is needed to proper delete write buffers without finalization release_task(std::move(item)); @@ -298,7 +301,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) } catch (...) 
{ - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); on_task_done(std::move(item)); return; } diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index ba55fb400ca..6cb9d50436e 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -191,7 +191,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); - task_context->setCurrentQueryId(""); + task_context->setCurrentQueryId(getQueryId()); merge_mutate_entry = storage.getContext()->getMergeList().insert( storage.getStorageID(), diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index b6d3f5d4b6b..42d8307e948 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -31,7 +31,7 @@ public: {} - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } private: diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 3180431d31b..bf8e879e3d0 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes } -StorageID MutatePlainMergeTreeTask::getStorageID() +StorageID MutatePlainMergeTreeTask::getStorageID() const { return storage.getStorageID(); } @@ -137,7 +137,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); context->makeQueryContext(); - auto queryId = storage.getStorageID().getShortName() + "::" + future_part->name; + auto queryId = getQueryId(); context->setCurrentQueryId(queryId); return context; } diff --git 
a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index bd03c276256..ef11780a873 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -41,8 +41,9 @@ public: bool executeStep() override; void onCompleted() override; - StorageID getStorageID() override; - Priority getPriority() override { return priority; } + StorageID getStorageID() const override; + Priority getPriority() const override { return priority; } + String getQueryId() const override { return getStorageID().getShortName() + "::" + merge_mutate_entry->future_part->name; } private: diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..be512884756 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -884,8 +884,9 @@ public: } void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { @@ -1206,8 +1207,9 @@ public: explicit MutateAllPartColumnsTask(MutationContextPtr ctx_) : ctx(ctx_) {} void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() 
const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { @@ -1434,8 +1436,9 @@ public: explicit MutateSomePartColumnsTask(MutationContextPtr ctx_) : ctx(ctx_) {} void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 61356558e16..b4748ee77ea 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes extern const int PART_IS_TEMPORARILY_LOCKED; } -StorageID ReplicatedMergeMutateTaskBase::getStorageID() +StorageID ReplicatedMergeMutateTaskBase::getStorageID() const { return storage.getStorageID(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index 1e7f9834245..ba514f11f20 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -33,7 +33,8 @@ public: ~ReplicatedMergeMutateTaskBase() override = default; void onCompleted() 
override; - StorageID getStorageID() override; + StorageID getStorageID() const override; + String getQueryId() const override { return getStorageID().getShortName() + "::" + selected_entry->log_entry->new_part_name; } bool executeStep() override; protected: diff --git a/src/Storages/MergeTree/tests/gtest_executor.cpp b/src/Storages/MergeTree/tests/gtest_executor.cpp index 5815b74284a..6f34eb4dfbd 100644 --- a/src/Storages/MergeTree/tests/gtest_executor.cpp +++ b/src/Storages/MergeTree/tests/gtest_executor.cpp @@ -39,7 +39,7 @@ public: return false; } - StorageID getStorageID() override + StorageID getStorageID() const override { return {"test", name}; } @@ -51,7 +51,8 @@ public: throw std::runtime_error("Unlucky..."); } - Priority getPriority() override { return {}; } + Priority getPriority() const override { return {}; } + String getQueryId() const override { return {}; } private: std::mt19937 generator; @@ -79,14 +80,15 @@ public: return --step_count; } - StorageID getStorageID() override + StorageID getStorageID() const override { return {"test", name}; } void onCompleted() override {} - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } + String getQueryId() const override { return "test::lambda"; } private: String name; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..add1d112c1a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1269,7 +1269,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign { /// TODO Transactions: avoid beginning transaction if there is nothing to merge. 
txn = TransactionLog::instance().beginTransaction(); - transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ true}; + transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ false}; } bool has_mutations = false; From 44ae3a0986c941f234a7cb63468e77b626d10713 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 8 Jul 2023 14:58:38 +0200 Subject: [PATCH 224/230] fix a bug in projections --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 13 ++++++++++++- src/Storages/MergeTree/IMergeTreeDataPart.h | 9 ++++++++- src/Storages/MergeTree/MergeTreeData.cpp | 14 +++++++++++++- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 4 ++-- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 7 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index b9591864869..9309f0d4df6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -312,15 +312,20 @@ IMergeTreeDataPart::IMergeTreeDataPart( const IMergeTreeDataPart * parent_part_) : DataPartStorageHolder(data_part_storage_) , storage(storage_) - , name(name_) + , mutable_name(name_) + , name(mutable_name) , info(info_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) + , parent_part_name(parent_part ? 
parent_part->name : "") , use_metadata_cache(storage.use_metadata_cache) { if (parent_part) + { + chassert(parent_part_name.starts_with(parent_part->info.partition_id)); /// Make sure there's no prefix state = MergeTreeDataPartState::Active; + } incrementStateMetric(state); incrementTypeMetric(part_type); @@ -337,6 +342,12 @@ IMergeTreeDataPart::~IMergeTreeDataPart() decrementTypeMetric(part_type); } +void IMergeTreeDataPart::setName(const String & new_name) +{ + mutable_name = new_name; + for (auto & proj_part : projection_parts) + proj_part.second->parent_part_name = new_name; +} String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) const { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 92dbe084081..2c0cf37b3a5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -200,9 +200,14 @@ public: /// If token is not empty, block id is calculated based on it instead of block data String getZeroLevelPartBlockID(std::string_view token) const; + void setName(const String & new_name); + const MergeTreeData & storage; - String name; +private: + String mutable_name; +public: + const String & name; // const ref to private mutable_name MergeTreePartInfo info; /// Part unique identifier. @@ -386,6 +391,7 @@ public: bool isProjectionPart() const { return parent_part != nullptr; } const IMergeTreeDataPart * getParentPart() const { return parent_part; } + String getParentPartName() const { return parent_part_name; } const std::map> & getProjectionParts() const { return projection_parts; } @@ -519,6 +525,7 @@ protected: /// Not null when it's a projection part. 
const IMergeTreeDataPart * parent_part; + String parent_part_name; std::map> projection_parts; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b7fde55880e..f81726863b2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7455,7 +7455,19 @@ void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr data_part) const return; if (data_part->isProjectionPart()) - data_part = data_part->getParentPart()->shared_from_this(); + { + String parent_part_name = data_part->getParentPartName(); + auto parent_part = getPartIfExists(parent_part_name, {DataPartState::PreActive, DataPartState::Active, DataPartState::Outdated}); + + if (!parent_part) + { + LOG_WARNING(log, "Did not find parent part {} for potentially broken projection part {}", + parent_part_name, data_part->getDataPartStorage().getFullPath()); + return; + } + + data_part = parent_part; + } if (data_part->getDataPartStorage().isBroken()) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..41f767cc4de 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -917,7 +917,7 @@ public: { LOG_DEBUG(log, "Merged a projection part in level {}", current_level); selected_parts[0]->renameTo(projection.name + ".proj", true); - selected_parts[0]->name = projection.name; + selected_parts[0]->setName(projection.name); selected_parts[0]->is_temp = false; ctx->new_data_part->addProjectionPart(name, std::move(selected_parts[0])); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4128654a632..22e2ab945eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -788,7 +788,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: part->info.level = 0; part->info.mutation = 0; - part->name = 
part->getNewName(part->info); + part->setName(part->getNewName(part->info)); StorageReplicatedMergeTree::LogEntry log_entry; @@ -914,7 +914,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: /// Note that it may also appear on filesystem right now in PreActive state due to concurrent inserts of the same data. /// It will be checked when we will try to rename directory. - part->name = existing_part_name; + part->setName(existing_part_name); part->info = MergeTreePartInfo::fromPartName(existing_part_name, storage.format_version); /// Used only for exception messages. block_number = part->info.min_block; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..d427a857f07 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2260,7 +2260,7 @@ void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock { part->info.min_block = part->info.max_block = increment.get(); part->info.mutation = 0; - part->name = part->getNewName(part->info); + part->setName(part->getNewName(part->info)); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2da18f69baf..8a21da69460 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -9262,7 +9262,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } MergeTreeData::MutableDataPartPtr new_data_part = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); - new_data_part->name = lost_part_name; + new_data_part->setName(lost_part_name); try { From 85531f32cfb5339c45dade1b84c2a20f0a694cfe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 19:32:44 +0300 Subject: [PATCH 225/230] Update 02804_clusterAllReplicas_insert.sql --- tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql | 1 + 1 file changed, 1 insertion(+) diff --git 
a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql index 05bda19eb9e..c39d9e7d78b 100644 --- a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql @@ -3,3 +3,4 @@ create table data (key Int) engine=Memory(); -- NOTE: internal_replication is false, so INSERT will be done only into one shard insert into function clusterAllReplicas(test_cluster_two_shards, currentDatabase(), data, rand()) values (2); select * from data order by key; +drop table data; From 2a8c7d0ea23e2b7a41d03d32b0fb44513fa309e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 21:52:19 +0300 Subject: [PATCH 226/230] Update src/Parsers/ParserCreateQuery.cpp Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/Parsers/ParserCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c4c02ab7417..415d3321eb5 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -304,7 +304,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E for (const auto & elem : list->children) { - if (auto *cd = elem->as()) + if (auto * cd = elem->as()) { if (cd->primary_key_specifier) { From a10aa9ad50db5bd3b95a7ebe4ccce4bf10c8e1f6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 3 May 2023 10:09:11 +0200 Subject: [PATCH 227/230] Force libunwind usage (removes gcc_eh support) libunwind is reentrant and signal safe, and works faster than gcc_eh (plus it has some custom patches for problems that have been found during its usage in ClickHouse). gcc_eh may be missing in the system (if gcc was not installed), and even if it exists clickhouse uses -nodefaultlibs, so some care should be made to make it work.
Also this library is tiny and there shouldn't be any problem to require it always (there is already a tendency to require some contrib libraries, i.e. poco). Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 1 - cmake/darwin/default_libs.cmake | 1 + cmake/target.cmake | 1 - cmake/unwind.cmake | 14 +------------- contrib/jemalloc-cmake/CMakeLists.txt | 17 +++++++---------- contrib/libcxx-cmake/CMakeLists.txt | 4 +--- contrib/libcxxabi-cmake/CMakeLists.txt | 6 ++---- docker/test/fasttest/run.sh | 1 - docs/en/development/build-cross-riscv.md | 2 +- programs/server/Server.cpp | 6 +----- src/Common/QueryProfiler.cpp | 8 ++++---- src/Common/QueryProfiler.h | 4 ++-- src/Common/StackTrace.cpp | 9 +-------- src/Common/config.h.in | 1 - .../System/StorageSystemBuildOptions.cpp.in | 1 - 15 files changed, 21 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06ee98b5ee1..45c3c422d7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,6 @@ if (ENABLE_FUZZING) set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) set (ENABLE_LIBRARIES 0) set (ENABLE_SSL 1) - set (USE_UNWIND ON) set (ENABLE_EMBEDDED_COMPILER 0) set (ENABLE_EXAMPLES 0) set (ENABLE_UTILS 0) diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 812847e6201..42b8473cb75 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -15,6 +15,7 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) include (cmake/cxx.cmake) link_libraries(global-group) diff --git a/cmake/target.cmake b/cmake/target.cmake index 0791da87bf0..ffab08f1103 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -40,7 +40,6 @@ if (CMAKE_CROSSCOMPILING) set (OPENSSL_NO_ASM ON CACHE INTERNAL "") set (ENABLE_JEMALLOC ON CACHE INTERNAL "") set (ENABLE_PARQUET OFF CACHE INTERNAL "") - set (USE_UNWIND OFF CACHE INTERNAL "") set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF
CACHE INTERNAL "") set (ENABLE_MYSQL OFF CACHE INTERNAL "") diff --git a/cmake/unwind.cmake b/cmake/unwind.cmake index c9f5f30a5d6..84e4f01b752 100644 --- a/cmake/unwind.cmake +++ b/cmake/unwind.cmake @@ -1,13 +1 @@ -option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) - -if (USE_UNWIND) - add_subdirectory(contrib/libunwind-cmake) - set (UNWIND_LIBRARIES unwind) - set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) - - message (STATUS "Using libunwind: ${UNWIND_LIBRARIES}") -else () - set (EXCEPTION_HANDLING_LIBRARY gcc_eh) -endif () - -message (STATUS "Using exception handler: ${EXCEPTION_HANDLING_LIBRARY}") +add_subdirectory(contrib/libunwind-cmake) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 97f723bb540..20025dfc63e 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -170,16 +170,13 @@ endif () target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1) -if (USE_UNWIND) - # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. - # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. - # At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. - - # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). - - target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) - target_link_libraries (_jemalloc PRIVATE unwind) -endif () +# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. +# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. 
+# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. +# +# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). +target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT target_compile_options(_jemalloc PRIVATE -D_GNU_SOURCE) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index a13e4f0f60a..b7e59e2c9a3 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -61,9 +61,7 @@ target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$ target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) if (USE_MUSL) target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0473527912e..c7ee34e6e28 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -35,12 +35,10 @@ target_include_directories(cxxabi SYSTEM BEFORE ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. -target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY}) +target_link_libraries(cxxabi PUBLIC unwind) # Enable capturing stack traces for all exceptions. 
-if (USE_UNWIND) - target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) install( TARGETS cxxabi diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 989ed9d2fbb..828c73e6781 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -166,7 +166,6 @@ function run_cmake "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" - "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index e3550a046c7..c21353f7f73 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7fbbcd39446..071f7d3177e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1627,7 +1627,7 @@ try /// Init trace collector only after trace_log system table was created /// Disable it if we collect test coverage information, because it will work extremely slow. 
-#if USE_UNWIND && !WITH_COVERAGE +#if !WITH_COVERAGE /// Profilers cannot work reliably with any other libunwind or without PHDR cache. if (hasPHDRCache()) { @@ -1650,10 +1650,6 @@ try /// Describe multiple reasons when query profiler cannot work. -#if !USE_UNWIND - LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they cannot work without bundled unwind (stack unwinding) library."); -#endif - #if WITH_COVERAGE LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage."); #endif diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 313d4b77739..dc9f3610513 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -91,7 +91,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -#if USE_UNWIND +#ifndef __APPLE__ Timer::Timer() : log(&Poco::Logger::get("Timer")) {} @@ -209,13 +209,13 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t UNUSED(pause_signal); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); -#elif !USE_UNWIND +#elif defined(__APPLE__) UNUSED(thread_id); UNUSED(clock_type); UNUSED(period); UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work with stock libunwind"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); #else /// Sanity check. if (!hasPHDRCache()) @@ -264,7 +264,7 @@ QueryProfilerBase::~QueryProfilerBase() template void QueryProfilerBase::cleanup() { -#if USE_UNWIND +#ifndef __APPLE__ timer.stop(); signal_handler_disarmed = true; #endif diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index 6a9ed10e315..87432a4b699 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -28,7 +28,7 @@ namespace DB * Note that signal handler implementation is defined by template parameter. See QueryProfilerReal and QueryProfilerCPU. 
*/ -#if USE_UNWIND +#ifndef __APPLE__ class Timer { public: @@ -60,7 +60,7 @@ private: Poco::Logger * log; -#if USE_UNWIND +#ifndef __APPLE__ inline static thread_local Timer timer = Timer(); #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index aea0f854fe1..c13b63854e4 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -20,13 +20,10 @@ #include #include #include +#include #include "config.h" -#if USE_UNWIND -# include -#endif - namespace { /// Currently this variable is set up once on server startup. @@ -287,12 +284,8 @@ StackTrace::StackTrace(const ucontext_t & signal_context) void StackTrace::tryCapture() { -#if USE_UNWIND size = unw_backtrace(frame_pointers.data(), capacity); __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0])); -#else - size = 0; -#endif } /// ClickHouse uses bundled libc++ so type names will be the same on every system thus it's safe to hardcode them diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 71b4e098c8f..1cb13d3ae3e 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,7 +9,6 @@ #cmakedefine01 USE_AWS_S3 #cmakedefine01 USE_AZURE_BLOB_STORAGE #cmakedefine01 USE_BROTLI -#cmakedefine01 USE_UNWIND #cmakedefine01 USE_CASSANDRA #cmakedefine01 USE_SENTRY #cmakedefine01 USE_GRPC diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index c2d35c96ce5..4e7a25d7726 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -23,7 +23,6 @@ const char * auto_config_build[] "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@ENABLE_JEMALLOC@", - "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", From 45d36b736a8d6b207fb9cf88f8f0ba8f2a7e0ce6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: 
Sat, 8 Jul 2023 23:14:02 +0000 Subject: [PATCH 228/230] Update version_date.tsv and changelogs after v23.6.2.18-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.6.2.18-stable.md | 25 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.6.2.18-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f13fcdc14d6..8a6324aef88 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e5be3f6d73..7f453627601 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8693193455f..1fa7b83ae16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.6.2.18-stable.md b/docs/changelogs/v23.6.2.18-stable.md new file mode 100644 index 00000000000..1f872a190ba --- /dev/null +++ b/docs/changelogs/v23.6.2.18-stable.md @@ -0,0 +1,25 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.2.18-stable (89f39a7ccfe) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Build/Testing/Packaging Improvement +* Backported in [#51888](https://github.com/ClickHouse/ClickHouse/issues/51888): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2a098d8c1da..dd46f6103d0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 @@ -55,6 +56,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 v22.8.17.17-lts 2023-04-22 From c968fe808fc1b7693e53bb3d4f9adc03f41c7066 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Jul 2023 23:17:41 +0000 Subject: [PATCH 229/230] Update version_date.tsv and changelogs after v22.8.20.11-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v22.8.20.11-lts.md | 20 ++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v22.8.20.11-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f13fcdc14d6..8a6324aef88 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG 
REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e5be3f6d73..7f453627601 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8693193455f..1fa7b83ae16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v22.8.20.11-lts.md b/docs/changelogs/v22.8.20.11-lts.md new file mode 100644 index 00000000000..bd45ce9319a --- /dev/null +++ b/docs/changelogs/v22.8.20.11-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.20.11-lts (c9ca79e24e8) FIXME as compared to v22.8.19.10-lts (989bc2fe8b0) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a 
null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). +* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2a098d8c1da..dd46f6103d0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 @@ -55,6 +56,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 v22.8.17.17-lts 2023-04-22 From 8d9e1d41c5d0dc8220b97c68ebe6a21c10042b2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Jul 2023 03:39:05 +0200 Subject: [PATCH 230/230] Move a test to the right place --- .../00178_query_datetime64_index.reference | 0 .../{1_stateful => 0_stateless}/00178_query_datetime64_index.sql | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/{1_stateful => 0_stateless}/00178_query_datetime64_index.reference (100%) rename tests/queries/{1_stateful => 0_stateless}/00178_query_datetime64_index.sql (100%) diff --git 
a/tests/queries/1_stateful/00178_query_datetime64_index.reference b/tests/queries/0_stateless/00178_query_datetime64_index.reference similarity index 100% rename from tests/queries/1_stateful/00178_query_datetime64_index.reference rename to tests/queries/0_stateless/00178_query_datetime64_index.reference diff --git a/tests/queries/1_stateful/00178_query_datetime64_index.sql b/tests/queries/0_stateless/00178_query_datetime64_index.sql similarity index 100% rename from tests/queries/1_stateful/00178_query_datetime64_index.sql rename to tests/queries/0_stateless/00178_query_datetime64_index.sql