From b2e2b4cce7f913957317b5df3eddf930ac8407f8 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 18 Apr 2022 19:58:57 +0800 Subject: [PATCH 001/522] fix attach table dictionaries function name normalizer --- src/Databases/DatabaseOrdinary.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index b5557d9a08d..baf93182a57 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +107,7 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; + FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (create_query.is_dictionary) @@ -128,6 +130,7 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; + FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (!create_query.is_dictionary) @@ -167,6 +170,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { + FunctionNameNormalizer().visit(ast.get()); auto * create_query = ast->as(); create_query->setDatabase(database_name); @@ -220,6 +224,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) { assert(name.database == database_name); + FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); tryAttachTable( From f091c8d1d8ff0577e60bf1aed0d3f97d30cdb35f Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 17 Jun 2022 16:42:05 +0800 Subject: [PATCH 002/522] fix: attach table normalizer, add test --- src/Databases/DatabaseOrdinary.cpp | 3 -- src/Interpreters/InterpreterCreateQuery.cpp | 1 + .../test_attach_table_normalizer/__init__.py | 0 .../configs/config.xml | 4 ++ .../test_attach_table_normalizer/test.py | 43 +++++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_attach_table_normalizer/__init__.py create mode 100644 tests/integration/test_attach_table_normalizer/configs/config.xml create mode 100644 tests/integration/test_attach_table_normalizer/test.py diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index baf93182a57..5708ff50323 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -107,7 +107,6 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; - FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (create_query.is_dictionary) @@ -170,7 +169,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { - 
FunctionNameNormalizer().visit(ast.get()); auto * create_query = ast->as(); create_query->setDatabase(database_name); @@ -224,7 +222,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, bool force_restore) { assert(name.database == database_name); - FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); tryAttachTable( diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index ed996430996..7eb293b1813 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -953,6 +953,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) // Table SQL definition is available even if the table is detached (even permanently) auto query = database->getCreateTableQuery(create.getTable(), getContext()); + FunctionNameNormalizer().visit(query.get()); auto create_query = query->as(); if (!create.is_dictionary && create_query.is_dictionary) diff --git a/tests/integration/test_attach_table_normalizer/__init__.py b/tests/integration/test_attach_table_normalizer/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_attach_table_normalizer/configs/config.xml b/tests/integration/test_attach_table_normalizer/configs/config.xml new file mode 100644 index 00000000000..0500e2ad554 --- /dev/null +++ b/tests/integration/test_attach_table_normalizer/configs/config.xml @@ -0,0 +1,4 @@ + + 1 + 1 + diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py new file mode 100644 index 00000000000..3e86d567c5b --- /dev/null +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -0,0 +1,43 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def replace_substring_to_substr(node): + node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root") + +@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) +def test_attach_substr(started_cluster, engine): + # Initialize + node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + + # Detach table file + node.query("DETACH TABLE file") + + # Replace subtring to substr + replace_substring_to_substr(node) + + # Attach table file + node.query("ATTACH TABLE file") + +@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) +def test_attach_substr(started_cluster, engine): + # Initialize + node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") + + # Replace subtring to substr + replace_substring_to_substr(node) + + # Restart clickhouse + node.restart_clickhouse(kill=True) From c7a85d565cb17c068528bdbf38a74d0ab29a1450 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 17 Jun 2022 17:51:33 +0800 Subject: [PATCH 003/522] fix: rename restart test --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 3e86d567c5b..5a31801b99c 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -32,7 +32,7 @@ def test_attach_substr(started_cluster, engine): node.query("ATTACH TABLE file") @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr(started_cluster, engine): +def test_attach_substr_restart(started_cluster, engine): # Initialize node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") From 701c687e7933f77ad51e91fa8bf1ef6ff2282f8d Mon Sep 17 00:00:00 2001 From: liyang830 Date: Sat, 18 Jun 2022 17:13:50 +0800 Subject: [PATCH 004/522] fix : test error --- src/Databases/DatabaseOrdinary.cpp | 2 +- tests/integration/test_attach_table_normalizer/test.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 5708ff50323..1477014a869 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -129,7 +129,6 @@ void DatabaseOrdinary::loadStoredObjects( const auto & name = name_with_path_and_query.first; const auto & path = name_with_path_and_query.second.path; const auto & ast = name_with_path_and_query.second.ast; - FunctionNameNormalizer().visit(ast.get()); const auto & create_query = ast->as(); if (!create_query.is_dictionary) @@ -169,6 +168,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { + FunctionNameNormalizer().visit(ast.get()); auto * create_query = ast->as(); create_query->setDatabase(database_name); diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 5a31801b99c..80c4b99dfcc 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -20,12 +20,13 @@ def replace_substring_to_substr(node): @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) def test_attach_substr(started_cluster, engine): # Initialize + node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") # Detach table file node.query("DETACH TABLE file") - # Replace subtring to substr + # Replace substring to substr replace_substring_to_substr(node) # Attach table file @@ -34,9 +35,10 @@ def test_attach_substr(started_cluster, engine): @pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) def test_attach_substr_restart(started_cluster, engine): # Initialize + node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") - # Replace subtring to substr + # Replace substring to substr replace_substring_to_substr(node) # Restart clickhouse From 252e750fd79090dc4fdb8bfb1317d8f8b1f3136c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 8 Jul 2022 17:57:24 +0200 Subject: [PATCH 005/522] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 80c4b99dfcc..f2d99588b94 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True) +node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True) @pytest.fixture(scope="module") From 2de309c34f366967b50aed8e504a6748b7543057 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Nov 2021 00:56:52 +0300 Subject: [PATCH 006/522] Add Linux RISC-V 64 build to CI --- cmake/target.cmake | 11 +++++++++++ docker/packager/packager | 18 ++++++++++++++++++ docs/en/development/build-cross-riscv.md | 2 +- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cmake/target.cmake b/cmake/target.cmake index 0fb5e8a20de..6b78a9253b2 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -33,6 +33,17 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") + elseif (ARCH_RISCV64 OFF CACHE INTERNAL "") + # RISC-V support is preliminary + set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") + set (ENABLE_LDAP OFF CACHE INTERNAL "") + set (OPENSSL_NO_ASM ON CACHE INTERNAL "") + set (ENABLE_JEMALLOC ON CACHE INTERNAL "") + set (ENABLE_PARQUET OFF CACHE INTERNAL "") + set (USE_UNWIND OFF CACHE INTERNAL "") + set (ENABLE_GRPC OFF CACHE INTERNAL "") + set (ENABLE_HDFS OFF CACHE INTERNAL "") + set (ENABLE_MYSQL OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/docker/packager/packager b/docker/packager/packager index 66eb568d460..98b864edbc6 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -130,6 +130,7 @@ def parse_env_variables( ARM_SUFFIX = "-aarch64" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" + RISCV_SUFFIX = "-riscv64" result = [] result.append("OUTPUT_DIR=/output") @@ -140,6 +141,7 @@ def parse_env_variables( is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) is_cross_arm = compiler.endswith(ARM_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) + is_cross_riscv = compiler.endswith(RISCV_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) if is_cross_darwin: @@ -186,6 +188,11 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) + elif is_cross_riscv: + cc = compiler[: -len(RISCV_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" + ) else: cc = compiler result.append("DEB_ARCH=amd64") @@ -329,6 +336,7 @@ if __name__ == "__main__": ) parser.add_argument("--output-dir", type=dir_name, required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") +<<<<<<< HEAD parser.add_argument( "--compiler", @@ -352,6 +360,16 @@ if __name__ == "__main__": ) parser.add_argument("--shared-libraries", action="store_true") +======= + parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", + "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64", + "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", + "clang-13-ppc64le", "clang-13-riscv64", + "clang-11-freebsd", 
"clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") + parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") + parser.add_argument("--unbundled", action="store_true") + parser.add_argument("--split-binary", action="store_true") +>>>>>>> 8bc7bf3d87 (Add Linux RISC-V 64 build to CI) parser.add_argument("--clang-tidy", action="store_true") parser.add_argument("--cache", choices=("ccache", "distcc", ""), default="") parser.add_argument( diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index a0b31ff131a..b94b1072f28 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-13 +## Install Clang-14 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` From 1bd3b8825c8bf72b0d32ff6a0287f853eebbdcaf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Nov 2021 02:48:38 +0300 Subject: [PATCH 007/522] Fix typo --- cmake/target.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/target.cmake b/cmake/target.cmake index 6b78a9253b2..86b060f53e1 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -33,7 +33,7 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") - elseif (ARCH_RISCV64 OFF CACHE INTERNAL "") + elseif (ARCH_RISCV64) # RISC-V support is preliminary set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") set (ENABLE_LDAP OFF CACHE INTERNAL "") From 1021b756ac33806bec7525bac9a1b45a76d9c507 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Aug 2022 06:12:38 +0200 Subject: [PATCH 008/522] Fix conflict --- docker/packager/packager | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/docker/packager/packager b/docker/packager/packager index 98b864edbc6..3769e321ccc 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -336,7 +336,6 @@ if __name__ == "__main__": ) parser.add_argument("--output-dir", type=dir_name, required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") -<<<<<<< HEAD parser.add_argument( "--compiler", @@ -347,6 +346,7 @@ if __name__ == "__main__": "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", + "clang-14-riscv64", "clang-14-freebsd", "gcc-11", ), @@ -360,16 +360,6 @@ if __name__ == "__main__": ) parser.add_argument("--shared-libraries", action="store_true") -======= - parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", - "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64", - "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", - "clang-13-ppc64le", "clang-13-riscv64", - "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") - parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") - parser.add_argument("--unbundled", action="store_true") - parser.add_argument("--split-binary", action="store_true") ->>>>>>> 8bc7bf3d87 (Add Linux RISC-V 64 build to CI) parser.add_argument("--clang-tidy", action="store_true") parser.add_argument("--cache", 
choices=("ccache", "distcc", ""), default="") parser.add_argument( From edc99648ade4ef39e633da31b97995f6b5d3cd5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Aug 2022 06:14:43 +0200 Subject: [PATCH 009/522] Add build to CI --- tests/ci/ci_config.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5b8f3b4227e..8dd4843cb88 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -177,6 +177,17 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, + "binary_riscv64": { + "compiler": "clang-14-riscv64", + "build_type": "", + "sanitizer": "", + "package_type": "binary", + "static_binary_name": "riscv64", + "bundled": "bundled", + "libraries": "static", + "tidy": "disable", + "with_coverage": False, + }, }, "builds_report_config": { "ClickHouse build check": [ @@ -198,6 +209,7 @@ CI_CONFIG = { "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", + "binary_riscv64", ], }, "tests_config": { From ec334a3a0866a773b2bb34f1d08be789831df33d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 14 Aug 2022 09:45:38 +0200 Subject: [PATCH 010/522] Programming in YAML with copy-paste --- .github/workflows/master.yml | 49 +++++++++++++++++++++++++++++- .github/workflows/pull_request.yml | 47 +++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 2acc1468328..69a28350945 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -923,6 +923,53 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinRISCV64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_riscv64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1009,8 +1056,8 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # 
- BuilderBinGCC - BuilderBinPPC64 + - BuilderBinRISCV64 - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 513df8487c4..f17c25cd164 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -935,6 +935,51 @@ jobs: # shellcheck disable=SC2046 docker rm -f $(docker ps -a -q) ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinRISCV64: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_riscv64 + EOF + - name: Download changed images + uses: actions/download-artifact@v2 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Build + run: | + git -C "$GITHUB_WORKSPACE" submodule sync --recursive + git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v2 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + # shellcheck disable=SC2046 + docker kill $(docker ps -q) ||: + # shellcheck disable=SC2046 + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -1021,8 +1066,8 @@ jobs: - BuilderBinDarwin - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - # - BuilderBinGCC - BuilderBinPPC64 + - BuilderBinRISCV64 - BuilderBinClangTidy - BuilderDebShared runs-on: [self-hosted, style-checker] From a517d9d65fcd6646944d45d295284edf3c87cf99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Aug 2022 04:23:56 +0200 Subject: [PATCH 011/522] Maybe make it possible --- docker/packager/binary/Dockerfile | 9 +++++++++ docker/packager/packager | 4 ++-- tests/ci/ci_config.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 74919bb2100..67e61f2036b 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -44,6 +44,15 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): RUN apt-get install binutils-riscv64-linux-gnu +# Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. +# It can be also used to check it for regular builds. 
+RUN git clone git@github.com:llvm/llvm-project.git \ + && mkdir llvm-build \ + && cd llvm-build \ + && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ + && ninja \ + && ninja install + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.16.0 diff --git a/docker/packager/packager b/docker/packager/packager index 3769e321ccc..03f3bd80c96 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -189,7 +189,7 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) elif is_cross_riscv: - cc = compiler[: -len(RISCV_SUFFIX)] + cc = "clang" cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" ) @@ -346,7 +346,7 @@ if __name__ == "__main__": "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", - "clang-14-riscv64", + "clang-trunk-riscv64", "clang-14-freebsd", "gcc-11", ), diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 8dd4843cb88..f66eba7d966 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -178,7 +178,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_riscv64": { - "compiler": "clang-14-riscv64", + "compiler": "clang-trunk-riscv64", "build_type": "", "sanitizer": "", "package_type": "binary", From e702adf0c967cafdc03d80b23d83a477c52eab09 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Aug 2022 06:23:00 +0200 Subject: [PATCH 012/522] Fix error --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 67e61f2036b..fdb2b324e06 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu # Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. # It can be also used to check it for regular builds. -RUN git clone git@github.com:llvm/llvm-project.git \ +RUN git clone https://github.com/llvm/llvm-project.git \ && mkdir llvm-build \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ From 7b99581e5c2a9d9e4cde387ae7cd6928cb9918bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Aug 2022 07:47:42 +0200 Subject: [PATCH 013/522] Better machines to build Docker images --- .github/workflows/backport_branches.yml | 4 ++-- .github/workflows/docs_check.yml | 4 ++-- .github/workflows/docs_release.yml | 4 ++-- .github/workflows/master.yml | 4 ++-- .github/workflows/nightly.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- .github/workflows/release_branches.yml | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index e1b2b1fad01..bd399e48100 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . 
-p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index b50584a2c01..850f690f44d 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -31,7 +31,7 @@ jobs: python3 run_check.py DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -49,7 +49,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index e0fdb0c2f7b..cb2f2b8453d 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -20,7 +20,7 @@ concurrency: workflow_dispatch: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -37,7 +37,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 69a28350945..552272b38e5 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index e712ada1551..bff937b832f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -11,7 +11,7 @@ env: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -28,7 +28,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index f17c25cd164..c10767c55e6 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -47,7 +47,7 @@ jobs: python3 -m unittest discover -s . 
-p '*_test.py' DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -65,7 +65,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index fdfedc56f5d..1f082f0ab64 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy jobs: DockerHubPushAarch64: - runs-on: [self-hosted, style-checker-aarch64] + runs-on: [self-hosted, func-tester-aarch64] steps: - name: Clear repository run: | @@ -30,7 +30,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, style-checker] + runs-on: [self-hosted, builder] steps: - name: Clear repository run: | From 2b82916175f1eda4d0456fcbcb7784b5d6ba377a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Aug 2022 09:13:04 +0200 Subject: [PATCH 014/522] Do not put garbage in the Docker image --- docker/packager/binary/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index fdb2b324e06..cbab3d501d6 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -51,7 +51,8 @@ RUN git clone https://github.com/llvm/llvm-project.git \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ && ninja \ - && ninja install + && ninja install \ + && cd .. && rm -rf llvm-build llvm-project # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH From 0eca4d9560ec20290aa35ae9765bf293dbfe01ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 20 Aug 2022 18:50:27 +0200 Subject: [PATCH 015/522] Maybe fix error --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index e9fb375d0a1..38c88421249 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 +Subproject commit 38c8842124940a26e7e851c083cd61c651a83ee3 From c08766aa3476e7faea38187061993eeb1b76454e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:51:59 +0200 Subject: [PATCH 016/522] Revert "Do not put garbage in the Docker image" This reverts commit 2b82916175f1eda4d0456fcbcb7784b5d6ba377a. --- docker/packager/binary/Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index cbab3d501d6..fdb2b324e06 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -51,8 +51,7 @@ RUN git clone https://github.com/llvm/llvm-project.git \ && cd llvm-build \ && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \ && ninja \ - && ninja install \ - && cd .. 
&& rm -rf llvm-build llvm-project + && ninja install # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH From 47ccb28ad387642bfc1549642dd43bfea5c06f4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:52:01 +0200 Subject: [PATCH 017/522] Revert "Better machines to build Docker images" This reverts commit 7b99581e5c2a9d9e4cde387ae7cd6928cb9918bc. --- .github/workflows/backport_branches.yml | 4 ++-- .github/workflows/docs_check.yml | 4 ++-- .github/workflows/docs_release.yml | 4 ++-- .github/workflows/master.yml | 4 ++-- .github/workflows/nightly.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- .github/workflows/release_branches.yml | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index bd399e48100..e1b2b1fad01 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 850f690f44d..b50584a2c01 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -31,7 +31,7 @@ jobs: python3 run_check.py DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -49,7 +49,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index cb2f2b8453d..e0fdb0c2f7b 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -20,7 +20,7 @@ concurrency: workflow_dispatch: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -37,7 +37,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 552272b38e5..69a28350945 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -22,7 +22,7 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 -m unittest discover -s . 
-p '*_test.py' DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -39,7 +39,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index bff937b832f..e712ada1551 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -11,7 +11,7 @@ env: jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -28,7 +28,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c10767c55e6..f17c25cd164 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -47,7 +47,7 @@ jobs: python3 -m unittest discover -s . -p '*_test.py' DockerHubPushAarch64: needs: CheckLabels - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -65,7 +65,7 @@ jobs: path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: needs: CheckLabels - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 1f082f0ab64..fdfedc56f5d 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -13,7 +13,7 @@ on: # yamllint disable-line rule:truthy jobs: DockerHubPushAarch64: - runs-on: [self-hosted, func-tester-aarch64] + runs-on: [self-hosted, style-checker-aarch64] steps: - name: Clear repository run: | @@ -30,7 +30,7 @@ jobs: name: changed_images_aarch64 path: ${{ runner.temp }}/docker_images_check/changed_images_aarch64.json DockerHubPushAmd64: - runs-on: [self-hosted, builder] + runs-on: [self-hosted, style-checker] steps: - name: Clear repository run: | From dd9085346af943c9dca0ab18fe7f4c16fda38ae5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 21 Aug 2022 20:52:03 +0200 Subject: [PATCH 018/522] Revert "Fix error" This reverts commit e702adf0c967cafdc03d80b23d83a477c52eab09. --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index fdb2b324e06..67e61f2036b 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -46,7 +46,7 @@ RUN apt-get install binutils-riscv64-linux-gnu # Build LLVM from trunk, because only that version can build the RISC-V binaries successfully. # It can be also used to check it for regular builds. 
-RUN git clone https://github.com/llvm/llvm-project.git \
+RUN git clone git@github.com:llvm/llvm-project.git \
     && mkdir llvm-build \
     && cd llvm-build \
     && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \
     && ninja \
     && ninja install

From cb2e5f316c442ac0528980f5204d0afa45d030ac Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 21 Aug 2022 20:52:04 +0200
Subject: [PATCH 019/522] Revert "Maybe make it possible"

This reverts commit a517d9d65fcd6646944d45d295284edf3c87cf99.
---
 docker/packager/binary/Dockerfile | 9 ---------
 docker/packager/packager          | 4 ++--
 tests/ci/ci_config.py             | 2 +-
 3 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile
index 67e61f2036b..74919bb2100 100644
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@@ -44,15 +44,6 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \
 # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work):
 RUN apt-get install binutils-riscv64-linux-gnu
 
-# Build LLVM from trunk, because only that version can build the RISC-V binaries successfully.
-# It can be also used to check it for regular builds.
-RUN git clone git@github.com:llvm/llvm-project.git \
-    && mkdir llvm-build \
-    && cd llvm-build \
-    && cmake -GNinja -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all -DLLVM_TARGETS_TO_BUILD=all ../llvm-project/llvm \
-    && ninja \
-    && ninja install
-
 # Architecture of the image when BuildKit/buildx is used
 ARG TARGETARCH
 ARG NFPM_VERSION=2.16.0
diff --git a/docker/packager/packager b/docker/packager/packager
index 03f3bd80c96..3769e321ccc 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -189,7 +189,7 @@ def parse_env_variables(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
         )
     elif is_cross_riscv:
-        cc = "clang"
+        cc = compiler[: -len(RISCV_SUFFIX)]
         cmake_flags.append(
             "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake"
         )
@@ -346,7 +346,7 @@ if __name__ == "__main__":
         "clang-14-darwin-aarch64",
         "clang-14-aarch64",
         "clang-14-ppc64le",
-        "clang-trunk-riscv64",
+        "clang-14-riscv64",
         "clang-14-freebsd",
         "gcc-11",
     ),
diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index aa5762d7536..7110ff628ad 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -178,7 +178,7 @@ CI_CONFIG = {
         "with_coverage": False,
     },
     "binary_riscv64": {
-        "compiler": "clang-trunk-riscv64",
+        "compiler": "clang-14-riscv64",
         "build_type": "",
         "sanitizer": "",
         "package_type": "binary",

From 935bc723299056c816646fb9067638a60ddfb085 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 14 Feb 2023 18:51:10 +0100
Subject: [PATCH 020/522] [TEST] Tune allocator

Processing of the default max_block_size can be faster than running
mmap()/munmap() plus memory dependencies. Here is an example:

    SELECT count() FROM zeros(10_000_000) WHERE NOT ignore(randomString(1000)) SETTINGS function_implementation='avx2'

- Before this patch it takes: ~6sec
- After: 1.3sec

And even though 128MiB should be enough, since for this query the size of
the allocation for the string will be 65409*(1000+1)=65474409 bytes, due to
rounding to a power of two it will not be, so let's simply try 256MiB
(another option is to use strict comparison for MMAP_THRESHOLD) and see
the perf tests.

But also note that this has other allocator side effects (performance,
fragmentation), so this is unlikely to go upstream.
I've found this while I was playing with PODArray [1]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/45654/2101b66570cbb9eb9a492afa8ab82d562c34336b/performance_comparison_[1/4]/report.html Signed-off-by: Azat Khuzhin --- src/Common/Allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index 5a66ddb63a2..c02210f2ece 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -8,7 +8,7 @@ * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html */ #ifdef NDEBUG - __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20); + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20); #else /** * In debug build, use small mmap threshold to reproduce more memory From 5781eb67cba3e827ecf47b7929c47777a6e48094 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 01:28:13 +0300 Subject: [PATCH 021/522] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index f2d99588b94..526da39935a 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -17,8 +17,7 @@ def started_cluster(): def replace_substring_to_substr(node): node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root") -@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr(started_cluster, engine): +def test_attach_substr(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") @@ -32,8 +31,7 @@ def test_attach_substr(started_cluster, engine): # Attach table file node.query("ATTACH TABLE file") -@pytest.mark.parametrize("engine", ['Ordinary', 'Atomic']) -def test_attach_substr_restart(started_cluster, engine): +def test_attach_substr_restart(started_cluster): # Initialize node.query("DROP TABLE IF EXISTS default.file") node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ") From 63982a20936bb384a4c4f88f9e4ed2282680e33b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 01:29:29 +0300 Subject: [PATCH 022/522] Delete config.xml --- .../test_attach_table_normalizer/configs/config.xml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 tests/integration/test_attach_table_normalizer/configs/config.xml diff --git a/tests/integration/test_attach_table_normalizer/configs/config.xml b/tests/integration/test_attach_table_normalizer/configs/config.xml deleted file mode 100644 index 0500e2ad554..00000000000 --- a/tests/integration/test_attach_table_normalizer/configs/config.xml +++ /dev/null @@ -1,4 +0,0 @@ - - 1 - 1 - From 0e01991eb7b1331d2fca09c94b3e41fdd5c32bb3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 26 Feb 2023 04:33:56 +0300 Subject: [PATCH 023/522] Update test.py --- .../test_attach_table_normalizer/test.py | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 526da39935a..ddbb02bf4ef 100644 --- 
a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py
--- a/tests/integration/test_attach_table_normalizer/test.py
+++ b/tests/integration/test_attach_table_normalizer/test.py
@@ -3,7 +3,9 @@ import pytest
 from helpers.cluster import ClickHouseCluster
 
 cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance('node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True)
+node = cluster.add_instance(
+    'node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True
+)
 
 
 @pytest.fixture(scope="module")
@@ -14,13 +16,24 @@ def started_cluster():
     finally:
         cluster.shutdown()
 
+
 def replace_substring_to_substr(node):
-    node.exec_in_container(["bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql"], user="root")
+    node.exec_in_container(
+        [
+            "bash",
+            "-c",
+            "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql",
+        ],
+        user="root",
+    )
+
 
 def test_attach_substr(started_cluster):
     # Initialize
     node.query("DROP TABLE IF EXISTS default.file")
-    node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")
+    node.query(
+        "CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n "
+    )
 
     # Detach table file
     node.query("DETACH TABLE file")
@@ -31,10 +44,13 @@ def test_attach_substr(started_cluster):
     # Attach table file
     node.query("ATTACH TABLE file")
 
+
 def test_attach_substr_restart(started_cluster):
     # Initialize
     node.query("DROP TABLE IF EXISTS default.file")
-    node.query("CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n ")
+    node.query(
+        "CREATE TABLE default.file(`s` String, `n` UInt8) ENGINE = MergeTree PARTITION BY substring(s, 1, 2) ORDER BY n "
+    )
 
     # Replace substring to substr
     replace_substring_to_substr(node)

From e997b1393ce12ba639049147afdedb13e338af38 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 19 Apr 2023 22:40:13 +0200
Subject: [PATCH 024/522] Play with MMAP_THRESHOLD (set it to 128MiB)

Signed-off-by: Azat Khuzhin
---
 src/Common/Allocator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp
index c02210f2ece..0fb90e5a47e 100644
--- a/src/Common/Allocator.cpp
+++ b/src/Common/Allocator.cpp
@@ -8,7 +8,7 @@
  * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
  */
 #ifdef NDEBUG
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 256 * (1ULL << 20);
+    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
 #else
     /**
      * In debug build, use small mmap threshold to reproduce more memory

From 491c26fb0aa08dd75adf46699225658fd9a45d5d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 29 Apr 2023 18:55:19 +0200
Subject: [PATCH 025/522] Slight improvement in Disks interface

---
 src/Disks/DiskEncrypted.cpp                   |  2 +-
 src/Disks/DiskEncrypted.h                     |  6 +--
 src/Disks/DiskLocal.cpp                       | 44 +++++++++++++------
 src/Disks/DiskLocal.h                         |  8 ++--
 src/Disks/IDisk.h                             |  8 ++--
 src/Disks/IVolume.cpp                         |  4 +-
 src/Disks/IVolume.h                           |  2 +-
 .../ObjectStorages/DiskObjectStorage.cpp      | 17 ++++---
 src/Disks/ObjectStorages/DiskObjectStorage.h  | 12 +++--
 src/Disks/StoragePolicy.cpp                   | 27 ++++++++++--
 src/Disks/VolumeJBOD.cpp                      | 22 +++++++---
 src/Disks/VolumeJBOD.h                        |  4 +-
 src/Functions/filesystem.cpp                  |  6 +--
 .../ServerAsynchronousMetrics.cpp             | 23 ++++++----
 .../MergeTree/MergeTreePartsMover.cpp         | 12 +++-
 src/Storages/System/StorageSystemDisks.cpp    |  6 +--
 16 files changed, 130 
insertions(+), 73 deletions(-) diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index db18e9652e7..1f8d75dbeb8 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -184,7 +184,7 @@ public: } UInt64 getSize() const override { return reservation->getSize(); } - UInt64 getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } + std::optional getUnreservedSpace() const override { return reservation->getUnreservedSpace(); } DiskPtr getDisk(size_t i) const override { diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 8e824a1f7e5..5d04558792e 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -256,17 +256,17 @@ public: return std::make_shared(*this); } - UInt64 getTotalSpace() const override + std::optional getTotalSpace() const override { return delegate->getTotalSpace(); } - UInt64 getAvailableSpace() const override + std::optional getAvailableSpace() const override { return delegate->getAvailableSpace(); } - UInt64 getUnreservedSpace() const override + std::optional getUnreservedSpace() const override { return delegate->getUnreservedSpace(); } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 49f28a19b31..af9d4ffd19c 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -97,7 +97,8 @@ static void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + auto total_space_of_local_disk = DiskLocal("tmp", tmp_path, 0).getTotalSpace(); + keep_free_space_bytes = total_space_of_local_disk ? static_cast(*total_space_of_local_disk * ratio) : 0; } } @@ -128,7 +129,7 @@ public: {} UInt64 getSize() const override { return size; } - UInt64 getUnreservedSpace() const override { return unreserved_space; } + std::optional getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override { @@ -225,8 +226,11 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) { std::lock_guard lock(DiskLocal::reservation_mutex); - UInt64 available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + auto available_space = getAvailableSpace(); + + UInt64 unreserved_space = available_space + ? 
*available_space - std::min(*available_space, reserved_bytes) + : std::numeric_limits::max(); if (bytes == 0) { @@ -237,12 +241,24 @@ std::optional DiskLocal::tryReserve(UInt64 bytes) if (unreserved_space >= bytes) { - LOG_TRACE( - logger, - "Reserved {} on local disk {}, having unreserved {}.", - ReadableSize(bytes), - backQuote(name), - ReadableSize(unreserved_space)); + if (available_space) + { + LOG_TRACE( + logger, + "Reserved {} on local disk {}, having unreserved {}.", + ReadableSize(bytes), + backQuote(name), + ReadableSize(unreserved_space)); + } + else + { + LOG_TRACE( + logger, + "Reserved {} on local disk {}.", + ReadableSize(bytes), + backQuote(name)); + } + ++reservation_count; reserved_bytes += bytes; return {unreserved_space - bytes}; @@ -268,14 +284,14 @@ static UInt64 getTotalSpaceByName(const String & name, const String & disk_path, return total_size - keep_free_space_bytes; } -UInt64 DiskLocal::getTotalSpace() const +std::optional DiskLocal::getTotalSpace() const { if (broken || readonly) return 0; return getTotalSpaceByName(name, disk_path, keep_free_space_bytes); } -UInt64 DiskLocal::getAvailableSpace() const +std::optional DiskLocal::getAvailableSpace() const { if (broken || readonly) return 0; @@ -292,10 +308,10 @@ UInt64 DiskLocal::getAvailableSpace() const return total_size - keep_free_space_bytes; } -UInt64 DiskLocal::getUnreservedSpace() const +std::optional DiskLocal::getUnreservedSpace() const { std::lock_guard lock(DiskLocal::reservation_mutex); - auto available_space = getAvailableSpace(); + auto available_space = *getAvailableSpace(); available_space -= std::min(available_space, reserved_bytes); return available_space; } diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 7ea2c04704c..6da62332726 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -31,11 +31,9 @@ public: ReservationPtr reserve(UInt64 bytes) override; - UInt64 getTotalSpace() const override; - - UInt64 getAvailableSpace() const override; - - UInt64 getUnreservedSpace() const override; + std::optional getTotalSpace() const override; + std::optional getAvailableSpace() const override; + std::optional getUnreservedSpace() const override; UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 68798047cfd..7202d1f5cfc 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -127,13 +127,13 @@ public: const String & getName() const override { return name; } /// Total available space on the disk. - virtual UInt64 getTotalSpace() const = 0; + virtual std::optional getTotalSpace() const = 0; /// Space currently available on the disk. - virtual UInt64 getAvailableSpace() const = 0; + virtual std::optional getAvailableSpace() const = 0; /// Space available for reservation (available space minus reserved space). - virtual UInt64 getUnreservedSpace() const = 0; + virtual std::optional getUnreservedSpace() const = 0; /// Amount of bytes which should be kept free on the disk. virtual UInt64 getKeepingFreeSpace() const { return 0; } @@ -463,7 +463,7 @@ public: /// Space available for reservation /// (with this reservation already take into account). - virtual UInt64 getUnreservedSpace() const = 0; + virtual std::optional getUnreservedSpace() const = 0; /// Get i-th disk where reservation take place. 
/// Get i-th disk where reservation take place. virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index eb474f12ad2..15b52acb422 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -49,9 +49,9 @@ IVolume::IVolume( throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Volume must contain at least one disk"); } -UInt64 IVolume::getMaxUnreservedFreeSpace() const +std::optional<UInt64> IVolume::getMaxUnreservedFreeSpace() const { - UInt64 res = 0; + std::optional<UInt64> res = 0; for (const auto & disk : disks) res = std::max(res, disk->getUnreservedSpace()); return res; } diff --git a/src/Disks/IVolume.h b/src/Disks/IVolume.h index ada28caa960..f40d4dcba60 100644 --- a/src/Disks/IVolume.h +++ b/src/Disks/IVolume.h @@ -74,7 +74,7 @@ public: virtual VolumeType getType() const = 0; /// Return biggest unreserved space across all disks - UInt64 getMaxUnreservedFreeSpace() const; + std::optional<UInt64> getMaxUnreservedFreeSpace() const; DiskPtr getDisk() const { return getDisk(0); } virtual DiskPtr getDisk(size_t i) const { return disks[i]; } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index bf5d0ab829d..2f4e0db070f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -469,18 +469,25 @@ void DiskObjectStorage::removeSharedRecursive( transaction->commit(); } -std::optional<UInt64> DiskObjectStorage::tryReserve(UInt64 bytes) +bool DiskObjectStorage::tryReserve(UInt64 bytes) { std::lock_guard lock(reservation_mutex); auto available_space = getAvailableSpace(); - UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes); + if (!available_space) + { + ++reservation_count; + reserved_bytes += bytes; + return true; + } + + UInt64 unreserved_space = *available_space - std::min(*available_space, reserved_bytes); if (bytes == 0) { LOG_TRACE(log, "Reserved 0 bytes on remote disk {}", backQuote(name)); ++reservation_count; - return {unreserved_space}; + return true; } if (unreserved_space >= bytes) @@ -493,14 +500,14 @@ std::optional<UInt64> DiskObjectStorage::tryReserve(UInt64 bytes) ReadableSize(unreserved_space)); ++reservation_count; reserved_bytes += bytes; - return {unreserved_space - bytes}; + return true; } else { LOG_TRACE(log, "Could not reserve {} on remote disk {}. 
Not enough unreserved space", ReadableSize(bytes), backQuote(name)); } - return {}; + return false; } bool DiskObjectStorage::supportsCache() const diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index 4372bc75950..2c544e01ca9 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -53,11 +53,9 @@ public: const std::string & getCacheName() const override { return object_storage->getCacheName(); } - UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } - - UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } + std::optional getTotalSpace() const override { return {}; } + std::optional getAvailableSpace() const override { return {}; } + std::optional getUnreservedSpace() const override { return {}; } UInt64 getKeepingFreeSpace() const override { return 0; } @@ -223,7 +221,7 @@ private: UInt64 reservation_count = 0; std::mutex reservation_mutex; - std::optional tryReserve(UInt64 bytes); + bool tryReserve(UInt64 bytes); const bool send_metadata; size_t threadpool_size; @@ -244,7 +242,7 @@ public: UInt64 getSize() const override { return size; } - UInt64 getUnreservedSpace() const override { return unreserved_space; } + std::optional getUnreservedSpace() const override { return unreserved_space; } DiskPtr getDisk(size_t i) const override; diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..92cca23ca76 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -211,7 +211,11 @@ UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const { UInt64 res = 0; for (const auto & volume : volumes) - res = std::max(res, volume->getMaxUnreservedFreeSpace()); + { + auto max_unreserved_for_volume = volume->getMaxUnreservedFreeSpace(); + if (max_unreserved_for_volume) + res = std::max(res, *max_unreserved_for_volume); + } return res; } @@ -248,22 +252,37 @@ ReservationPtr StoragePolicy::reserveAndCheck(UInt64 bytes) const ReservationPtr StoragePolicy::makeEmptyReservationOnLargestDisk() const { UInt64 max_space = 0; + bool found_bottomless_disk = false; DiskPtr max_disk; + for (const auto & volume : volumes) { for (const auto & disk : volume->getDisks()) { - auto avail_space = disk->getAvailableSpace(); - if (avail_space > max_space) + auto available_space = disk->getAvailableSpace(); + + if (!available_space) { - max_space = avail_space; + max_disk = disk; + found_bottomless_disk = true; + break; + } + + if (*available_space > max_space) + { + max_space = *available_space; max_disk = disk; } } + + if (found_bottomless_disk) + break; } + if (!max_disk) throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "There is no space on any disk in storage policy: {}. 
" "It's likely all disks are broken", name); + auto reservation = max_disk->reserve(0); if (!reservation) { diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 64bd2619665..885b1d56b0d 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -40,20 +40,28 @@ VolumeJBOD::VolumeJBOD( auto ratio = config.getDouble(config_prefix + ".max_data_part_size_ratio"); if (ratio < 0) throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "'max_data_part_size_ratio' have to be not less then 0."); + UInt64 sum_size = 0; std::vector sizes; for (const auto & disk : disks) { - sizes.push_back(disk->getTotalSpace()); - sum_size += sizes.back(); + auto size = disk->getTotalSpace(); + sizes.push_back(*size); + if (size) + sum_size += *size; + else + break; } - max_data_part_size = static_cast(sum_size * ratio / disks.size()); - for (size_t i = 0; i < disks.size(); ++i) + if (sizes.size() == disks.size()) { - if (sizes[i] < max_data_part_size) + max_data_part_size = static_cast(sum_size * ratio / disks.size()); + for (size_t i = 0; i < disks.size(); ++i) { - LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", - backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + if (sizes[i] < max_data_part_size) + { + LOG_WARNING(logger, "Disk {} on volume {} have not enough space ({}) for containing part the size of max_data_part_size ({})", + backQuote(disks[i]->getName()), backQuote(config_prefix), ReadableSize(sizes[i]), ReadableSize(max_data_part_size)); + } } } } diff --git a/src/Disks/VolumeJBOD.h b/src/Disks/VolumeJBOD.h index ef6f215bf18..8d270a6c71c 100644 --- a/src/Disks/VolumeJBOD.h +++ b/src/Disks/VolumeJBOD.h @@ -68,7 +68,7 @@ private: struct DiskWithSize { DiskPtr disk; - uint64_t free_size = 0; + std::optional free_size = 0; DiskWithSize(DiskPtr disk_) : disk(disk_) @@ -80,7 +80,7 @@ private: return free_size < rhs.free_size; } - ReservationPtr reserve(uint64_t bytes) + ReservationPtr reserve(UInt64 bytes) { ReservationPtr reservation = disk->reserve(bytes); if (!reservation) diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 1eb1c27211c..9fbf9b0cbe7 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -22,19 +22,19 @@ namespace struct FilesystemAvailable { static constexpr auto name = "filesystemAvailable"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getAvailableSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getAvailableSpace().value_or(std::numeric_limits::max()); } }; struct FilesystemUnreserved { static constexpr auto name = "filesystemUnreserved"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getUnreservedSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getUnreservedSpace().value_or(std::numeric_limits::max()); } }; struct FilesystemCapacity { static constexpr auto name = "filesystemCapacity"; - static std::uintmax_t get(const DiskPtr & disk) { return disk->getTotalSpace(); } + static UInt64 get(const DiskPtr & disk) { return disk->getTotalSpace().value_or(std::numeric_limits::max()); } }; template diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index e6e1a03f11c..0fbcfc9e6a1 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -191,14 +191,21 @@ void 
ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values auto available = disk->getAvailableSpace(); auto unreserved = disk->getUnreservedSpace(); - new_values[fmt::format("DiskTotal_{}", name)] = { total, - "The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; - new_values[fmt::format("DiskUsed_{}", name)] = { total - available, - "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; - new_values[fmt::format("DiskAvailable_{}", name)] = { available, - "Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB." }; - new_values[fmt::format("DiskUnreserved_{}", name)] = { unreserved, - "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB." }; + new_values[fmt::format("DiskTotal_{}", name)] = { *total, + "The total size in bytes of the disk (virtual filesystem). Remote filesystems may not provide this information." }; + + if (available) + { + new_values[fmt::format("DiskUsed_{}", name)] = { *total - *available, + "Used bytes on the disk (virtual filesystem). Remote filesystems not always provide this information." }; + + new_values[fmt::format("DiskAvailable_{}", name)] = { *available, + "Available bytes on the disk (virtual filesystem). Remote filesystems may not provide this information." }; + } + + if (unreserved) + new_values[fmt::format("DiskUnreserved_{}", name)] = { *unreserved, + "Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems may not provide this information." }; } } diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index e1da57744b3..391b04573d7 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -111,11 +111,15 @@ bool MergeTreePartsMover::selectPartsForMove( { for (const auto & disk : volumes[i]->getDisks()) { - UInt64 required_maximum_available_space = static_cast(disk->getTotalSpace() * policy->getMoveFactor()); - UInt64 unreserved_space = disk->getUnreservedSpace(); + auto total_space = disk->getTotalSpace(); + auto unreserved_space = disk->getUnreservedSpace(); + if (total_space && unreserved_space) + { + UInt64 required_maximum_available_space = static_cast(*total_space * policy->getMoveFactor()); - if (unreserved_space < required_maximum_available_space && !disk->isBroken()) - need_to_move.emplace(disk, required_maximum_available_space - unreserved_space); + if (*unreserved_space < required_maximum_available_space && !disk->isBroken()) + need_to_move.emplace(disk, required_maximum_available_space - *unreserved_space); + } } } } diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 002da7abd14..23a00cc7ae5 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -64,9 +64,9 @@ Pipe StorageSystemDisks::read( { col_name->insert(disk_name); col_path->insert(disk_ptr->getPath()); - col_free->insert(disk_ptr->getAvailableSpace()); - col_total->insert(disk_ptr->getTotalSpace()); - col_unreserved->insert(disk_ptr->getUnreservedSpace()); + col_free->insert(disk_ptr->getAvailableSpace().value_or(std::numeric_limits::max())); + 
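Two different conventions appear above for surfacing the now-possibly-unknown values to users, and a short self-contained sketch (stand-in values, not patch code) makes the contrast explicit: SQL-visible surfaces such as system.disks and the filesystem*() functions fall back to UInt64 max via value_or(), which renders as roughly 16 EiB, while the per-disk asynchronous metrics are simply not emitted when the optional is disengaged:

#include <cstdint>
#include <iostream>
#include <limits>
#include <map>
#include <optional>
#include <string>

using UInt64 = std::uint64_t;

int main()
{
    // Stand-in for what DiskObjectStorage::getAvailableSpace() now returns.
    std::optional<UInt64> available = std::nullopt;

    // system.disks / filesystemAvailable(): sentinel fallback, as above.
    UInt64 shown = available.value_or(std::numeric_limits<UInt64>::max());
    std::cout << "col_free would contain " << shown << '\n'; // 18446744073709551615

    // ServerAsynchronousMetrics: emit DiskAvailable_* only when the value is known.
    std::map<std::string, UInt64> new_values;
    if (available)
        new_values["DiskAvailable_default"] = *available;
    std::cout << "metrics emitted: " << new_values.size() << '\n'; // 0
}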
col_total->insert(disk_ptr->getTotalSpace().value_or(std::numeric_limits::max())); + col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); col_type->insert(toString(data_source_description.type)); From e1bf96a786be0883993d2d9e8a5d2c1fcd89095c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 14:29:15 +0800 Subject: [PATCH 026/522] finish dev --- src/Functions/geohashEncode.cpp | 76 ++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index bc0c8b8fc5f..a05fa7fc8d6 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -37,7 +37,7 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } + // ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } @@ -59,7 +59,50 @@ public: } template - bool tryExecute(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const + bool tryVectorVector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + { + const ColumnVector * longitude = checkAndGetColumn>(lon_column); + const ColumnVector * latitude = checkAndGetColumn>(lat_column); + if (!latitude || !longitude) + return false; + + auto col_str = ColumnString::create(); + ColumnString::Chars & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + + const size_t size = lat_column->size(); + + out_offsets.resize(size); + out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); + + char * begin = reinterpret_cast(out_vec.data()); + char * pos = begin; + + for (size_t i = 0; i < size; ++i) + { + const Float64 longitude_value = longitude->getElement(i); + const Float64 latitude_value = latitude->getElement(i); + const UInt64 precision_value = std::min(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH); + + const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); + + pos += encoded_size; + *pos = '\0'; + out_offsets[i] = ++pos - begin; + } + out_vec.resize(pos - begin); + + if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); + + result = std::move(col_str); + + return true; + + } + + template + bool tryVectorConstant(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const { const ColumnVector * longitude = checkAndGetColumn>(lon_column); const ColumnVector * latitude = checkAndGetColumn>(lat_column); @@ -105,16 +148,29 @@ public: const IColumn * longitude = arguments[0].column.get(); const IColumn * latitude = arguments[1].column.get(); - const UInt64 precision_value = std::min(GEOHASH_MAX_TEXT_LENGTH, - arguments.size() == 3 ? 
arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); + if (arguments.size() < 3 || isColumnConst(*arguments[3].column)) + { + const UInt64 precision_value = std::min( + GEOHASH_MAX_TEXT_LENGTH, arguments.size() == 3 ? arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - ColumnPtr res_column; + ColumnPtr res_column; + if (tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column) + || tryVectorConstant(longitude, latitude, precision_value, res_column)) + return res_column; + } + else + { + const IColumn * precision = arguments[2].column.get(); + ColumnPtr res_column; + if (tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column) + || tryVectorVector(longitude, latitude, precision, res_column)) + return res_column; - if (tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column) || - tryExecute(longitude, latitude, precision_value, res_column)) - return res_column; + } std::string arguments_description; for (size_t i = 0; i < arguments.size(); ++i) From 1f91a75b5472f3f1321aac9a76c3078880ba5dc9 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 15:52:58 +0800 Subject: [PATCH 027/522] fix bugs of geoHashEncode --- src/Functions/geohashEncode.cpp | 114 ++++++------------------------- src/Storages/HDFS/HDFSCommon.cpp | 4 +- 2 files changed, 22 insertions(+), 96 deletions(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index a05fa7fc8d6..5f225a96c2b 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -58,14 +59,25 @@ public: return std::make_shared(); } - template - bool tryVectorVector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; + const IColumn * longitude = arguments[0].column.get(); + const IColumn * latitude = arguments[1].column.get(); + ColumnPtr precision; + if (arguments.size() < 3) + precision = DataTypeUInt8().createColumnConst(longitude->size(), GEOHASH_MAX_TEXT_LENGTH); + else + precision = arguments[2].column; + + ColumnPtr res_column; + vector(longitude, latitude, precision.get(), res_column); + return res_column; + } + +private: + void vector(const IColumn * lon_column, const IColumn * lat_column, const IColumn * precision_column, ColumnPtr & result) const + { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); @@ -80,8 +92,8 @@ public: for (size_t i = 0; i < size; ++i) { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); + const Float64 longitude_value = lon_column->getFloat64(i); + const Float64 latitude_value = lat_column->getFloat64(i); const UInt64 
precision_value = std::min(precision_column->get64(i), GEOHASH_MAX_TEXT_LENGTH); const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); @@ -96,92 +108,6 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); result = std::move(col_str); - - return true; - - } - - template - bool tryVectorConstant(const IColumn * lon_column, const IColumn * lat_column, UInt64 precision_value, ColumnPtr & result) const - { - const ColumnVector * longitude = checkAndGetColumn>(lon_column); - const ColumnVector * latitude = checkAndGetColumn>(lat_column); - if (!latitude || !longitude) - return false; - - auto col_str = ColumnString::create(); - ColumnString::Chars & out_vec = col_str->getChars(); - ColumnString::Offsets & out_offsets = col_str->getOffsets(); - - const size_t size = lat_column->size(); - - out_offsets.resize(size); - out_vec.resize(size * (GEOHASH_MAX_TEXT_LENGTH + 1)); - - char * begin = reinterpret_cast(out_vec.data()); - char * pos = begin; - - for (size_t i = 0; i < size; ++i) - { - const Float64 longitude_value = longitude->getElement(i); - const Float64 latitude_value = latitude->getElement(i); - - const size_t encoded_size = geohashEncode(longitude_value, latitude_value, precision_value, pos); - - pos += encoded_size; - *pos = '\0'; - out_offsets[i] = ++pos - begin; - } - out_vec.resize(pos - begin); - - if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); - - result = std::move(col_str); - - return true; - - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - const IColumn * longitude = arguments[0].column.get(); - const IColumn * latitude = arguments[1].column.get(); - - if (arguments.size() < 3 || isColumnConst(*arguments[3].column)) - { - const UInt64 precision_value = std::min( - GEOHASH_MAX_TEXT_LENGTH, arguments.size() == 3 ? 
arguments[2].column->get64(0) : GEOHASH_MAX_TEXT_LENGTH); - - ColumnPtr res_column; - if (tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column) - || tryVectorConstant(longitude, latitude, precision_value, res_column)) - return res_column; - } - else - { - const IColumn * precision = arguments[2].column.get(); - ColumnPtr res_column; - if (tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column) - || tryVectorVector(longitude, latitude, precision, res_column)) - return res_column; - - } - - std::string arguments_description; - for (size_t i = 0; i < arguments.size(); ++i) - { - if (i != 0) - arguments_description += ", "; - arguments_description += arguments[i].column->getName(); - } - - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument types: {} for function {}", - arguments_description, getName()); } }; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index 932e80831fe..7b149518c0a 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -38,8 +38,8 @@ HDFSFileInfo::~HDFSFileInfo() } -void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration & config, - const String & prefix, bool isUser) +void HDFSBuilderWrapper::loadFromConfig( + const Poco::Util::AbstractConfiguration & config, const String & prefix, [[maybe_unused]] bool isUser) { Poco::Util::AbstractConfiguration::Keys keys; From 39806657711f933c7e0d0fa04e8cc0e8cd769eaa Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 15:58:28 +0800 Subject: [PATCH 028/522] fix uts --- tests/queries/0_stateless/00932_geohash_support.reference | 4 ++++ tests/queries/0_stateless/00932_geohash_support.sql | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00932_geohash_support.reference b/tests/queries/0_stateless/00932_geohash_support.reference index ffc290681c7..0dcb084eb6d 100644 --- a/tests/queries/0_stateless/00932_geohash_support.reference +++ b/tests/queries/0_stateless/00932_geohash_support.reference @@ -9,6 +9,10 @@ default precision: ezs42d000000 mixing const and non-const-columns: ezs42d000000 +ezs42d000000 +ezs42d000000 +ezs42d000000 +ezs42d000000 from table (with const precision): 1 6 Ok 1 6 Ok diff --git a/tests/queries/0_stateless/00932_geohash_support.sql b/tests/queries/0_stateless/00932_geohash_support.sql index aeed72176b9..89f8eba9ca2 100644 --- a/tests/queries/0_stateless/00932_geohash_support.sql +++ b/tests/queries/0_stateless/00932_geohash_support.sql @@ -24,7 +24,10 @@ select geohashEncode(-5.60302734375, 42.593994140625); select 'mixing const and non-const-columns:'; select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), 0); -select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); -- { serverError 44 } +select geohashEncode(materialize(-5.60302734375), materialize(42.593994140625), materialize(0)); +select geohashEncode(-5.60302734375, materialize(42.593994140625), 0); +select geohashEncode(materialize(-5.60302734375), 42.593994140625, 0); +select geohashEncode(-5.60302734375, 42.593994140625, 0); select 'from table (with const precision):'; From 056e5824b57a78314b7ae565585ef0afea1bd836 Mon Sep 17 
00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 22 May 2023 16:02:42 +0800 Subject: [PATCH 029/522] remove useless code --- src/Functions/geohashEncode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 5f225a96c2b..ff61bf7d27c 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -38,7 +38,6 @@ public: bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } - // ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {2}; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } From ceecb1488af0fe4413053c87d9042b9d79602371 Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Mon, 22 May 2023 17:29:52 +0800 Subject: [PATCH 030/522] add function arrayJaccardIndex --- src/Functions/array/arrayJaccardIndex.cpp | 161 ++++++++++++++++++ .../02737_arrayJaccardIndex.reference | 32 ++++ .../0_stateless/02737_arrayJaccardIndex.sql | 26 +++ 3 files changed, 219 insertions(+) create mode 100644 src/Functions/array/arrayJaccardIndex.cpp create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex.reference create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex.sql diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp new file mode 100644 index 00000000000..8cce98ab64d --- /dev/null +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; +} + +class FunctionArrayJaccardIndex : public IFunction +{ +public: + using ResultType = Float64; + static constexpr auto name = "arrayJaccardIndex"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + explicit FunctionArrayJaccardIndex(ContextPtr context_) : context(context_) {} + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + DataTypes types; + for (size_t i = 0; i < 2; ++i) + { + const auto * array_type = checkAndGetDataType(arguments[i].get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array, but it has type{}.", i + 1, getName(), arguments[i]->getName()); + } + return std::make_shared>(); + } + + template + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + size_t left_size; + size_t right_size; + for (size_t i = 0; i < res.size(); ++i) + { + if constexpr (is_const_left) + left_size = left_offsets[0]; + else + left_size = left_offsets[i] - left_offsets[i - 1]; + if constexpr (is_const_right) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i 
- 1]; + + size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; + res[i] = static_cast<ResultType>(intersect_size) / (left_size + right_size - intersect_size); + if (unlikely(isnan(res[i]))) + res[i] = 1; + } + } + + template <bool is_const_left, bool is_const_right> + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res) + { + size_t left_size; + size_t right_size; + for (size_t i = 0; i < res.size(); ++i) + { + if constexpr (is_const_left) + left_size = left_offsets[0]; + else + left_size = left_offsets[i] - left_offsets[i - 1]; + if constexpr (is_const_right) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i - 1]; + + res[i] = static_cast<ResultType>(left_size + right_size == 0); + } + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + bool is_const_left; + bool is_const_right; + const ColumnArray * left_array; + const ColumnArray * right_array; + + auto cast_array = [&](const ColumnWithTypeAndName & col) + { + const ColumnArray * res; + bool is_const = false; + if (typeid_cast(col.column.get())) + { + res = checkAndGetColumn<ColumnArray>(checkAndGetColumnConst<ColumnArray>(col.column.get())->getDataColumnPtr().get()); + is_const = true; + } + else if (!(res = checkAndGetColumn<ColumnArray>(col.column.get()))) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", + col.column->getName(), getName()); + return std::make_pair(res, is_const); + }; + + std::tie(left_array, is_const_left) = cast_array(arguments[0]); + std::tie(right_array, is_const_right) = cast_array(arguments[1]); + + auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments); + ColumnWithTypeAndName intersect_column; + intersect_column.type = intersect_array->getResultType(); + intersect_column.column = intersect_array->execute(arguments, intersect_column.type, input_rows_count); + const auto * return_type_intersect = checkAndGetDataType<DataTypeArray>(intersect_column.type.get()); + if (!return_type_intersect) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type for function arrayIntersect"); + + auto col_res = ColumnVector<ResultType>::create(); + typename ColumnVector<ResultType>::Container & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + +#define EXECUTE_VECTOR(is_const_left, is_const_right) \ + if (typeid_cast<const DataTypeNothing *>(return_type_intersect->getNestedType().get())) \ + vectorWithEmptyIntersect<is_const_left, is_const_right>(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + else \ + { \ + const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(intersect_column.column.get()); \ + vector<is_const_left, is_const_right>(col_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + } + + if (!is_const_left && !is_const_right) + EXECUTE_VECTOR(false, false) + else if (!is_const_left && is_const_right) + EXECUTE_VECTOR(false, true) + else if (is_const_left && !is_const_right) + EXECUTE_VECTOR(true, false) + else + EXECUTE_VECTOR(true, true) + +#undef EXECUTE_VECTOR + + return col_res; + } + +private: + ContextPtr context; +}; + +REGISTER_FUNCTION(ArrayJaccardIndex) +{ + factory.registerFunction<FunctionArrayJaccardIndex>(); +} + +} diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference new file mode 100644 index 00000000000..e6934bfe092 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference @@ -0,0 +1,32 @@ +0 +0.5 +1 +0.67 +1 +0 +0 +0 +1 +0 +0 +0 +0 +0.5 +1 +0.67 +0.5 +0.5 +0.5 +0.5 +1 +1 +1 +1 +1 +1 +1 +1 +0.33 +0.2 +1 +1 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql new file mode 100644 index 00000000000..c3f04ba0b10 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -0,0 +1,26 @@ +drop table if exists array_jaccard_index; + +create table array_jaccard_index (arr Array(UInt8)) engine=MergeTree partition by arr order by arr; + +insert into array_jaccard_index values ([1,2,3]); +insert into array_jaccard_index values ([1,2]); +insert into array_jaccard_index values ([1]); +insert into array_jaccard_index values ([]); + +select round(arrayJaccardIndex(arr, [1,2]), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex(arr, []), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([], arr), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([1,2], arr), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([1,2], [1,2,3,4]), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex([], []), 2) from array_jaccard_index order by arr; +select round(arrayJaccardIndex(arr, arr), 2) from array_jaccard_index order by arr; + +drop table if exists array_jaccard_index; + +select round(arrayJaccardIndex(['a'], ['a', 'aa', 'aaa']), 2); + +select round(arrayJaccardIndex([1, 1.1, 2.2], [2.2, 3.3, 444]), 2); + +select round(arrayJaccardIndex([], []), 2); + +select round(arrayJaccardIndex([toUInt16(1)], [toUInt32(1)]), 2);
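For reference, the quantity the new function computes per row is the Jaccard index J(A, B) = |A ∩ B| / |A ∪ B| = |A ∩ B| / (|A| + |B| - |A ∩ B|), with J defined as 1 when both arrays are empty; that convention is what the isnan() fixup and the vectorWithEmptyIntersect() branch above implement. A standalone sketch of the formula, treating the arrays as sets (note that the patch itself derives |A| and |B| from raw array offsets, so duplicate elements count toward the denominator there):

#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

// Standalone illustration (not ClickHouse code) of the per-row formula:
// |A ∩ B| / (|A| + |B| - |A ∩ B|), with J = 1 for two empty arrays.
double jaccardIndex(const std::vector<uint64_t> & a, const std::vector<uint64_t> & b)
{
    std::set<uint64_t> left(a.begin(), a.end());
    std::set<uint64_t> right(b.begin(), b.end());

    size_t intersect_size = 0;
    for (uint64_t x : left)
        intersect_size += right.count(x);

    size_t union_size = left.size() + right.size() - intersect_size;
    return union_size == 0 ? 1.0 : static_cast<double>(intersect_size) / union_size;
}

int main()
{
    std::cout << jaccardIndex({1, 2}, {1, 2, 3, 4}) << '\n'; // 0.5
    std::cout << jaccardIndex({}, {}) << '\n';               // 1, by convention
}

The first value matches the expectation exercised in 02737_arrayJaccardIndex.sql above: [1,2] against [1,2,3,4] shares 2 of 4 distinct elements, giving 0.5.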
From 656d6abb2ee2d221df01367bcb53465289e4981c Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Mon, 22 May 2023 22:49:02 +0800 Subject: [PATCH 031/522] fix style --- src/Functions/array/arrayJaccardIndex.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 8cce98ab64d..c1ec8b53d25 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -100,7 +100,7 @@ public: { const ColumnArray * res; bool is_const = false; - if (typeid_cast(col.column.get())) + if (typeid_cast(col.column.get())) { res = checkAndGetColumn<ColumnArray>(checkAndGetColumnConst<ColumnArray>(col.column.get())->getDataColumnPtr().get()); is_const = true; From b0b9f2a037918b8f745df952a9491b97de6fdada Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Tue, 23 May 2023 10:39:23 +0800 Subject: [PATCH 032/522] fix test --- .../02415_all_new_functions_must_be_documented.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 79a6ad1fa2d..5ef83a57ecf 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -112,6 +112,7 @@ arrayFirstIndex arrayFirstOrNull arrayFlatten arrayIntersect +arrayJaccardIndex arrayJoin arrayLast arrayLastIndex From 87907dafa7a8179382c98cb1718b58a002617e08 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 24 May 2023 14:27:37 +0800 Subject: [PATCH 033/522] fix code style --- src/Functions/geohashEncode.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index ff61bf7d27c..7c353b822aa 100644 --- 
a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int ILLEGAL_COLUMN; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; } From adfedb4df01bd0dcd2870df5f6b28b82017650a0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 6 Jun 2023 14:46:34 +0200 Subject: [PATCH 034/522] Add USE NAMED COLLECTION access --- src/Access/Common/AccessRightsElement.cpp | 2 +- src/Access/Common/AccessType.h | 1 + .../ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 29 +++++++++++------ src/Storages/NamedCollectionsHelpers.h | 2 +- .../helpers/0_common_instance_users.xml | 6 +++- .../test_storage_s3/configs/access.xml | 19 +++++++++++ tests/integration/test_storage_s3/test.py | 32 +++++++++++++++---- 9 files changed, 74 insertions(+), 21 deletions(-) create mode 100644 tests/integration/test_storage_s3/configs/access.xml diff --git a/src/Access/Common/AccessRightsElement.cpp b/src/Access/Common/AccessRightsElement.cpp index e11d43634ec..835f414df37 100644 --- a/src/Access/Common/AccessRightsElement.cpp +++ b/src/Access/Common/AccessRightsElement.cpp @@ -155,7 +155,7 @@ namespace AccessRightsElement::AccessRightsElement(AccessFlags access_flags_, std::string_view database_) - : access_flags(access_flags_), database(database_), any_database(false) + : access_flags(access_flags_), database(database_), parameter(database_), any_database(false), any_parameter(false) { } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 6394c0279a7..6625ccb652b 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -142,6 +142,7 @@ enum class AccessType M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ + M(USE_NAMED_COLLECTION, "USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index 65147ee664e..2dc7f6145b3 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -217,7 +217,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) std::optional configuration; std::string settings_config_prefix = config_prefix + ".clickhouse"; - auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 730217f96b7..e61409e2b54 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -71,7 +71,7 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory) MySQLSettings mysql_settings; std::optional dictionary_configuration; - auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix) : nullptr; + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { auto allowed_arguments{dictionary_allowed_keys}; diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 83128ab025a..efd5af29f48 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -1,4 +1,5 @@ #include "NamedCollectionsHelpers.h" +#include #include #include #include @@ -15,19 +16,16 @@ namespace ErrorCodes namespace { - NamedCollectionPtr tryGetNamedCollectionFromASTs(ASTs asts, bool throw_unknown_collection) + std::optional getCollectionName(ASTs asts) { if (asts.empty()) - return nullptr; + return std::nullopt; const auto * identifier = asts[0]->as(); if (!identifier) - return nullptr; + return std::nullopt; - const auto & collection_name = identifier->name(); - if (throw_unknown_collection) - return NamedCollectionFactory::instance().get(collection_name); - return NamedCollectionFactory::instance().tryGet(collection_name); + return identifier->name(); } std::optional>> getKeyValueFromAST(ASTPtr ast, bool fallback_to_ast_value, ContextPtr context) @@ -74,10 +72,21 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( NamedCollectionUtils::loadIfNot(); - auto collection = tryGetNamedCollectionFromASTs(asts, throw_unknown_collection); + auto collection_name = getCollectionName(asts); + if (!collection_name.has_value()) + return nullptr; + + NamedCollectionPtr collection; + if (throw_unknown_collection) + collection = NamedCollectionFactory::instance().get(*collection_name); + else + collection = NamedCollectionFactory::instance().tryGet(*collection_name); + if (!collection) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + auto collection_copy = collection->duplicate(); if (asts.size() == 1) @@ -106,12 +115,14 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( } MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( - const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) + const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto collection_name = config.getString(config_prefix + ".name", ""); if (collection_name.empty()) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name); + const auto & collection = NamedCollectionFactory::instance().get(collection_name); auto collection_copy = collection->duplicate(); diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index 1473a3fbe48..15ed7c9e19b 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -22,7 +22,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( ASTs asts, ContextPtr context, bool throw_unknown_collection = true, std::vector> * complex_args = nullptr); /// Helper function to get named collection for dictionary source. /// Dictionaries have collection name as name argument of dict configuration and other arguments are overrides. 
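The pattern both helper overloads now follow is: resolve the collection name first, run the access check against that exact name, and only then copy the collection and apply overrides. A hedged, self-contained sketch of that ordering with stand-in types (the real interfaces are ContextPtr, NamedCollectionFactory, and AccessType::USE_NAMED_COLLECTION):

#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>

// Stand-in for the server context; the real call is
// context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name).
struct Context
{
    bool use_named_collection_granted = false;

    void checkAccess(const std::string & collection_name) const
    {
        if (!use_named_collection_granted)
            throw std::runtime_error(
                "To execute this query it's necessary to have grant "
                "USE NAMED COLLECTION ON " + collection_name);
    }
};

std::optional<std::string> tryGetCollection(const Context & context, const std::string & name)
{
    // The check runs before any collection values are exposed or overridden.
    context.checkAccess(name);
    return name; // the real helper returns a mutable copy of the collection
}

int main()
{
    Context context; // no grant yet
    try
    {
        tryGetCollection(context, "s3_conf1");
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
    context.use_named_collection_granted = true; // after GRANT USE NAMED COLLECTION ON s3_conf1
    std::cout << *tryGetCollection(context, "s3_conf1") << '\n';
}

The integration test that follows exercises exactly this ordering on the SQL side: queries against the s3_conf1 collection fail for the restricted user until GRANT USE NAMED COLLECTION ON s3_conf1 TO user is issued.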
-MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); +MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context); HTTPHeaderEntries getHeadersFromNamedCollection(const NamedCollection & collection); diff --git a/tests/integration/helpers/0_common_instance_users.xml b/tests/integration/helpers/0_common_instance_users.xml index 3399ef5915a..6aae12400fd 100644 --- a/tests/integration/helpers/0_common_instance_users.xml +++ b/tests/integration/helpers/0_common_instance_users.xml @@ -1,7 +1,11 @@ - 1 + + GRANT ACCESS MANAGEMENT ON *.* WITH GRANT OPTION + GRANT ALL ON *.* WITH GRANT OPTION + GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION + diff --git a/tests/integration/test_storage_s3/configs/access.xml b/tests/integration/test_storage_s3/configs/access.xml new file mode 100644 index 00000000000..8bded9104f6 --- /dev/null +++ b/tests/integration/test_storage_s3/configs/access.xml @@ -0,0 +1,19 @@ + + + + + default + default + + GRANT admin_role + + + + + + + GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION + + + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f983bd618e3..01dd4fd7856 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -55,6 +55,7 @@ def started_cluster(): "configs/named_collections.xml", "configs/schema_cache.xml", ], + user_configs=["configs/access.xml"], ) cluster.add_instance( "s3_max_redirects", @@ -921,22 +922,39 @@ def test_predefined_connection_configuration(started_cluster): instance = started_cluster.instances["dummy"] # type: ClickHouseInstance name = "test_table" - instance.query("drop table if exists {}".format(name)) + instance.query("CREATE USER user") + instance.query("GRANT CREATE ON *.* TO user") + instance.query("GRANT SOURCES ON *.* TO user") + instance.query("GRANT SELECT ON *.* TO user") + + instance.query(f"drop table if exists {name}", user="user") + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" + ) + assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + ) + assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error + + instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( - "CREATE TABLE {} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')".format(name) + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" ) - instance.query("INSERT INTO {} SELECT number FROM numbers(10)".format(name)) - result = instance.query("SELECT * FROM {}".format(name)) + instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") + result = instance.query(f"SELECT * FROM {name}") assert result == instance.query("SELECT number FROM numbers(10)") result = instance.query( - "SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')" + "SELECT * FROM s3(s3_conf1, format='CSV', structure='id UInt32')", user="user" ) assert result == instance.query("SELECT number FROM numbers(10)") - result = instance.query_and_get_error("SELECT * FROM s3(no_collection)") - assert "There is no named collection `no_collection`" 
in result + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") + assert "There is no named collection `no_collection`" in error + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") + assert "There is no named collection `no_collection`" in error result = "" From c6acdd7008e625907e2207a5b4cff554b3490a9d Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 6 Jun 2023 18:53:31 +0200 Subject: [PATCH 035/522] Fix fast test, fix black check --- tests/integration/test_storage_s3/test.py | 18 +++++++++++++----- .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 +++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 01dd4fd7856..f1cbd3366b4 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -931,15 +931,23 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error( f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) - assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error - error = instance.query_and_get_error( - f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + in error + ) + error = instance.query_and_get_error( + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", + user="user", + ) + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + in error ) - assert "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" in error instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( - f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user" + f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", + user="user", ) instance.query(f"INSERT INTO {name} SELECT number FROM numbers(10)") diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index ec245d8b9e0..c32ac39a1f5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -94,6 +94,7 @@ SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL +USE NAMED COLLECTION ['USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION CONTROL NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..9630767a552 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW 
COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM 
THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 
'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 
'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 
62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD 
CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 
8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 
'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 6c48aba69392b68c08a4105d6c5ebd9dbf9392c1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 8 Jun 2023 20:30:34 +0200 Subject: [PATCH 036/522] Fix tests --- tests/integration/helpers/0_common_instance_users.xml | 6 +----- .../test_dictionaries_mysql/configs/users.xml | 1 + tests/integration/test_dictionaries_mysql/test.py | 3 ++- .../test_dictionaries_postgresql/configs/users.xml | 10 ++++++++++ .../test_mysql_database_engine/configs/users.xml | 9 +++++++++ tests/integration/test_mysql_database_engine/test.py | 1 + .../configs/users.d/0a_users_no_default_access.xml | 9 +++++++++ .../test_postgresql_database_engine/configs/users.xml | 9 +++++++++ .../test_postgresql_database_engine/test.py | 2 +- .../configs/users.xml | 7 +++++++ .../test_redirect_url_storage/configs/users.xml | 9 +++++++++ tests/integration/test_redirect_url_storage/test.py | 1 + .../integration/test_storage_mongodb/configs/users.xml | 9 +++++++++ tests/integration/test_storage_mongodb/test.py | 1 + tests/integration/test_storage_mysql/configs/users.xml | 1 + tests/integration/test_storage_mysql/test.py | 1 + .../test_storage_postgresql/configs/users.xml | 9 +++++++++ tests/integration/test_storage_postgresql/test.py | 2 +- tests/integration/test_storage_s3/configs/users.xml | 9 +++++++++ tests/integration/test_storage_s3/test.py | 2 +- .../test_table_function_mongodb/configs/users.xml | 9 +++++++++ tests/integration/test_table_function_mongodb/test.py | 1 + 22 files changed, 102 insertions(+), 9 deletions(-) create mode 100644 tests/integration/test_dictionaries_postgresql/configs/users.xml create mode 100644 tests/integration/test_mysql_database_engine/configs/users.xml create mode 100644 tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml create mode 100644 tests/integration/test_postgresql_database_engine/configs/users.xml create mode 100644 tests/integration/test_redirect_url_storage/configs/users.xml create mode 100644 tests/integration/test_storage_mongodb/configs/users.xml create mode 100644 tests/integration/test_storage_postgresql/configs/users.xml create mode 100644 tests/integration/test_storage_s3/configs/users.xml create mode 100644 tests/integration/test_table_function_mongodb/configs/users.xml diff --git a/tests/integration/helpers/0_common_instance_users.xml b/tests/integration/helpers/0_common_instance_users.xml index 6aae12400fd..3399ef5915a 100644 --- a/tests/integration/helpers/0_common_instance_users.xml +++ b/tests/integration/helpers/0_common_instance_users.xml @@ -1,11 +1,7 @@ - - GRANT ACCESS MANAGEMENT ON *.* WITH GRANT OPTION - GRANT ALL ON *.* WITH GRANT OPTION - GRANT USE NAMED COLLECTION ON * WITH GRANT OPTION - + 1 diff --git a/tests/integration/test_dictionaries_mysql/configs/users.xml b/tests/integration/test_dictionaries_mysql/configs/users.xml index 4555a2ed494..70c7d3bc2c1 100644 --- a/tests/integration/test_dictionaries_mysql/configs/users.xml +++ b/tests/integration/test_dictionaries_mysql/configs/users.xml @@ -12,6 +12,7 @@ default default + 1 
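(Note: the per-test users.xml files added and edited in this commit all follow one pattern: they re-enable, for a single test instance, the named collection access that the shared 0_common_instance_users.xml no longer grants. A minimal sketch of the shape such a file plausibly takes — the element names are assumptions inferred from the named_collection_control setting read in src/Access/UsersConfigAccessStorage.cpp, not a verbatim copy of the new files:

    <clickhouse>
        <users>
            <default>
                <!-- keep the standard settings profile for the test user -->
                <profile>default</profile>
                <!-- grant this instance's default user full named collection access -->
                <named_collection_control>1</named_collection_control>
            </default>
        </users>
    </clickhouse>

Moving the grant out of the shared helper config and into per-test files means only tests that actually exercise named collections run with that privilege.)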
diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index a12139a0bea..8252a2fd514 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -8,9 +8,10 @@ import logging DICTS = ["configs/dictionaries/mysql_dict1.xml", "configs/dictionaries/mysql_dict2.xml"] CONFIG_FILES = ["configs/remote_servers.xml", "configs/named_collections.xml"] +USER_CONFIGS = ["configs/users.xml"] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", main_configs=CONFIG_FILES, with_mysql=True, dictionaries=DICTS + "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, with_mysql=True, dictionaries=DICTS ) create_table_mysql_template = """ diff --git a/tests/integration/test_dictionaries_postgresql/configs/users.xml b/tests/integration/test_dictionaries_postgresql/configs/users.xml new file mode 100644 index 00000000000..beb08eb6ed4 --- /dev/null +++ b/tests/integration/test_dictionaries_postgresql/configs/users.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_mysql_database_engine/configs/users.xml b/tests/integration/test_mysql_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index 52a7b319551..18dde5307fd 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -12,6 +12,7 @@ cluster = ClickHouseCluster(__file__) clickhouse_node = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_mysql=True, stay_alive=True, ) diff --git a/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml b/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml new file mode 100644 index 00000000000..b8f38f04ca9 --- /dev/null +++ b/tests/integration/test_named_collections/configs/users.d/0a_users_no_default_access.xml @@ -0,0 +1,9 @@ + + + + + default + default + + + diff --git a/tests/integration/test_postgresql_database_engine/configs/users.xml b/tests/integration/test_postgresql_database_engine/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_postgresql_database_engine/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index d9f06f0295b..68e6f444f73 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -8,7 +8,7 @@ from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], with_postgres=True + "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True ) postgres_table_template = """ diff --git a/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml b/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml 
index 26ea20e012f..e0c51962193 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml +++ b/tests/integration/test_postgresql_replica_database_engine_2/configs/users.xml @@ -4,4 +4,11 @@ 1 + + + + default + 1 + + diff --git a/tests/integration/test_redirect_url_storage/configs/users.xml b/tests/integration/test_redirect_url_storage/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_redirect_url_storage/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index b2178655444..225a34c9109 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -9,6 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_zookeeper=False, with_hdfs=True, ) diff --git a/tests/integration/test_storage_mongodb/configs/users.xml b/tests/integration/test_storage_mongodb/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_mongodb/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 6ba5520704d..174ad908d60 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -17,6 +17,7 @@ def started_cluster(request): "configs_secure/config.d/ssl_conf.xml", "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], with_mongo=True, with_mongo_secure=request.param, ) diff --git a/tests/integration/test_storage_mysql/configs/users.xml b/tests/integration/test_storage_mysql/configs/users.xml index d030ccb0e72..a11985dd113 100644 --- a/tests/integration/test_storage_mysql/configs/users.xml +++ b/tests/integration/test_storage_mysql/configs/users.xml @@ -12,6 +12,7 @@ ::/0 default + 1 diff --git a/tests/integration/test_storage_mysql/test.py b/tests/integration/test_storage_mysql/test.py index 49629575ec7..3e3132949e7 100644 --- a/tests/integration/test_storage_mysql/test.py +++ b/tests/integration/test_storage_mysql/test.py @@ -13,6 +13,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/remote_servers.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], with_mysql=True, ) node2 = cluster.add_instance( diff --git a/tests/integration/test_storage_postgresql/configs/users.xml b/tests/integration/test_storage_postgresql/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_postgresql/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index d60a90ed7ce..2ce1bac3cff 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -7,7 +7,7 @@ from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], with_postgres=True + "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"],
with_postgres=True ) node2 = cluster.add_instance( "node2", diff --git a/tests/integration/test_storage_s3/configs/users.xml b/tests/integration/test_storage_s3/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_s3/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f1cbd3366b4..75473f3c406 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -55,7 +55,7 @@ def started_cluster(): "configs/named_collections.xml", "configs/schema_cache.xml", ], - user_configs=["configs/access.xml"], + user_configs=["configs/access.xml", "configs/users.xml"], ) cluster.add_instance( "s3_max_redirects", diff --git a/tests/integration/test_table_function_mongodb/configs/users.xml b/tests/integration/test_table_function_mongodb/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_table_function_mongodb/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_table_function_mongodb/test.py b/tests/integration/test_table_function_mongodb/test.py index e0ad71b0079..3b6ace9d11b 100644 --- a/tests/integration/test_table_function_mongodb/test.py +++ b/tests/integration/test_table_function_mongodb/test.py @@ -16,6 +16,7 @@ def started_cluster(request): main_configs=[ "configs_secure/config.d/ssl_conf.xml", ], + user_configs=["configs/users.xml"], with_mongo_secure=request.param, ) cluster.start() From 5cf29fbf762e0efc51142afb3396a16414c121fc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 9 Jun 2023 13:13:33 +0200 Subject: [PATCH 037/522] Fix black check --- tests/integration/test_dictionaries_mysql/test.py | 6 +++++- tests/integration/test_postgresql_database_engine/test.py | 5 ++++- tests/integration/test_storage_postgresql/test.py | 5 ++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_dictionaries_mysql/test.py b/tests/integration/test_dictionaries_mysql/test.py index 8252a2fd514..ee0d957b8a9 100644 --- a/tests/integration/test_dictionaries_mysql/test.py +++ b/tests/integration/test_dictionaries_mysql/test.py @@ -11,7 +11,11 @@ CONFIG_FILES = ["configs/remote_servers.xml", "configs/named_collections.xml"] USER_CONFIGS = ["configs/users.xml"] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance( - "instance", main_configs=CONFIG_FILES, user_configs=USER_CONFIGS, with_mysql=True, dictionaries=DICTS + "instance", + main_configs=CONFIG_FILES, + user_configs=USER_CONFIGS, + with_mysql=True, + dictionaries=DICTS, ) create_table_mysql_template = """ diff --git a/tests/integration/test_postgresql_database_engine/test.py b/tests/integration/test_postgresql_database_engine/test.py index 68e6f444f73..59a464f9020 100644 --- a/tests/integration/test_postgresql_database_engine/test.py +++ b/tests/integration/test_postgresql_database_engine/test.py @@ -8,7 +8,10 @@ from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True + "node1", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, ) postgres_table_template = """ diff --git a/tests/integration/test_storage_postgresql/test.py 
b/tests/integration/test_storage_postgresql/test.py index 2ce1bac3cff..0c8fc597b5c 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -7,7 +7,10 @@ from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/named_collections.xml"], user_configs=["configs/users.xml"], with_postgres=True + "node1", + main_configs=["configs/named_collections.xml"], + user_configs=["configs/users.xml"], + with_postgres=True, ) node2 = cluster.add_instance( "node2", From 7bd1c183ebe535ec3f8799e82d73f9b064c967c8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Jun 2023 13:16:02 +0300 Subject: [PATCH 038/522] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index ddbb02bf4ef..ba0068e9c59 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -24,7 +24,7 @@ def replace_substring_to_substr(node): "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", ], - user="root", + user="root" ) From 6c776f4483382afa395bb5929e1b1351468795ec Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 12:40:53 +0200 Subject: [PATCH 039/522] Better --- src/Access/Common/AccessType.h | 14 ++++++------- src/Access/UsersConfigAccessStorage.cpp | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 4 ++-- tests/integration/test_storage_s3/test.py | 25 ++++++++++++++++++++--- 4 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 3a94a5037b2..16ee5177d66 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -70,7 +70,7 @@ enum class AccessType M(ALTER_FREEZE_PARTITION, "FREEZE PARTITION, UNFREEZE", TABLE, ALTER_TABLE) \ \ M(ALTER_DATABASE_SETTINGS, "ALTER DATABASE SETTING, ALTER MODIFY DATABASE SETTING, MODIFY DATABASE SETTING", DATABASE, ALTER_DATABASE) /* allows to execute ALTER MODIFY SETTING */\ - M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute ALTER NAMED COLLECTION */\ + M(ALTER_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute ALTER NAMED COLLECTION */\ \ M(ALTER_TABLE, "", GROUP, ALTER) \ M(ALTER_DATABASE, "", GROUP, ALTER) \ @@ -92,7 +92,7 @@ enum class AccessType M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables with arbitrary table engine */\ M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \ - M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute CREATE NAMED COLLECTION */ \ + M(CREATE_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute CREATE NAMED COLLECTION */ \ M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \ \ M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\ @@ -101,7 +101,7 @@ enum class AccessType implicitly enabled by the grant DROP_TABLE */\ M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\ M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ - M(DROP_NAMED_COLLECTION, "", 
NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) /* allows to execute DROP NAMED COLLECTION */\ + M(DROP_NAMED_COLLECTION, "", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) /* allows to execute DROP NAMED COLLECTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ M(UNDROP_TABLE, "", TABLE, ALL) /* allows to execute {UNDROP} TABLE */\ @@ -140,10 +140,10 @@ enum class AccessType M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ - M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(USE_NAMED_COLLECTION, "USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_CONTROL) \ - M(NAMED_COLLECTION_CONTROL, "", NAMED_COLLECTION, ALL) \ + M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(USE_NAMED_COLLECTION, "NAMED COLLECTION USAGE, NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ M(SYSTEM_DROP_DNS_CACHE, "SYSTEM DROP DNS, DROP DNS CACHE, DROP DNS", GLOBAL, SYSTEM_DROP_CACHE) \ diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 187258d0fcd..15765045c97 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -328,7 +328,7 @@ namespace if (!named_collection_control) { - user->access.revoke(AccessType::NAMED_COLLECTION_CONTROL); + user->access.revoke(AccessType::NAMED_COLLECTION_ADMIN); } if (!show_named_collections_secrets) diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index efd5af29f48..29d47e131a6 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -76,6 +76,8 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection_name.has_value()) return nullptr; + context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + NamedCollectionPtr collection; if (throw_unknown_collection) collection = NamedCollectionFactory::instance().get(*collection_name); @@ -85,8 +87,6 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); - auto collection_copy = collection->duplicate(); if (asts.size() == 1) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 75473f3c406..28117d694d6 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -57,6 +57,16 @@ def started_cluster(): ], user_configs=["configs/access.xml", "configs/users.xml"], ) + cluster.add_instance( + "dummy_without_named_collections", + with_minio=True, + main_configs=[ + "configs/defaultS3.xml", + "configs/named_collections.xml", + "configs/schema_cache.xml", + ], + user_configs=["configs/access.xml"], + ) cluster.add_instance( "s3_max_redirects", with_minio=True, @@ -919,7 +929,7 @@ def test_truncate_table(started_cluster): def 
test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy"] # type: ClickHouseInstance + instance = started_cluster.instances["dummy_without_named_collections"] # type: ClickHouseInstance name = "test_table" instance.query("CREATE USER user") @@ -944,7 +954,7 @@ def test_predefined_connection_configuration(started_cluster): in error ) - instance.query("GRANT USE NAMED COLLECTION ON s3_conf1 TO user", user="admin") + instance.query("GRANT NAMED COLLECTION ON s3_conf1 TO user", user="admin") instance.query( f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')", user="user", @@ -960,8 +970,17 @@ def test_predefined_connection_configuration(started_cluster): assert result == instance.query("SELECT number FROM numbers(10)") error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") - assert "There is no named collection `no_collection`" in error + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + in error + ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") + assert ( + "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + in error + ) + instance = started_cluster.instances["dummy"] # has named collection access + error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error From 62b94073a2c79f3f336f62ad359e2789541dbdd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 13:32:56 +0200 Subject: [PATCH 040/522] Fix black check --- tests/integration/test_storage_s3/test.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 28117d694d6..cec92222d4c 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -929,7 +929,9 @@ def test_truncate_table(started_cluster): def test_predefined_connection_configuration(started_cluster): bucket = started_cluster.minio_bucket - instance = started_cluster.instances["dummy_without_named_collections"] # type: ClickHouseInstance + instance = started_cluster.instances[ + "dummy_without_named_collections" + ] # type: ClickHouseInstance name = "test_table" instance.query("CREATE USER user") @@ -979,7 +981,7 @@ def test_predefined_connection_configuration(started_cluster): "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" in error ) - instance = started_cluster.instances["dummy"] # has named collection access + instance = started_cluster.instances["dummy"] # has named collection access error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert "There is no named collection `no_collection`" in error From 188c613c655a918d618ade00ef7f763b1601d4e5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 13 Jun 2023 14:30:36 +0200 Subject: [PATCH 041/522] Update tests --- .../0_stateless/01271_show_privileges.reference | 14 +++++++------- .../02117_show_create_table_system.reference | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c78c1a540f2..13113aeb194 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ 
-39,7 +39,7 @@ ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTE ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE -ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +ALTER NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN ALTER TABLE [] \N ALTER ALTER DATABASE [] \N ALTER ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW @@ -53,14 +53,14 @@ CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE ARBITRARY TEMPORARY TABLE CREATE ARBITRARY TEMPORARY TABLE [] GLOBAL CREATE CREATE FUNCTION [] GLOBAL CREATE -CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +CREATE NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP DROP FUNCTION [] GLOBAL DROP -DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION CONTROL +DROP NAMED COLLECTION [] NAMED_COLLECTION NAMED COLLECTION ADMIN DROP [] \N ALL UNDROP TABLE [] TABLE ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL @@ -92,10 +92,10 @@ SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL -SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION CONTROL -SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION CONTROL -USE NAMED COLLECTION ['USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION CONTROL -NAMED COLLECTION CONTROL [] NAMED_COLLECTION ALL +SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION ADMIN +SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN +USE NAMED COLLECTION ['NAMED COLLECTION USAGE','NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN +NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index a6db15d6bbf..0e71a5ed024 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD 
INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 
'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT 
LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 
'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW 
POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER 
DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION CONTROL' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER 
ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 
154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 9e8ca5f5ae8c4bde3e7aab7854a59c2f1d85e472 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 13:09:39 +0000 Subject: [PATCH 042/522] Add integration test check with enabled analyzer --- .github/workflows/pull_request.yml | 210 ++++++++++++++++++ tests/ci/integration_test_check.py | 2 + .../helpers/0_common_enable_analyzer.xml | 7 + tests/integration/helpers/cluster.py | 2 + 4 files changed, 221 insertions(+) create mode 100644 tests/integration/helpers/0_common_enable_analyzer.xml diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index afc08f3e637..9a39b1177cf 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3861,6 +3861,216 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + 
RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: 
Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 8ef6244a1c5..523b1cfaab5 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -226,6 +226,8 @@ def main(): download_all_deb_packages(check_name, reports_path, build_path) my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) + if "analyzer" in check_name.lower(): + my_env["USE_NEW_ANALYZER"] = "1" json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: diff --git a/tests/integration/helpers/0_common_enable_analyzer.xml b/tests/integration/helpers/0_common_enable_analyzer.xml new file mode 100644 index 00000000000..aa374364ef0 --- /dev/null +++ b/tests/integration/helpers/0_common_enable_analyzer.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index f57ebf40e54..6d66a539cdc 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4187,6 +4187,8 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) + if os.environ.get('USE_NEW_ANALYZER') is not None: + write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): write_embedded_config("0_common_enable_dictionaries.xml", self.config_d_dir) From 5d541332482ae8fc37bacd4f8db340b599779c92 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Jun 2023 13:31:14 +0000 Subject: [PATCH 043/522] Automatic style fix --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 6d66a539cdc..8c54a37cf60 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4187,7 +4187,7 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if os.environ.get('USE_NEW_ANALYZER') is not None: + if os.environ.get("USE_NEW_ANALYZER") is not None: write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): From ed4455e2915968c664a2498f6d9503f358e6109e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 15:44:30 +0200 Subject: [PATCH 044/522] Update tests/integration/helpers/cluster.py From 9652b38a6cd51f8c0f5a65cc70108d126589c793 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 13 Jun 2023 23:10:03 +0000 Subject: [PATCH 045/522] Fix CHECK_NAME --- .github/workflows/pull_request.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 9a39b1177cf..59beddac8d5 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -3870,7 +3870,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=0 RUN_BY_HASH_TOTAL=6 @@ -3905,7 +3905,7 @@ jobs: cat >> 
"$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=1 RUN_BY_HASH_TOTAL=6 @@ -3940,7 +3940,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=2 RUN_BY_HASH_TOTAL=6 @@ -3975,7 +3975,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=3 RUN_BY_HASH_TOTAL=6 @@ -4010,7 +4010,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=4 RUN_BY_HASH_TOTAL=6 @@ -4045,7 +4045,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/integration_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Integration tests (asan) + CHECK_NAME=Integration tests (asan, analyzer) REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse RUN_BY_HASH_NUM=5 RUN_BY_HASH_TOTAL=6 From ed318d10353101c76a4493ccd9fa6c239868abd3 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 14 Jun 2023 10:35:36 +0000 Subject: [PATCH 046/522] Add input_format_csv_ignore_extra_columns setting (prototype) --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 15 ++++++++++++++- tests/queries/0_stateless/00301_csv.reference | 4 ++++ tests/queries/0_stateless/00301_csv.sh | 10 ++++++++++ 6 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bc879b9bdf6..d38f7767252 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -835,6 +835,7 @@ class IColumn; M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \ M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \ + M(Bool, input_format_csv_ignore_extra_columns, false, "", 0) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \ M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index c235afae57e..0218d268c51 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -63,6 +63,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & 
settings)
     format_settings.csv.delimiter = settings.format_csv_delimiter;
     format_settings.csv.tuple_delimiter = settings.format_csv_delimiter;
     format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default;
+    format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns;
     format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number;
     format_settings.csv.null_representation = settings.format_csv_null_representation;
     format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 787c1a64759..3bc53140fe5 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -128,6 +128,7 @@ struct FormatSettings
         bool allow_single_quotes = true;
         bool allow_double_quotes = true;
         bool empty_as_default = false;
+        bool ignore_extra_columns = false;
         bool crlf_end_of_line = false;
         bool enum_as_number = false;
         bool arrays_as_nested_csv = false;
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index ae75240e0ee..0cc5889b732 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -302,14 +302,27 @@ bool CSVFormatReader::readField(
         return false;
     }

+    auto skip_all = [&]()
+    {
+        if (!is_last_file_column || !format_settings.csv.ignore_extra_columns)
+        {
+            return;
+        }
+        buf->position() = find_first_symbols<'\n'>(buf->position(), buf->buffer().end());
+    };
    if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type))
    {
        /// If value is null but type is not nullable then use default value instead.
-        return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);
+        bool res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization);
+        skip_all();
+        return res;
    }

    /// Read the column normally.
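+    /// With input_format_csv_ignore_extra_columns enabled, the skip_all() call below also discards any extra fields left on the row after the last expected column.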
serialization->deserializeTextCSV(column, *buf, format_settings); + + skip_all(); return true; } diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 9863da4b640..61279f3b84a 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -11,3 +11,7 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N +Hello world 1 2016-01-01 +Hello world 2 2016-01-02 +Hello world 3 2016-01-03 +Hello world 4 2016-01-04 diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index b2618343dc0..e99c39a0f6f 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -37,3 +37,13 @@ echo 'NULL, NULL $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; + + +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; + +echo 'Hello world, 1, 2016-01-01 +Hello world, 2 ,2016-01-02, +Hello world, 3 ,2016-01-03, 2016-01-13 +Hello world, 4 ,2016-01-04, 2016-01-14, 2016-01-15' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; +$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; \ No newline at end of file From 2b40734900f121f60ad50e37c2c6fa2f9376e3d5 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 14:29:16 +0200 Subject: [PATCH 047/522] use const-size tasks in prefetch pool --- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 107 +++++++----------- 1 file changed, 43 insertions(+), 64 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index 63a205a1a61..f0dd2123ca4 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -1,18 +1,18 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include namespace ProfileEvents @@ -296,31 +296,12 @@ MergeTreeReadTaskPtr MergeTreePrefetchedReadPool::getTask(size_t thread) return task; } -size_t MergeTreePrefetchedReadPool::getApproxSizeOfGranule(const IMergeTreeDataPart & part) const +size_t getApproximateSizeOfGranule(const IMergeTreeDataPart & part, const Names & columns_to_read) { - const auto & columns = part.getColumns(); - auto all_columns_are_fixed_size = columns.end() == std::find_if( - columns.begin(), columns.end(), - [](const auto & col){ return col.type->haveMaximumSizeOfValue() == false; }); - - if (all_columns_are_fixed_size) - { - size_t approx_size = 0; - for (const auto & col : columns) - approx_size += col.type->getMaximumSizeOfValueInMemory() * fixed_index_granularity; - - if (!index_granularity_bytes) - return approx_size; - - return std::min(index_granularity_bytes, approx_size); - } - - const size_t approx_size = static_cast(std::round(static_cast(part.getBytesOnDisk()) / part.getMarksCount())); - - if (!index_granularity_bytes) - return approx_size; - - return std::min(index_granularity_bytes, approx_size); + ColumnSize columns_size{}; + for (const auto & col_name : columns_to_read) + 
columns_size.add(part.getColumnSize(col_name)); + return columns_size.data_compressed / part.getMarksCount(); } MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInfos( @@ -347,7 +328,7 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf for (const auto & range : part.ranges) part_info->sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproxSizeOfGranule(*part_info->data_part); + part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); const auto task_columns = getReadTaskColumns( part_reader_info, @@ -357,7 +338,7 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf prewhere_info, actions_settings, reader_settings, - /*with_subcolumns=*/ true); + /* with_subcolumns */ true); part_info->size_predictor = !predict_block_size_bytes ? nullptr @@ -421,10 +402,6 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr } size_t min_prefetch_step_marks = 0; - if (settings.filesystem_prefetches_limit && settings.filesystem_prefetches_limit < sum_marks) - { - min_prefetch_step_marks = static_cast(std::round(static_cast(sum_marks) / settings.filesystem_prefetches_limit)); - } for (const auto & part : parts_infos) { @@ -437,12 +414,6 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr part->prefetch_step_marks = std::max( 1, static_cast(std::round(static_cast(settings.filesystem_prefetch_step_bytes) / part->approx_size_of_mark))); } - else - { - /// Experimentally derived ratio. - part->prefetch_step_marks = static_cast( - std::round(std::pow(std::max(1, static_cast(std::round(sum_marks / 1000))), double(1.5)))); - } /// This limit is important to avoid spikes of slow aws getObject requests when parallelizing within one file. /// (The default is taken from here https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/use-byte-range-fetches.html). @@ -450,13 +421,13 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr && settings.filesystem_prefetch_min_bytes_for_single_read_task && part->approx_size_of_mark < settings.filesystem_prefetch_min_bytes_for_single_read_task) { - - const size_t new_min_prefetch_step_marks = static_cast( + const size_t min_prefetch_step_marks_by_total_cols = static_cast( std::ceil(static_cast(settings.filesystem_prefetch_min_bytes_for_single_read_task) / part->approx_size_of_mark)); + /// At least one task to start working on it right now and another one to prefetch in the meantime. 
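+            /// (sum_marks / threads is roughly one thread's share of marks; e.g. with sum_marks = 24000 and threads = 8, the cap below is 1500 marks.)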
+ const size_t new_min_prefetch_step_marks = std::min(min_prefetch_step_marks_by_total_cols, sum_marks / threads / 2); if (min_prefetch_step_marks < new_min_prefetch_step_marks) { - LOG_TEST( - log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); + LOG_DEBUG(log, "Increasing min prefetch step from {} to {}", min_prefetch_step_marks, new_min_prefetch_step_marks); min_prefetch_step_marks = new_min_prefetch_step_marks; } @@ -464,25 +435,33 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr if (part->prefetch_step_marks < min_prefetch_step_marks) { - LOG_TEST( - log, "Increasing prefetch step from {} to {} because of the prefetches limit {}", - part->prefetch_step_marks, min_prefetch_step_marks, settings.filesystem_prefetches_limit); + LOG_DEBUG(log, "Increasing prefetch step from {} to {}", part->prefetch_step_marks, min_prefetch_step_marks); part->prefetch_step_marks = min_prefetch_step_marks; } - LOG_TEST(log, - "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", - part->data_part->name, part->sum_marks, part->approx_size_of_mark, - settings.filesystem_prefetch_step_bytes, part->prefetch_step_marks, toString(part->ranges)); + LOG_DEBUG( + log, + "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", + part->data_part->name, + part->sum_marks, + part->approx_size_of_mark, + settings.filesystem_prefetch_step_bytes, + part->prefetch_step_marks, + toString(part->ranges)); } const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; LOG_DEBUG( log, - "Sum marks: {}, threads: {}, min_marks_per_thread: {}, result prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", - sum_marks, threads, min_marks_per_thread, settings.filesystem_prefetch_step_bytes, settings.filesystem_prefetches_limit, total_size_approx); + "Sum marks: {}, threads: {}, min_marks_per_thread: {}, min prefetch step marks: {}, prefetches limit: {}, total_size_approx: {}", + sum_marks, + threads, + min_marks_per_thread, + min_prefetch_step_marks, + settings.filesystem_prefetches_limit, + total_size_approx); size_t allowed_memory_usage = settings.filesystem_prefetch_max_memory_usage; if (!allowed_memory_usage) @@ -492,6 +471,7 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr : std::nullopt; ThreadsTasks result_threads_tasks; + size_t total_tasks = 0; for (size_t i = 0, part_idx = 0; i < threads && part_idx < parts_infos.size(); ++i) { auto need_marks = min_marks_per_thread; @@ -606,12 +586,11 @@ MergeTreePrefetchedReadPool::ThreadsTasks MergeTreePrefetchedReadPool::createThr ++priority.value; result_threads_tasks[i].push_back(std::move(read_task)); + ++total_tasks; } } - LOG_TEST( - log, "Result tasks {} for {} threads: {}", - result_threads_tasks.size(), threads, dumpTasks(result_threads_tasks)); + LOG_TEST(log, "Result tasks {} for {} threads: {}", total_tasks, threads, dumpTasks(result_threads_tasks)); return result_threads_tasks; } From e88fc3989534986e78561a967a9263eda7548d3f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 14:32:09 +0200 Subject: [PATCH 048/522] cosmetics --- .../IO/AsynchronousBoundedReadBuffer.cpp | 23 +++++++++++-------- .../IO/CachedOnDiskReadBufferFromFile.cpp | 4 ++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp 
index f9bd68222ae..6651658e156 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -33,6 +33,15 @@ namespace ProfileEvents extern const Event RemoteFSBuffers; } +namespace +{ +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) +{ + /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. + return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} +} + namespace DB { @@ -42,23 +51,17 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } -static size_t chooseBufferSize(const ReadSettings & settings, size_t file_size) -{ - /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. - return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); -} - AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer( ImplPtr impl_, IAsynchronousReader & reader_, const ReadSettings & settings_, AsyncReadCountersPtr async_read_counters_, FilesystemReadPrefetchesLogPtr prefetches_log_) - : ReadBufferFromFileBase(chooseBufferSize(settings_, impl_->getFileSize()), nullptr, 0) + : ReadBufferFromFileBase(chooseBufferSizeForRemoteReading(settings_, impl_->getFileSize()), nullptr, 0) , impl(std::move(impl_)) , read_settings(settings_) , reader(reader_) - , prefetch_buffer(chooseBufferSize(settings_, impl->getFileSize())) + , prefetch_buffer(chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())) , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "") , current_reader_id(getRandomASCIIString(8)) , log(&Poco::Logger::get("AsynchronousBoundedReadBuffer")) @@ -111,7 +114,7 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority) last_prefetch_info.submit_time = std::chrono::system_clock::now(); last_prefetch_info.priority = priority; - chassert(prefetch_buffer.size() == chooseBufferSize(read_settings, impl->getFileSize())); + chassert(prefetch_buffer.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } @@ -190,7 +193,7 @@ bool AsynchronousBoundedReadBuffer::nextImpl() { ProfileEventTimeIncrement watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds); - chassert(memory.size() == chooseBufferSize(read_settings, impl->getFileSize())); + chassert(memory.size() == chooseBufferSizeForRemoteReading(read_settings, impl->getFileSize())); std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore); ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 6317aba20e9..bfde6d0984c 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1085,6 +1085,10 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() first_offset, file_segments->toString()); + /// Release buffer a little bit earlier. 
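+    /// Once reading has reached read_until_position, no more data will be served from this buffer, so its memory can be freed now instead of at destruction.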
+ if (read_until_position == file_offset_of_buffer_end) + implementation_buffer.reset(); + return result; } From 1d33043fe673d5ebc86b68fbbdb563c1cbcdbb0f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:18:47 +0200 Subject: [PATCH 049/522] changes around buffer sizes --- .../IO/AsynchronousBoundedReadBuffer.cpp | 9 ---- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 41 ++++++++++++++----- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 1 + 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 6651658e156..86ee541dcbd 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -33,15 +33,6 @@ namespace ProfileEvents extern const Event RemoteFSBuffers; } -namespace -{ -size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) -{ - /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. - return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); -} -} - namespace DB { diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index eb9c509e459..537c0cf1be7 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -2,14 +2,27 @@ #include +#include #include #include -#include +#include #include -#include -#include #include +#include +#include +using namespace DB; + + +namespace +{ +bool withCache(const ReadSettings & settings) +{ + return settings.remote_fs_cache && settings.enable_filesystem_cache + && (!CurrentThread::getQueryId().empty() || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache + || !settings.avoid_readthrough_cache_outside_query_context); +} +} namespace DB { @@ -18,29 +31,35 @@ namespace ErrorCodes extern const int CANNOT_SEEK_THROUGH_FILE; } +size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size) +{ + /// Only when cache is used we could download bigger portions of FileSegments than what we actually gonna read within particular task. + if (!withCache(settings)) + return settings.remote_fs_buffer_size; + + /// Buffers used for prefetch and pre-download better to have enough size, but not bigger than the whole file. + return std::min(std::max(settings.remote_fs_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} + ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, const ReadSettings & settings_, std::shared_ptr cache_log_, bool use_external_buffer_) - : ReadBufferFromFileBase(use_external_buffer_ ? 0 : settings_.remote_fs_buffer_size, nullptr, 0) + : ReadBufferFromFileBase( + use_external_buffer_ ? 0 : chooseBufferSizeForRemoteReading(settings_, getTotalSize(blobs_to_read_)), nullptr, 0) , settings(settings_) , blobs_to_read(blobs_to_read_) , read_buffer_creator(std::move(read_buffer_creator_)) , cache_log(settings.enable_filesystem_cache_log ? cache_log_ : nullptr) - , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? 
CurrentThread::getQueryId() : "")
+    , query_id(CurrentThread::getQueryId())
     , use_external_buffer(use_external_buffer_)
+    , with_cache(withCache(settings))
     , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather"))
 {
     if (!blobs_to_read.empty())
         current_object = blobs_to_read.front();
-
-    with_cache = settings.remote_fs_cache
-        && settings.enable_filesystem_cache
-        && (!query_id.empty()
-            || settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache
-            || !settings.avoid_readthrough_cache_outside_query_context);
 }

 SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object)
diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h
index 272ed2b3ac1..9bf55ab69ce 100644
--- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h
@@ -86,4 +86,5 @@ private:
     Poco::Logger * log;
 };

+size_t chooseBufferSizeForRemoteReading(const DB::ReadSettings & settings, size_t file_size);
 }

From 1dddcc94726bfca062da2af1b9880df5fa5e4268 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Mon, 12 Jun 2023 16:19:05 +0200
Subject: [PATCH 050/522] use connection pool

---
 src/Common/PoolBase.h                        | 88 +++++++++++++-------
 src/Disks/ObjectStorages/S3/diskSettings.cpp |  3 +
 src/IO/HTTPCommon.cpp                        | 59 ++++++++++---
 src/IO/HTTPCommon.h                          | 16 +++-
 src/IO/ReadBufferFromS3.cpp                  | 50 ++++++++++-
 src/IO/ReadBufferFromS3.h                    |  6 +-
 src/IO/S3/PocoHTTPClient.cpp                 | 40 +++++++--
 src/IO/S3/PocoHTTPClient.h                   | 25 ++++++
 src/IO/S3/SessionAwareIOStream.h             |  4 +
 9 files changed, 239 insertions(+), 52 deletions(-)

diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h
index 8cabb472d8f..5575b56f299 100644
--- a/src/Common/PoolBase.h
+++ b/src/Common/PoolBase.h
@@ -1,9 +1,11 @@
 #pragma once

-#include
 #include
-#include
+#include
+#include
+#include
 #include
+#include
 #include
 #include

@@ -15,14 +17,6 @@ namespace ProfileEvents
    extern const Event ConnectionPoolIsFullMicroseconds;
 }

-namespace DB
-{
-    namespace ErrorCodes
-    {
-        extern const int LOGICAL_ERROR;
-    }
-}
-
 /** A class from which you can inherit and get a pool of something. Used for database connection pools.
   * Descendant class must provide a method for creating a new object to place in the pool.
   */
 template
 class PoolBase
 {
 public:
     using Object = TObject;
     using ObjectPtr = std::shared_ptr;
     using Ptr = std::shared_ptr>;

+    enum class BehaviourOnLimit
+    {
+        /**
+         * Default behaviour - when the limit on the pool size is reached, callers wait until an object is returned to the pool.
+         */
+        Wait,
+
+        /**
+         * If there is no free object in the pool - allocate a new one, but do not store it in the pool.
+         * This behaviour is needed when we simply don't want to waste time waiting, or when we cannot guarantee that a query could be processed using a fixed amount of connections.
+         * For example, when we read from a table on s3, one GetObject request corresponds to a whole FileSystemCache segment. These segments are shared between different
+         * reading tasks, so in the general case a connection could be taken from the pool by one task and returned by another one. And these tasks are processed completely independently.
+         */
+        AllocateNewBypassingPool,
+    };
+
 private:
     /** The object with the flag, whether it is currently used.
*/ @@ -89,37 +99,53 @@ public: Object & operator*() && = delete; const Object & operator*() const && = delete; - Object * operator->() & { return &*data->data.object; } - const Object * operator->() const & { return &*data->data.object; } - Object & operator*() & { return *data->data.object; } - const Object & operator*() const & { return *data->data.object; } + Object * operator->() & { return castToObjectPtr(); } + const Object * operator->() const & { return castToObjectPtr(); } + Object & operator*() & { return *castToObjectPtr(); } + const Object & operator*() const & { return *castToObjectPtr(); } /** * Expire an object to make it reallocated later. */ void expire() { - data->data.is_expired = true; + if (data.index() == 1) + std::get<1>(data)->data.is_expired = true; } - bool isNull() const { return data == nullptr; } - - PoolBase * getPool() const - { - if (!data) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry"); - return &data->data.pool; - } + bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); } private: - std::shared_ptr data; + /** + * Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool). + */ + std::variant> data; - explicit Entry(PooledObject & object) : data(std::make_shared(object)) {} + explicit Entry(ObjectPtr && object) : data(std::move(object)) { } + + explicit Entry(PooledObject & object) : data(std::make_shared(object)) { } + + auto castToObjectPtr() const + { + return std::visit( + [](const auto & ptr) + { + using T = std::decay_t; + if constexpr (std::is_same_v) + return ptr.get(); + else + return ptr->data.object.get(); + }, + data); + } }; virtual ~PoolBase() = default; - /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */ + /** Allocates the object. + * If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. + * If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool. + */ Entry get(Poco::Timespan::TimeDiff timeout) { std::unique_lock lock(mutex); @@ -150,6 +176,9 @@ public: return Entry(*items.back()); } + if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool) + return Entry(allocObject()); + Stopwatch blocked; if (timeout < 0) { @@ -184,6 +213,8 @@ private: /** The maximum size of the pool. */ unsigned max_items; + BehaviourOnLimit behaviour_on_limit; + /** Pool. 
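      * (Holds every PooledObject created by the pool; its capacity is reserved up to max_items in the constructor below.)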
*/ Objects items; @@ -192,11 +223,10 @@ private: std::condition_variable available; protected: - Poco::Logger * log; - PoolBase(unsigned max_items_, Poco::Logger * log_) - : max_items(max_items_), log(log_) + PoolBase(unsigned max_items_, Poco::Logger * log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait) + : max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_) { items.reserve(max_items); } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 409eb2a3dc3..fe57fb24bbd 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -132,6 +132,9 @@ std::unique_ptr getClient( client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100); client_configuration.endpointOverride = uri.endpoint; + client_configuration.http_keep_alive_timeout_ms = config.getUInt(config_prefix + ".http_keep_alive_timeout_ms", 10000); + client_configuration.http_connection_pool_size = config.getUInt(config_prefix + ".http_connection_pool_size", 1000); + client_configuration.wait_on_pool_size_limit = false; auto proxy_config = getProxyConfiguration(config_prefix, config); if (proxy_config) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 3ec9b3d0a83..f3e2064c8bf 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -131,8 +131,12 @@ namespace UInt16 proxy_port_, bool proxy_https_, size_t max_pool_size_, - bool resolve_host_ = true) - : Base(static_cast(max_pool_size_), &Poco::Logger::get("HTTPSessionPool")) + bool resolve_host_, + bool wait_on_pool_size_limit) + : Base( + static_cast(max_pool_size_), + &Poco::Logger::get("HTTPSessionPool"), + wait_on_pool_size_limit ? 
BehaviourOnLimit::Wait : BehaviourOnLimit::AllocateNewBypassingPool)
            , host(host_)
            , port(port_)
            , https(https_)
@@ -155,11 +159,12 @@ namespace
            String proxy_host;
            UInt16 proxy_port;
            bool is_proxy_https;
+            bool wait_on_pool_size_limit;

            bool operator ==(const Key & rhs) const
            {
-                return std::tie(target_host, target_port, is_target_https, proxy_host, proxy_port, is_proxy_https)
-                    == std::tie(rhs.target_host, rhs.target_port, rhs.is_target_https, rhs.proxy_host, rhs.proxy_port, rhs.is_proxy_https);
+                return std::tie(target_host, target_port, is_target_https, proxy_host, proxy_port, is_proxy_https, wait_on_pool_size_limit)
+                    == std::tie(rhs.target_host, rhs.target_port, rhs.is_target_https, rhs.proxy_host, rhs.proxy_port, rhs.is_proxy_https, rhs.wait_on_pool_size_limit);
            }
        };
@@ -178,6 +183,7 @@ namespace
                s.update(k.proxy_host);
                s.update(k.proxy_port);
                s.update(k.is_proxy_https);
+                s.update(k.wait_on_pool_size_limit);
                return s.get64();
            }
        };
@@ -218,14 +224,14 @@ namespace
            const Poco::URI & proxy_uri,
            const ConnectionTimeouts & timeouts,
            size_t max_connections_per_endpoint,
-            bool resolve_host = true)
+            bool resolve_host,
+            bool wait_on_pool_size_limit)
        {
-            std::lock_guard lock(mutex);
+            std::unique_lock lock(mutex);

            const std::string & host = uri.getHost();
            UInt16 port = uri.getPort();
            bool https = isHTTPS(uri);
-
            String proxy_host;
            UInt16 proxy_port = 0;
            bool proxy_https = false;
@@ -236,11 +242,27 @@ namespace
                proxy_https = isHTTPS(proxy_uri);
            }

-            HTTPSessionPool::Key key{host, port, https, proxy_host, proxy_port, proxy_https};
+            HTTPSessionPool::Key key{host, port, https, proxy_host, proxy_port, proxy_https, wait_on_pool_size_limit};
            auto pool_ptr = endpoints_pool.find(key);
            if (pool_ptr == endpoints_pool.end())
                std::tie(pool_ptr, std::ignore) = endpoints_pool.emplace(
-                    key, std::make_shared(host, port, https, proxy_host, proxy_port, proxy_https, max_connections_per_endpoint, resolve_host));
+                    key,
+                    std::make_shared(
+                        host,
+                        port,
+                        https,
+                        proxy_host,
+                        proxy_port,
+                        proxy_https,
+                        max_connections_per_endpoint,
+                        resolve_host,
+                        wait_on_pool_size_limit));
+
+            /// Some routines hold session objects until the end of their lifetime, and they may create more sessions in that time frame.
+            /// If such a routine holds `lock` while it waits on another lock inside `pool_ptr->second->get`, it isn't possible to create any
+            /// new session, and thus to finish the routine, return its session to the pool, and unlock the thread waiting inside `pool_ptr->second->get`.
+            /// To avoid such a deadlock we unlock `lock` before entering `pool_ptr->second->get`.
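+            /// (`pool_ptr` stays usable after unlocking, assuming nothing erases entries from `endpoints_pool` concurrently.)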
+            lock.unlock();

            auto retry_timeout = timeouts.connection_timeout.totalMicroseconds();
            auto session = pool_ptr->second->get(retry_timeout);
@@ -295,14 +317,25 @@ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts &
 }

-PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host)
+PooledHTTPSessionPtr makePooledHTTPSession(
+    const Poco::URI & uri,
+    const ConnectionTimeouts & timeouts,
+    size_t per_endpoint_pool_size,
+    bool resolve_host,
+    bool wait_on_pool_size_limit)
 {
-    return makePooledHTTPSession(uri, {}, timeouts, per_endpoint_pool_size, resolve_host);
+    return makePooledHTTPSession(uri, {}, timeouts, per_endpoint_pool_size, resolve_host, wait_on_pool_size_limit);
 }

-PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host)
+PooledHTTPSessionPtr makePooledHTTPSession(
+    const Poco::URI & uri,
+    const Poco::URI & proxy_uri,
+    const ConnectionTimeouts & timeouts,
+    size_t per_endpoint_pool_size,
+    bool resolve_host,
+    bool wait_on_pool_size_limit)
 {
-    return HTTPSessionPool::instance().getSession(uri, proxy_uri, timeouts, per_endpoint_pool_size, resolve_host);
+    return HTTPSessionPool::instance().getSession(uri, proxy_uri, timeouts, per_endpoint_pool_size, resolve_host, wait_on_pool_size_limit);
 }

 bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status) { return status == Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY || status == Poco::Net::HTTPResponse::HTTP_FOUND || status == Poco::Net::HTTPResponse::HTTP_SEE_OTHER || status == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT; }
diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h
index 3616a33c1c7..db8fc2a2a40 100644
--- a/src/IO/HTTPCommon.h
+++ b/src/IO/HTTPCommon.h
@@ -61,8 +61,20 @@ void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_
 HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, bool resolve_host = true);

 /// Same as the previous method, but takes the session from a pool, with and without a proxy URI.
-PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true);
-PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Poco::URI & proxy_uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size, bool resolve_host = true);
+PooledHTTPSessionPtr makePooledHTTPSession(
+    const Poco::URI & uri,
+    const ConnectionTimeouts & timeouts,
+    size_t per_endpoint_pool_size,
+    bool resolve_host = true,
+    bool wait_on_pool_size_limit = true);
+
+PooledHTTPSessionPtr makePooledHTTPSession(
+    const Poco::URI & uri,
+    const Poco::URI & proxy_uri,
+    const ConnectionTimeouts & timeouts,
+    size_t per_endpoint_pool_size,
+    bool resolve_host = true,
+    bool wait_on_pool_size_limit = true);

 bool isRedirect(Poco::Net::HTTPResponse::HTTPStatus status);

diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp
index d1cb1ec9ab0..364253ba746 100644
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@@ -31,6 +31,23 @@ namespace ProfileEvents
    extern const Event RemoteReadThrottlerSleepMicroseconds;
 }

+namespace
+{
+void resetSession(Aws::S3::Model::GetObjectResult & read_result)
+{
+    if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody()))
+    {
+        auto & session
+            = static_cast(*static_cast(session_aware_stream->getSession()));
+        session.reset();
+    }
+    else if (!dynamic_cast *>(&read_result.GetBody()))
+    {
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered");
+    }
+}
+}
+
 namespace DB
 {
 namespace ErrorCodes
@@ -74,7 +91,10 @@ bool ReadBufferFromS3::nextImpl()
    if (read_until_position)
    {
        if (read_until_position == offset)
+        {
+            read_all_range_successfully = true;
            return false;
+        }

        if (read_until_position < offset)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1);
@@ -154,7 +174,10 @@ bool ReadBufferFromS3::nextImpl()
    }

    if (!next_result)
+    {
+        read_all_range_successfully = true;
        return false;
+    }

    BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());

@@ -240,6 +263,8 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
    if (offset_ == getPosition() && whence == SEEK_SET)
        return offset_;

+    read_all_range_successfully = false;
+
    if (impl && restricted_seek)
    {
        throw Exception(
@@ -312,6 +337,8 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
 {
    if (position != static_cast(read_until_position))
    {
+        read_all_range_successfully = false;
+
        if (impl)
        {
            if (!atEndOfRequestedRangeGuess())
@@ -328,6 +355,8 @@ void ReadBufferFromS3::setReadUntilEnd()
 {
    if (read_until_position)
    {
+        read_all_range_successfully = false;
+
        read_until_position = 0;
        if (impl)
        {
@@ -351,8 +380,27 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess()
    return false;
 }

+ReadBufferFromS3::~ReadBufferFromS3()
+{
+    try
+    {
+        if (!read_all_range_successfully && read_result)
+            /// When we abandon a session with an ongoing GetObject request while another request is trying to delete the same object, that
+            /// delete operation will hang until the GetObject session's idle timeout expires. So we have to call `reset()` on the GetObject session immediately.
+            resetSession(*read_result);
+    }
+    catch (...)
+ { + tryLogCurrentException(log); + } +} + std::unique_ptr ReadBufferFromS3::initialize() { + if (!read_all_range_successfully && read_result) + resetSession(*read_result); + read_all_range_successfully = false; + /** * If remote_filesystem_read_method = 'threadpool', then for MergeTree family tables * exact byte ranges to read are always passed here. @@ -363,7 +411,7 @@ std::unique_ptr ReadBufferFromS3::initialize() read_result = sendRequest(offset, read_until_position ? std::make_optional(read_until_position - 1) : std::nullopt); size_t buffer_size = use_external_buffer ? 0 : read_settings.remote_fs_buffer_size; - return std::make_unique(read_result.GetBody(), buffer_size); + return std::make_unique(read_result->GetBody(), buffer_size); } Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin, std::optional range_end_incl) const diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 0f665861a1e..11299aa2c2a 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -41,7 +41,7 @@ private: std::atomic offset = 0; std::atomic read_until_position = 0; - Aws::S3::Model::GetObjectResult read_result; + std::optional read_result; std::unique_ptr impl; Poco::Logger * log = &Poco::Logger::get("ReadBufferFromS3"); @@ -60,6 +60,8 @@ public: bool restricted_seek_ = false, std::optional file_size = std::nullopt); + ~ReadBufferFromS3() override; + bool nextImpl() override; off_t seek(off_t off, int whence) override; @@ -100,6 +102,8 @@ private: /// There is different seek policy for disk seek and for non-disk seek /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). bool restricted_seek; + + bool read_all_range_successfully = false; }; } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index bfda7149343..754b1bfd5b8 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,3 +1,4 @@ +#include #include "Common/DNSResolver.h" #include "config.h" @@ -138,8 +139,9 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config , timeouts(ConnectionTimeouts( Poco::Timespan(client_configuration.connectTimeoutMs * 1000), /// connection timeout. Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// send timeout. - Poco::Timespan(client_configuration.requestTimeoutMs * 1000) /// receive timeout. - )) + Poco::Timespan(client_configuration.requestTimeoutMs * 1000), /// receive timeout. + Poco::Timespan(client_configuration.enableTcpKeepAlive ? 
client_configuration.tcpKeepAliveIntervalMs * 1000 : 0),
+        Poco::Timespan(client_configuration.http_keep_alive_timeout_ms * 1000))) /// The flag indicating whether keep-alive is enabled is set on each session upon creation
     , remote_host_filter(client_configuration.remote_host_filter)
     , s3_max_redirects(client_configuration.s3_max_redirects)
     , enable_s3_requests_logging(client_configuration.enable_s3_requests_logging)
@@ -147,6 +149,8 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config
     , get_request_throttler(client_configuration.get_request_throttler)
     , put_request_throttler(client_configuration.put_request_throttler)
     , extra_headers(client_configuration.extra_headers)
+    , http_connection_pool_size(client_configuration.http_connection_pool_size)
+    , wait_on_pool_size_limit(client_configuration.wait_on_pool_size_limit)
 {
 }
 
@@ -254,9 +258,26 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT
 void PocoHTTPClient::makeRequestInternal(
     Aws::Http::HttpRequest & request,
     std::shared_ptr & response,
+    Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
+    Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
+{
+    const auto request_configuration = per_request_configuration(request);
+    if (http_connection_pool_size && request_configuration.proxy_host.empty())
+        makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter);
+    else
+        makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter);
+}
+
+template
+void PocoHTTPClient::makeRequestInternalImpl(
+    Aws::Http::HttpRequest & request,
+    const ClientConfigurationPerRequest & request_configuration,
+    std::shared_ptr & response,
     Aws::Utils::RateLimits::RateLimiterInterface *,
     Aws::Utils::RateLimits::RateLimiterInterface *) const
 {
+    using SessionPtr = std::conditional_t;
+
     Poco::Logger * log = &Poco::Logger::get("AWSClient");
 
     auto uri = request.GetUri().GetURIString();
@@ -303,8 +324,7 @@ void PocoHTTPClient::makeRequestInternal(
     for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt)
     {
         Poco::URI target_uri(uri);
-        HTTPSessionPtr session;
-        auto request_configuration = per_request_configuration(request);
+        SessionPtr session;
 
         if (!request_configuration.proxy_host.empty())
         {
@@ -313,7 +333,11 @@ void PocoHTTPClient::makeRequestInternal(
             /// Reverse proxy can replace host header with resolved ip address instead of host name.
             /// This can lead to request signature difference on S3 side.
- session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); + if constexpr (pooled) + session = makePooledHTTPSession( + target_uri, timeouts, http_connection_pool_size, /* resolve_host = */ true, wait_on_pool_size_limit); + else + session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); bool use_tunnel = request_configuration.proxy_scheme == Aws::Http::Scheme::HTTP && target_uri.getScheme() == "https"; session->setProxy( @@ -325,7 +349,11 @@ void PocoHTTPClient::makeRequestInternal( } else { - session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ true); + if constexpr (pooled) + session = makePooledHTTPSession( + target_uri, timeouts, http_connection_pool_size, /* resolve_host = */ true, wait_on_pool_size_limit); + else + session = makeHTTPSession(target_uri, timeouts, /* resolve_host = */ false); } /// In case of error this address will be written to logs diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 762178a9365..92d3d5c5747 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -53,6 +53,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration ThrottlerPtr put_request_throttler; HTTPHeaderEntries extra_headers; + /// Not a client parameter in terms of HTTP and we won't send it to the server. Used internally to determine when connection have to be re-established. + uint32_t http_keep_alive_timeout_ms = 0; + /// Zero means pooling will not be used. + size_t http_connection_pool_size = 0; + /// See PoolBase::BehaviourOnLimit + bool wait_on_pool_size_limit = true; + void updateSchemeAndRegion(); std::function error_report; @@ -90,6 +97,12 @@ public: ); } + void SetResponseBody(Aws::IStream & incoming_stream, PooledHTTPSessionPtr & session_) /// NOLINT + { + body_stream = Aws::Utils::Stream::ResponseStream( + Aws::New>("http result streambuf", session_, incoming_stream.rdbuf())); + } + void SetResponseBody(std::string & response_body) /// NOLINT { auto stream = Aws::New("http result buf", response_body); // STYLE_CHECK_ALLOW_STD_STRING_STREAM @@ -149,6 +162,15 @@ private: EnumSize, }; + template + void makeRequestInternalImpl( + Aws::Http::HttpRequest & request, + const ClientConfigurationPerRequest & per_request_configuration, + std::shared_ptr & response, + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, + Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const; + +protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; @@ -170,6 +192,9 @@ private: ThrottlerPtr put_request_throttler; const HTTPHeaderEntries extra_headers; + + size_t http_connection_pool_size = 0; + bool wait_on_pool_size_limit = true; }; } diff --git a/src/IO/S3/SessionAwareIOStream.h b/src/IO/S3/SessionAwareIOStream.h index 1640accb6fa..f7e42f99f51 100644 --- a/src/IO/S3/SessionAwareIOStream.h +++ b/src/IO/S3/SessionAwareIOStream.h @@ -18,6 +18,10 @@ public: { } + Session & getSession() { return session; } + + const Session & getSession() const { return session; } + private: /// Poco HTTP session is holder of response stream. 
Session session; From c8cbc9f8ce36fa49a0785c7f9792c6cf154e06da Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 12 Jun 2023 16:19:14 +0200 Subject: [PATCH 051/522] fix test --- tests/integration/test_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 2ccd517923a..22805eb6e94 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -923,7 +923,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): @pytest.mark.parametrize("node_name", ["node"]) @pytest.mark.parametrize( - "in_flight_memory", [(10, 245918115), (5, 156786752), (1, 106426187)] + "in_flight_memory", [(10, 288044299), (5, 193557290), (1, 128348733)] ) def test_s3_engine_heavy_write_check_mem( cluster, broken_s3, node_name, in_flight_memory From 8073e0bad1600746f4682f3ca41076bf15e71f50 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 14 Jun 2023 13:45:53 +0200 Subject: [PATCH 052/522] Fix tests --- .../test_mysql_database_engine/configs/user.xml | 10 ++++++++++ tests/integration/test_s3_cluster/configs/users.xml | 9 +++++++++ tests/integration/test_s3_cluster/test.py | 1 + .../test_storage_delta/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_storage_delta/test.py | 1 + tests/integration/test_storage_hudi/test.py | 1 + .../test_storage_iceberg/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_storage_iceberg/test.py | 1 + tests/integration/test_storage_kafka/configs/users.xml | 7 +++++++ tests/integration/test_storage_postgresql/test.py | 2 +- .../test_storage_rabbitmq/configs/users.xml | 7 +++++++ 11 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_mysql_database_engine/configs/user.xml create mode 100644 tests/integration/test_s3_cluster/configs/users.xml create mode 100644 tests/integration/test_storage_delta/configs/users.d/users.xml create mode 100644 tests/integration/test_storage_iceberg/configs/users.d/users.xml diff --git a/tests/integration/test_mysql_database_engine/configs/user.xml b/tests/integration/test_mysql_database_engine/configs/user.xml new file mode 100644 index 00000000000..775c63350b0 --- /dev/null +++ b/tests/integration/test_mysql_database_engine/configs/user.xml @@ -0,0 +1,10 @@ + + + + + default + default + 1 + + + diff --git a/tests/integration/test_s3_cluster/configs/users.xml b/tests/integration/test_s3_cluster/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_s3_cluster/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 41f19cdd12d..3b8fd80060f 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -68,6 +68,7 @@ def started_cluster(): cluster.add_instance( "s0_0_0", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "node1", "shard": "shard1"}, with_minio=True, with_zookeeper=True, diff --git a/tests/integration/test_storage_delta/configs/users.d/users.xml b/tests/integration/test_storage_delta/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_delta/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git 
a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index 9477b66dab8..0cd1208edfa 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -53,6 +53,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_hudi/test.py b/tests/integration/test_storage_hudi/test.py index de9cde43609..3dbbcb7a06e 100644 --- a/tests/integration/test_storage_hudi/test.py +++ b/tests/integration/test_storage_hudi/test.py @@ -51,6 +51,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_iceberg/configs/users.d/users.xml b/tests/integration/test_storage_iceberg/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_iceberg/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_iceberg/test.py b/tests/integration/test_storage_iceberg/test.py index b3b2f160740..c22b8cda9b5 100644 --- a/tests/integration/test_storage_iceberg/test.py +++ b/tests/integration/test_storage_iceberg/test.py @@ -53,6 +53,7 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/config.d/named_collections.xml"], + user_configs=["configs/users.d/users.xml"], with_minio=True, ) diff --git a/tests/integration/test_storage_kafka/configs/users.xml b/tests/integration/test_storage_kafka/configs/users.xml index 992464a0ac2..3168de649f8 100644 --- a/tests/integration/test_storage_kafka/configs/users.xml +++ b/tests/integration/test_storage_kafka/configs/users.xml @@ -6,4 +6,11 @@ 0 + + + + default + 1 + + diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 0c8fc597b5c..49bec6cbe5e 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -15,7 +15,7 @@ node1 = cluster.add_instance( node2 = cluster.add_instance( "node2", main_configs=["configs/named_collections.xml"], - user_configs=["configs/settings.xml"], + user_configs=["configs/settings.xml", "configs/users.xml"], with_postgres_cluster=True, ) diff --git a/tests/integration/test_storage_rabbitmq/configs/users.xml b/tests/integration/test_storage_rabbitmq/configs/users.xml index 2cef0a6de3c..e42fefa905b 100644 --- a/tests/integration/test_storage_rabbitmq/configs/users.xml +++ b/tests/integration/test_storage_rabbitmq/configs/users.xml @@ -4,4 +4,11 @@ 1 + + + + default + 1 + + From afcc3aca363ff6cee0cb7f2417b711e08854d96c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 14 Jun 2023 13:14:59 +0000 Subject: [PATCH 053/522] Update ci_config.py --- tests/ci/ci_config.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index d829115cfe1..36bca9d741d 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -325,6 +325,9 @@ CI_CONFIG = { "Integration tests (asan)": { "required_build": "package_asan", }, + "Integration tests (asan, analyzer)": { + "required_build": "package_asan", + }, "Integration tests (tsan)": { "required_build": "package_tsan", }, From fe8172fbd9c58fadc5c0523c69e5adce05887dd2 Mon Sep 17 00:00:00 2001 
From: Dmitry Novik Date: Wed, 14 Jun 2023 13:17:04 +0000 Subject: [PATCH 054/522] Review fixes --- tests/ci/integration_test_check.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 523b1cfaab5..0d483c08456 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -71,7 +71,7 @@ def get_json_params_dict( } -def get_env_for_runner(build_path, repo_path, result_path, work_path): +def get_env_for_runner(check_name, build_path, repo_path, result_path, work_path): binary_path = os.path.join(build_path, "clickhouse") odbc_bridge_path = os.path.join(build_path, "clickhouse-odbc-bridge") library_bridge_path = os.path.join(build_path, "clickhouse-library-bridge") @@ -88,6 +88,9 @@ def get_env_for_runner(build_path, repo_path, result_path, work_path): my_env["CLICKHOUSE_TESTS_JSON_PARAMS_PATH"] = os.path.join(work_path, "params.json") my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0" + if "analyzer" in check_name.lower(): + my_env["USE_NEW_ANALYZER"] = "1" + return my_env @@ -225,9 +228,7 @@ def main(): else: download_all_deb_packages(check_name, reports_path, build_path) - my_env = get_env_for_runner(build_path, repo_path, result_path, work_path) - if "analyzer" in check_name.lower(): - my_env["USE_NEW_ANALYZER"] = "1" + my_env = get_env_for_runner(check_name, build_path, repo_path, result_path, work_path) json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: From 1230519bec047857d7fb9b1edd6baec1a7be8e6a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 14 Jun 2023 13:38:44 +0000 Subject: [PATCH 055/522] Automatic style fix --- tests/ci/integration_test_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 0d483c08456..843bbc8b3ee 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -228,7 +228,9 @@ def main(): else: download_all_deb_packages(check_name, reports_path, build_path) - my_env = get_env_for_runner(check_name, build_path, repo_path, result_path, work_path) + my_env = get_env_for_runner( + check_name, build_path, repo_path, result_path, work_path + ) json_path = os.path.join(work_path, "params.json") with open(json_path, "w", encoding="utf-8") as json_params: From a91fc3ddb33865d2db8170ff96e636de293b323a Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 14 Jun 2023 16:44:31 +0000 Subject: [PATCH 056/522] Add docs/ add more cases in test --- docs/en/interfaces/formats.md | 3 +- .../operations/settings/settings-formats.md | 5 +++ docs/ru/interfaces/formats.md | 4 +- docs/ru/operations/settings/settings.md | 8 +++- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- .../Formats/Impl/CSVRowInputFormat.cpp | 39 +++++++++---------- .../RowInputFormatWithNamesAndTypes.cpp | 4 ++ tests/queries/0_stateless/00301_csv.reference | 10 +++-- tests/queries/0_stateless/00301_csv.sh | 13 ++++--- 11 files changed, 56 insertions(+), 36 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 324930e248f..950692deb77 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -470,6 +470,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - 
[input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
 - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
 - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
+- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input. Default value - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
@@ -2062,7 +2063,7 @@ Special format for reading Parquet file metadata (https://parquet.apache.org/doc
   - logical_type - column logical type
   - compression - compression used for this column
   - total_uncompressed_size - total uncompressed bytes size of the column, calculated as the sum of total_uncompressed_size of the column from all row groups
-  - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups
+  - total_compressed_size - total compressed bytes size of the column, calculated as the sum of total_compressed_size of the column from all row groups
   - space_saved - percent of space saved by compression, calculated as (1 - total_compressed_size/total_uncompressed_size).
   - encodings - the list of encodings used for this column
 - row_groups - the list of row groups metadata with the next structure:
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 26501f3f3f6..e721c9408e3 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -931,6 +931,11 @@ Result
 ```text
 " string "
 ```
+### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
+
+Ignore extra columns in CSV input.
+
+Disabled by default.
 
 ## Values format settings {#values-format-settings}
 
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 48a6132170a..8488f4ce55a 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -401,8 +401,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 - [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - если установлено значение true, конец строки в формате вывода CSV будет `\r\n` вместо `\n`. Значение по умолчанию - `false`.
 - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
 - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
-- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек.
-Значение по умолчанию - `true`.
+- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
+- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы. Значение по умолчанию - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index e3da8302fc8..33d9300f8e1 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1686,7 +1686,7 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
 ## input_format_csv_detect_header {#input_format_csv_detect_header}
 
 Обнаружить заголовок с именами и типами в формате CSV.
- 
+
 Значение по умолчанию - `true`.
 
 ## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines}
@@ -1727,6 +1727,12 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in
 " string "
 ```
 
+## input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
+
+Игнорировать дополнительные столбцы.
+
+Выключено по умолчанию.
+
 ## output_format_tsv_crlf_end_of_line {#settings-output-format-tsv-crlf-end-of-line}
 
 Использовать в качестве разделителя строк для TSV формата CRLF (DOC/Windows стиль) вместо LF (Unix стиль).
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index d38f7767252..9582419b98c 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -835,7 +835,6 @@ class IColumn;
     M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
     M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, -WithNames, -WithNamesAndTypes formats).", IMPORTANT) \
     M(Bool, input_format_csv_empty_as_default, true, "Treat empty fields in CSV input as default values.", 0) \
-    M(Bool, input_format_csv_ignore_extra_columns, false, "", 0) \
     M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
     M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices.", 0) \
     M(Bool, input_format_null_as_default, true, "Initialize null fields with default values if the data type of this field is not nullable and it is supported by the input format", 0) \
@@ -1001,6 +1000,7 @@ class IColumn;
     M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
     \
     M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
+    M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input", 0) \
 
 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
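The setting added above only kicks in after the last expected column has been read (see the CSVRowInputFormat.cpp hunk further down, where any remaining delimiter-separated fields are skipped). A minimal sketch of the intended behaviour, assuming a table csv_demo(s String, n UInt64) that is not part of this patch:

    # extra trailing fields are parsed and discarded instead of failing the INSERT
    echo 'Hello, 1, unexpected, fields' | clickhouse-client \
        --input_format_csv_ignore_extra_columns=1 \
        --query="INSERT INTO csv_demo FORMAT CSV"

Without the setting the same input is rejected with a CSV parsing error, since the row carries more fields than the table has columns.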
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 0218d268c51..f29b55f7e73 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -63,7 +63,6 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.delimiter = settings.format_csv_delimiter; format_settings.csv.tuple_delimiter = settings.format_csv_delimiter; format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default; - format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; format_settings.csv.enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.null_representation = settings.format_csv_null_representation; format_settings.csv.arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; @@ -72,6 +71,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.try_detect_header = settings.input_format_csv_detect_header; format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; + format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 3bc53140fe5..38148bda373 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -128,7 +128,6 @@ struct FormatSettings bool allow_single_quotes = true; bool allow_double_quotes = true; bool empty_as_default = false; - bool ignore_extra_columns = false; bool crlf_end_of_line = false; bool enum_as_number = false; bool arrays_as_nested_csv = false; @@ -140,6 +139,7 @@ struct FormatSettings bool try_detect_header = true; bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; + bool ignore_extra_columns = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 0cc5889b732..8aaf8fd3e2f 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -288,6 +288,8 @@ bool CSVFormatReader::readField( const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); + bool res = false; + /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) @@ -299,31 +301,28 @@ bool CSVFormatReader::readField( /// they do not contain empty unquoted fields, so this check /// works for tuples as well. column.insertDefault(); - return false; } - - auto skip_all = [&]() - { - if (!is_last_file_column || !format_settings.csv.ignore_extra_columns) - { - return; - } - //std::cout << "skip !!!" 
<< std::endl; - buf->position() = find_first_symbols<'\n'>(buf->position(), buf->buffer().end()); - }; - if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) + else if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. - bool res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); - skip_all(); - return res; + res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); + } + else + { + /// Read the column normally. + serialization->deserializeTextCSV(column, *buf, format_settings); + res = true; } - /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); - - skip_all(); - return true; + if (is_last_file_column && format_settings.csv.ignore_extra_columns) + { + while (checkChar(format_settings.csv.delimiter, *buf)) + { + skipField(); + skipWhitespacesAndTabs(*buf); + } + } + return res; } void CSVFormatReader::skipPrefixBeforeHeader() diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index eaedbbb4a1e..24bf1d0d595 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -212,8 +212,12 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipRowStartDelimiter(); ext.read_columns.resize(data_types.size()); + //std::cout << "col size " << column_mapping->column_indexes_for_input_fields.size() << std::endl; for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) { + // std::cout << " file_column " << file_column << column_mapping->names_of_columns[file_column] << std::endl; + + const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); if (column_index) diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 61279f3b84a..3dbe3116bea 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -11,7 +11,9 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N -Hello world 1 2016-01-01 -Hello world 2 2016-01-02 -Hello world 3 2016-01-03 -Hello world 4 2016-01-04 +Hello 1 String1 +Hello 2 String2 +Hello 3 String3 +Hello 4 String4 +Hello 5 String5 +Hello 6 String6 diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index e99c39a0f6f..fafe75f6f63 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -39,11 +39,14 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; -echo 'Hello world, 1, 2016-01-01 -Hello world, 2 ,2016-01-02, -Hello world, 3 ,2016-01-03, 2016-01-13 -Hello world, 4 ,2016-01-04, 2016-01-14, 2016-01-15' | $CLICKHOUSE_CLIENT --input_format_csv_empty_as_default=1 
--input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; +echo 'Hello, 1, String1 +Hello, 2, String2, +Hello, 3, String3, 2016-01-13 +Hello, 4, , 2016-01-14 +Hello, 5, String5, 2016-01-15, 2016-01-16 +Hello, 6, String6, "line with a +break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; \ No newline at end of file From 3b4dba3d681cb2ef75e31740e801d8813ce4586f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:33:24 +0200 Subject: [PATCH 057/522] Fix tests --- .../test_mask_sensitive_info/configs/users.xml | 9 +++++++++ tests/integration/test_mask_sensitive_info/test.py | 1 + tests/integration/test_redirect_url_storage/test.py | 2 +- tests/integration/test_s3_cluster/test.py | 2 ++ .../test_storage_azure_blob_storage/configs/users.xml | 9 +++++++++ .../integration/test_storage_azure_blob_storage/test.py | 2 +- tests/integration/test_storage_dict/configs/users.xml | 9 +++++++++ .../test_storage_hudi/configs/users.d/users.xml | 9 +++++++++ .../test_storage_meilisearch/configs/users.xml | 9 +++++++++ tests/integration/test_storage_meilisearch/test.py | 2 +- tests/integration/test_storage_url/configs/users.xml | 9 +++++++++ tests/integration/test_storage_url/test.py | 1 + 12 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_mask_sensitive_info/configs/users.xml create mode 100644 tests/integration/test_storage_azure_blob_storage/configs/users.xml create mode 100644 tests/integration/test_storage_dict/configs/users.xml create mode 100644 tests/integration/test_storage_hudi/configs/users.d/users.xml create mode 100644 tests/integration/test_storage_meilisearch/configs/users.xml create mode 100644 tests/integration/test_storage_url/configs/users.xml diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml b/tests/integration/test_mask_sensitive_info/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 2131a76b5be..004491af4ac 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -9,6 +9,7 @@ node = cluster.add_instance( main_configs=[ "configs/named_collections.xml", ], + user_configs=["configs/users.xml"], with_zookeeper=True, ) diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index 225a34c9109..17a9a03008e 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -9,7 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/named_collections.xml"], - user_configs=["configs/user.xml"], + user_configs=["configs/users.xml"], with_zookeeper=False, with_hdfs=True, ) diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 3b8fd80060f..673ca318c92 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -76,12 +76,14 @@ def started_cluster(): 
cluster.add_instance( "s0_0_1", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "replica2", "shard": "shard1"}, with_zookeeper=True, ) cluster.add_instance( "s0_1_0", main_configs=["configs/cluster.xml", "configs/named_collections.xml"], + user_configs=["configs/users.xml"], macros={"replica": "replica1", "shard": "shard2"}, with_zookeeper=True, ) diff --git a/tests/integration/test_storage_azure_blob_storage/configs/users.xml b/tests/integration/test_storage_azure_blob_storage/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_azure_blob_storage/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index f9d337b6d86..54b3de8cd9b 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -25,7 +25,7 @@ def cluster(): cluster.add_instance( "node", main_configs=["configs/named_collections.xml"], - user_configs=["configs/disable_profilers.xml"], + user_configs=["configs/disable_profilers.xml", "configs/users.xml"], with_azurite=True, ) cluster.start() diff --git a/tests/integration/test_storage_dict/configs/users.xml b/tests/integration/test_storage_dict/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_dict/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_hudi/configs/users.d/users.xml b/tests/integration/test_storage_hudi/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_hudi/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_meilisearch/configs/users.xml b/tests/integration/test_storage_meilisearch/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_meilisearch/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_meilisearch/test.py b/tests/integration/test_storage_meilisearch/test.py index ddcd7154154..3724bb18d34 100644 --- a/tests/integration/test_storage_meilisearch/test.py +++ b/tests/integration/test_storage_meilisearch/test.py @@ -16,7 +16,7 @@ def started_cluster(request): try: cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "meili", main_configs=["configs/named_collection.xml"], with_meili=True + "meili", main_configs=["configs/named_collection.xml"], user_configs=["configs/users.xml"], with_meili=True ) cluster.start() yield cluster diff --git a/tests/integration/test_storage_url/configs/users.xml b/tests/integration/test_storage_url/configs/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_storage_url/configs/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index f360ec105ec..7f359078967 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -6,6 +6,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( "node1", main_configs=["configs/conf.xml", "configs/named_collections.xml"], + 
user_configs=["configs/users.xml"], with_nginx=True, ) From 05811d3dd8dda58defa9e6a7360ee17fdcc5c085 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 12:35:13 +0200 Subject: [PATCH 058/522] Rename --- src/Access/Common/AccessType.h | 2 +- src/Storages/NamedCollectionsHelpers.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 709a519e712..0b66a1b9578 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -142,7 +142,7 @@ enum class AccessType M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(SHOW_NAMED_COLLECTIONS_SECRETS, "SHOW NAMED COLLECTIONS SECRETS", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ - M(USE_NAMED_COLLECTION, "NAMED COLLECTION USAGE, NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ + M(NAMED_COLLECTION, "NAMED COLLECTION USAGE, USE NAMED COLLECTION", NAMED_COLLECTION, NAMED_COLLECTION_ADMIN) \ M(NAMED_COLLECTION_ADMIN, "NAMED COLLECTION CONTROL", NAMED_COLLECTION, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Storages/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollectionsHelpers.cpp index 29d47e131a6..f301cca92a1 100644 --- a/src/Storages/NamedCollectionsHelpers.cpp +++ b/src/Storages/NamedCollectionsHelpers.cpp @@ -76,7 +76,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (!collection_name.has_value()) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, *collection_name); + context->checkAccess(AccessType::NAMED_COLLECTION, *collection_name); NamedCollectionPtr collection; if (throw_unknown_collection) @@ -121,7 +121,7 @@ MutableNamedCollectionPtr tryGetNamedCollectionWithOverrides( if (collection_name.empty()) return nullptr; - context->checkAccess(AccessType::USE_NAMED_COLLECTION, collection_name); + context->checkAccess(AccessType::NAMED_COLLECTION, collection_name); const auto & collection = NamedCollectionFactory::instance().get(collection_name); auto collection_copy = collection->duplicate(); From 806176d88e0b4237c16e23aed27179ed93aa17c1 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 15 Jun 2023 11:23:08 +0000 Subject: [PATCH 059/522] Add input_format_csv_missing_as_default setting and tests --- docs/en/interfaces/formats.md | 3 ++- .../operations/settings/settings-formats.md | 8 +++++++- docs/ru/interfaces/formats.md | 3 ++- docs/ru/operations/settings/settings.md | 8 +++++++- src/Core/Settings.h | 3 ++- src/Dictionaries/CacheDictionary.cpp | 2 +- src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/CSVRowInputFormat.cpp | 18 +++++++++++++++++- .../Formats/Impl/CSVRowInputFormat.h | 1 + .../RowInputFormatWithNamesAndTypes.cpp | 4 ---- tests/queries/0_stateless/00301_csv.reference | 10 ++++++++++ tests/queries/0_stateless/00301_csv.sh | 19 +++++++++++++++++-- 13 files changed, 68 insertions(+), 13 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 950692deb77..e0b0fcfabd5 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -470,7 +470,8 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`. 
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
 - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
-- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input. Default value - `false`.
+- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if your file has more columns than expected). Default value - `false`.
+- [input_format_csv_missing_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_missing_as_default) - treat missing fields in CSV input as default values. Default value - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index e721c9408e3..6d9a1fb5160 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -933,7 +933,13 @@ Result
 ```
 ### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
 
-Ignore extra columns in CSV input.
+Ignore extra columns in CSV input (if your file has more columns than expected).
+
+Disabled by default.
+
+### input_format_csv_missing_as_default {#input_format_csv_missing_as_default}
+
+Treat missing fields in CSV input as default values.
 
 Disabled by default.
 
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 8488f4ce55a..7e3bb3f7d26 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -402,7 +402,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
 - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
 - [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
-- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы. Значение по умолчанию - `false`.
+- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). Значение по умолчанию - `false`.
+- [input_format_csv_missing_as_default](../operations/settings/settings.md/#input_format_csv_missing_as_default) - рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 33d9300f8e1..61cfc332585 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -1729,7 +1729,13 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in
 
 ## input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
 
-Игнорировать дополнительные столбцы.
+Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается).
+
+Выключено по умолчанию.
+
+## input_format_csv_missing_as_default {#input_format_csv_missing_as_default}
+
+Рассматривать отсутствующие поля в CSV в качестве значений по умолчанию.
 
 Выключено по умолчанию.
 
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 9582419b98c..ce7c28996e8 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -1000,7 +1000,8 @@ class IColumn;
     M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
     \
     M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
-    M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input", 0) \
+    M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if your file has more columns than expected)", 0) \
+    M(Bool, input_format_csv_missing_as_default, false, "Treat missing fields in CSV input as default values", 0) \
 
 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
 
diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index c5c88a9f142..359f7c17436 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -138,7 +138,7 @@ Columns CacheDictionary::getColumns(
     const Columns & default_values_columns) const
 {
     /**
-     * Flow of getColumsImpl
+     * Flow of getColumnsImpl
      * 1. Get fetch result from storage
      * 2. If all keys are found in storage and not expired
      * 2.1. If storage returns fetched columns in order of keys then result is returned to client.
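A minimal sketch of what the input_format_csv_missing_as_default setting introduced by this patch is meant to allow, assuming a table csv_demo(s String, n UInt64, d String DEFAULT 'Default') that is not part of this patch; pairing it with input_format_defaults_for_omitted_fields mirrors the test changes at the end of this patch:

    # the row stops after the second field, so d is filled with its default value
    echo 'Hello, 1' | clickhouse-client \
        --input_format_defaults_for_omitted_fields=1 \
        --input_format_csv_missing_as_default=1 \
        --query="INSERT INTO csv_demo FORMAT CSV"
    # stored row: ('Hello', 1, 'Default')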
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f29b55f7e73..102b5d7eec0 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -72,6 +72,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; + format_settings.csv.missing_as_default = settings.input_format_csv_missing_as_default; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 38148bda373..2b52d88184c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -140,6 +140,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; bool ignore_extra_columns = false; + bool missing_as_default = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 8aaf8fd3e2f..dcc057baef2 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -147,7 +147,18 @@ CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf); - assertChar(format_settings.csv.delimiter, *buf); + + bool res = checkChar(format_settings.csv.delimiter, *buf); + if (!res && !format_settings.csv.missing_as_default) + { + char err[2] = {format_settings.csv.delimiter, '\0'}; + throwAtAssertionFailed(err, *buf); + } + + if (!res && format_settings.csv.missing_as_default) + { + current_row_has_missing_fields = true; + } } template @@ -187,6 +198,7 @@ void CSVFormatReader::skipRowEndDelimiter() return; skipEndOfLine(*buf); + current_row_has_missing_fields = false; } void CSVFormatReader::skipHeaderRow() @@ -302,6 +314,10 @@ bool CSVFormatReader::readField( /// works for tuples as well. column.insertDefault(); } + else if (current_row_has_missing_fields) + { + column.insertDefault(); + } else if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { /// If value is null but type is not nullable then use default value instead. 
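The readField() change above keeps two related cases distinct: input_format_csv_empty_as_default covers a field that is present but empty, while the new flag covers a field whose delimiter never appears, as detected in skipFieldDelimiter(). A sketch of the difference, under the same assumed csv_demo table as above:

    # present-but-empty second field: handled by input_format_csv_empty_as_default
    echo 'Hello,,StringX' | clickhouse-client --input_format_csv_empty_as_default=1 \
        --query="INSERT INTO csv_demo FORMAT CSV"
    # absent second and third fields (no delimiters at all): handled by the new setting
    echo 'Hello' | clickhouse-client --input_format_defaults_for_omitted_fields=1 \
        --input_format_csv_missing_as_default=1 \
        --query="INSERT INTO csv_demo FORMAT CSV"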
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 0c8099a216c..3958c66bbc6 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,6 +89,7 @@ public: protected: PeekableReadBuffer * buf; + bool current_row_has_missing_fields = false; }; class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 24bf1d0d595..eaedbbb4a1e 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -212,12 +212,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipRowStartDelimiter(); ext.read_columns.resize(data_types.size()); - //std::cout << "col size " << column_mapping->column_indexes_for_input_fields.size() << std::endl; for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column) { - // std::cout << " file_column " << file_column << column_mapping->names_of_columns[file_column] << std::endl; - - const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column]; const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size(); if (column_index) diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 3dbe3116bea..fa85fd924e1 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -1,19 +1,29 @@ +=== Test input_format_csv_empty_as_default Hello, world 123 2016-01-01 Hello, "world" 456 2016-01-02 Hello "world" 789 2016-01-03 Hello\n world 100 2016-01-04 default 1 2019-06-19 default-eof 1 2019-06-19 +=== Test datetime 2016-01-01 01:02:03 1 2016-01-02 01:02:03 2 2017-08-15 13:15:01 3 1970-01-02 05:46:39 4 +=== Test nullable datetime 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N +=== Test input_format_csv_ignore_extra_columns Hello 1 String1 Hello 2 String2 Hello 3 String3 Hello 4 String4 Hello 5 String5 Hello 6 String6 +=== Test input_format_csv_missing_as_default +Hello 0 33 \N 55 Default +Hello 0 33 \N 55 Default +Hello 1 2 \N 55 Default +Hello 1 2 3 4 String +Hello 1 2 3 4 String diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index fafe75f6f63..887a75b0ded 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -4,6 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +echo === Test input_format_csv_empty_as_default $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS csv"; $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 1, d Date DEFAULT '2019-06-19') ENGINE = Memory"; @@ -18,6 +19,7 @@ Hello "world", 789 ,2016-01-03 $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; +echo === Test datetime $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Asia/Istanbul'), s String) ENGINE = Memory"; echo '"2016-01-01 01:02:03","1" @@ -28,7 +30,7 @@ echo '"2016-01-01 01:02:03","1" $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; - +echo === Test nullable datetime $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t Nullable(DateTime('Asia/Istanbul')), s Nullable(String)) ENGINE = Memory"; echo 'NULL, NULL @@ -39,6 +41,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; +echo === Test input_format_csv_ignore_extra_columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; echo 'Hello, 1, String1 @@ -49,4 +52,16 @@ Hello, 5, String5, 2016-01-15, 2016-01-16 Hello, 6, String6, "line with a break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; -$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; \ No newline at end of file +$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; + + +echo === Test input_format_csv_missing_as_default +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt64 Default 33, f4 Nullable(UInt64), f5 Nullable(UInt64) Default 55, f6 String DEFAULT 'Default') ENGINE = Memory"; + +echo 'Hello +Hello, +Hello, 1, 2 +Hello, 1, 2, 3, 4, String +Hello, 1, 2, 3, 4, String,'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4 NULLS FIRST, f5, f6"; +$CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 5cceae1e1fa97126a7b1223927354d9b535e184b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Jun 2023 14:25:10 +0200 Subject: [PATCH 060/522] Fix --- tests/integration/test_storage_meilisearch/test.py | 5 ++++- tests/queries/0_stateless/01271_show_privileges.reference | 2 +- .../0_stateless/02117_show_create_table_system.reference | 6 +++--- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_meilisearch/test.py b/tests/integration/test_storage_meilisearch/test.py index 3724bb18d34..b6acee18981 100644 --- a/tests/integration/test_storage_meilisearch/test.py +++ b/tests/integration/test_storage_meilisearch/test.py @@ -16,7 +16,10 @@ def started_cluster(request): try: cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "meili", main_configs=["configs/named_collection.xml"], user_configs=["configs/users.xml"], with_meili=True + "meili", + main_configs=["configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_meili=True, ) cluster.start() yield cluster diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 
925e0921759..b1ce5ab71d5 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -94,7 +94,7 @@ SHOW ACCESS [] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] NAMED_COLLECTION NAMED COLLECTION ADMIN SHOW NAMED COLLECTIONS SECRETS ['SHOW NAMED COLLECTIONS SECRETS'] NAMED_COLLECTION NAMED COLLECTION ADMIN -USE NAMED COLLECTION ['NAMED COLLECTION USAGE','NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN +NAMED COLLECTION ['NAMED COLLECTION USAGE','USE NAMED COLLECTION'] NAMED_COLLECTION NAMED COLLECTION ADMIN NAMED COLLECTION ADMIN ['NAMED COLLECTION CONTROL'] NAMED_COLLECTION ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 143fb24a637..72c1027e7b1 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS 
PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 
'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 
132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' 
= 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS 
PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'USE NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' 
= 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD SYMBOLS' = 111, 'SYSTEM RELOAD DICTIONARY' = 112, 'SYSTEM RELOAD MODEL' = 113, 'SYSTEM RELOAD FUNCTION' = 114, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 115, 'SYSTEM RELOAD' = 116, 'SYSTEM RESTART DISK' = 117, 'SYSTEM MERGES' = 118, 'SYSTEM TTL MERGES' = 119, 'SYSTEM FETCHES' = 120, 'SYSTEM MOVES' = 121, 'SYSTEM DISTRIBUTED SENDS' = 122, 'SYSTEM REPLICATED SENDS' = 123, 'SYSTEM SENDS' = 124, 'SYSTEM REPLICATION QUEUES' = 125, 'SYSTEM DROP REPLICA' = 126, 'SYSTEM SYNC REPLICA' = 127, 'SYSTEM RESTART REPLICA' = 128, 'SYSTEM RESTORE REPLICA' = 129, 'SYSTEM WAIT LOADING PARTS' = 130, 'SYSTEM SYNC DATABASE REPLICA' = 131, 'SYSTEM SYNC TRANSACTION LOG' = 132, 'SYSTEM SYNC FILE CACHE' = 133, 'SYSTEM FLUSH DISTRIBUTED' = 134, 'SYSTEM FLUSH LOGS' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
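As an aside for reviewers, a minimal sketch of how the renamed privilege is expected to behave after this patch. The collection name `foo` and user `test_user` are invented placeholders, and the exact GRANT spelling is assumed from the reference-file changes above rather than stated in the patch itself:

```bash
#!/usr/bin/env bash
# Hedged sketch, not part of the patch: 'foo' and 'test_user' are placeholder names.
clickhouse-client --query "CREATE NAMED COLLECTION foo AS key1 = 'value1'"
# The privilege is now spelled NAMED COLLECTION; the old USE NAMED COLLECTION
# spelling should remain usable as an alias, per the updated reference files.
clickhouse-client --query "GRANT NAMED COLLECTION ON foo TO test_user"
clickhouse-client --query "SELECT privilege, aliases FROM system.privileges WHERE privilege = 'NAMED COLLECTION'"
```

If the rename is wired up correctly, the last query should list 'NAMED COLLECTION USAGE' and 'USE NAMED COLLECTION' among the aliases, matching the 01271_show_privileges.reference change above.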
From 0eeee11dc46d462412ad671a7d59006fba59c403 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 15 Jun 2023 12:36:18 +0000 Subject: [PATCH 061/522] Style fix, add comment --- .../Formats/Impl/CSVRowInputFormat.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index dcc057baef2..7cd812bc5b0 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -149,15 +149,17 @@ void CSVFormatReader::skipFieldDelimiter() skipWhitespacesAndTabs(*buf); bool res = checkChar(format_settings.csv.delimiter, *buf); - if (!res && !format_settings.csv.missing_as_default) + if (!res) { - char err[2] = {format_settings.csv.delimiter, '\0'}; - throwAtAssertionFailed(err, *buf); - } - - if (!res && format_settings.csv.missing_as_default) - { - current_row_has_missing_fields = true; + if (!format_settings.csv.missing_as_default) + { + char err[2] = {format_settings.csv.delimiter, '\0'}; + throwAtAssertionFailed(err, *buf); + } + else + { + current_row_has_missing_fields = true; + } } } @@ -332,6 +334,7 @@ bool CSVFormatReader::readField( if (is_last_file_column && format_settings.csv.ignore_extra_columns) { + // Skip all remaining fields up to the end of the current line. while (checkChar(format_settings.csv.delimiter, *buf)) { skipField(); From 49f88f5873aa8373c012feefe8a34be2b1902513 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 15 Jun 2023 14:15:01 +0000 Subject: [PATCH 062/522] Fix ENV variable name --- tests/ci/integration_test_check.py | 2 +- tests/integration/helpers/cluster.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 843bbc8b3ee..e6b2203fb65 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -89,7 +89,7 @@ def get_env_for_runner(check_name, build_path, repo_path, result_path, work_path my_env["CLICKHOUSE_TESTS_RUNNER_RESTART_DOCKER"] = "0" if "analyzer" in check_name.lower(): - my_env["USE_NEW_ANALYZER"] = "1" + my_env["CLICKHOUSE_USE_NEW_ANALYZER"] = "1" return my_env diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 8c54a37cf60..a2b57fab628 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -4187,7 +4187,7 @@ class ClickHouseInstance: ) write_embedded_config("0_common_instance_users.xml", users_d_dir) - if os.environ.get("USE_NEW_ANALYZER") is not None: + if os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None: write_embedded_config("0_common_enable_analyzer.xml", users_d_dir) if len(self.custom_dictionaries_paths): From 02b5b50e41f38bd0f0b67fd2c414579aeb0cd051 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 16 Jun 2023 12:39:46 +0000 Subject: [PATCH 063/522] Add milli/micro seconds support for date_diff --- .../functions/date-time-functions.md | 2 ++ .../functions/date-time-functions.md | 2 ++ .../functions/date-time-functions.md | 2 ++ src/Functions/DateTimeTransforms.h | 29 +++++++++++++++++++ src/Functions/TransformDateTime64.h | 2 +- src/Functions/dateDiff.cpp | 4 +++ .../02160_special_functions.reference | 8 +++++ .../0_stateless/02160_special_functions.sql | 9 ++++++ 8 files changed, 57 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 
280b41e7a5f..82f1a8aa237 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -782,6 +782,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `microsecond` (possible abbreviations: `us`, `u`) + - `millisecond` (possible abbreviations: `ms`) - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 867d71d334c..93ae750b10d 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -680,6 +680,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 53dadc23c6d..e4b70322477 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。 可能的值: + - `microsecond` + - `millisecond` - `second` - `minute` - `hour` diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 84c71c89b11..4d15078f2d7 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1377,6 +1377,35 @@ struct ToRelativeSecondNumImpl using FactorTransform = ZeroTransform; }; +template +struct ToRelativeSubsecondNumImpl +{ + static constexpr auto name = "toRelativeSubsecondNumImpl"; + + static inline UInt64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) + { + if (scale == second_divider) + return t.value; + if (scale > second_divider) + return t.value / (scale / second_divider); + return t.value * (second_divider / scale); + } + static inline UInt64 execute(UInt32 t, const DateLUTImpl &) + { + return t * second_divider; + } + static inline UInt64 execute(Int32 d, const DateLUTImpl & time_zone) + { + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))) * second_divider; + } + static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) + { + return static_cast(time_zone.fromDayNum(DayNum(d)) * second_divider); + } + + using FactorTransform = ZeroTransform; +}; + struct ToYYYYMMImpl { static constexpr auto name = "toYYYYMM"; diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 3dab9efeb6b..1a1e732ae40 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -5,7 +5,7 @@ namespace DB { -/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform. +/** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform. 
* * Depending on what overloads of Transform::execute() are available, when called with DateTime64 value, * invokes Transform::execute() with either: diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 8361e9db166..62f01274476 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -373,6 +373,10 @@ public: impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "second" || unit == "ss" || unit == "s") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + else if (unit == "millisecond" || unit == "ms") + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + else if (unit == "microsecond" || unit == "us" || unit == "u") + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); diff --git a/tests/queries/0_stateless/02160_special_functions.reference b/tests/queries/0_stateless/02160_special_functions.reference index 3a1dcd88902..37278d4b5dc 100644 --- a/tests/queries/0_stateless/02160_special_functions.reference +++ b/tests/queries/0_stateless/02160_special_functions.reference @@ -33,4 +33,12 @@ Hello 2021-01-01 1 1 +86400000 +172800000 +86461000 +86401299 +701 +701 +800 +60200201 1 diff --git a/tests/queries/0_stateless/02160_special_functions.sql b/tests/queries/0_stateless/02160_special_functions.sql index 6d18e7d0d25..6002f793601 100644 --- a/tests/queries/0_stateless/02160_special_functions.sql +++ b/tests/queries/0_stateless/02160_special_functions.sql @@ -41,4 +41,13 @@ SELECT TIMESTAMPSUB(DATE '2022-01-01', INTERVAL 1 YEAR); SELECT DATE_DIFF(YEAR, DATE '2021-01-01', DATE '2022-01-01'); SELECT DATEDIFF(YEAR, DATE '2021-01-01', DATE '2022-01-01'); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02'::Date); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-03'::Date32); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02 00:01:01'::DateTime); +SELECT DATEDIFF(millisecond, '2021-01-01'::Date, '2021-01-02 00:00:01.299'::DateTime64); +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299'::DateTime64, '2021-01-02'::Date); +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299999'::DateTime64(6), '2021-01-02'::Date); +SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.2'::DateTime64(1), '2021-01-02'::Date); +SELECT DATEDIFF(microsecond, '2021-01-01 23:59:59.899999'::DateTime64(6), '2021-01-02 00:01:00.100200300'::DateTime64(9)); + SELECT EXISTS (SELECT 1); From b546d8e665b86429ac44770db7d73dd32b0a7156 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 16 Jun 2023 15:30:56 +0200 Subject: [PATCH 064/522] review fixes + test --- src/Common/ProfileEvents.cpp | 2 ++ src/Disks/IO/ReadBufferFromRemoteFSGather.h | 2 +- src/IO/ReadBufferFromS3.cpp | 34 +++++++++++++------ src/IO/ReadBufferFromS3.h | 2 ++ src/IO/S3/PocoHTTPClient.cpp | 1 + ...ing_from_s3_with_connection_pool.reference | 1 + ...89_reading_from_s3_with_connection_pool.sh | 29 ++++++++++++++++ 7 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference create mode 100755 tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index f66f7bc6465..c9030070bf2 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -368,6 +368,8 @@ The 
server successfully detected this situation and will download merged part fr M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \ M(ReadBufferFromS3Bytes, "Bytes read from S3.") \ M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \ + M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \ + M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \ \ M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 9bf55ab69ce..6488d532829 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -73,7 +73,7 @@ private: const std::shared_ptr<FilesystemCacheLog> cache_log; const String query_id; const bool use_external_buffer; - bool with_cache; + const bool with_cache; size_t read_until_position = 0; size_t file_offset_of_buffer_end = 0; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 364253ba746..0b320ed86ff 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,5 +1,7 @@ -#include "config.h" #include +#include +#include +#include "config.h" #if USE_AWS_S3 @@ -24,6 +26,8 @@ namespace ProfileEvents extern const Event ReadBufferFromS3InitMicroseconds; extern const Event ReadBufferFromS3Bytes; extern const Event ReadBufferFromS3RequestsErrors; + extern const Event ReadBufferFromS3ResetSessions; + extern const Event ReadBufferFromS3PreservedSessions; extern const Event ReadBufferSeekCancelConnection; extern const Event S3GetObject; extern const Event DiskS3GetObject; @@ -46,6 +50,19 @@ void resetSession(Aws::S3::Model::GetObjectResult & read_result) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); } } + +void resetSessionIfNeeded(bool read_all_range_successfully, std::optional<Aws::S3::Model::GetObjectResult> & read_result) +{ + if (!read_all_range_successfully && read_result) + { + /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object, the delete + /// operation will hang until GetObject's session idle timeout expires. So we have to call `reset()` on GetObject's session immediately. + resetSession(*read_result); + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); + } + else + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); +} } namespace DB @@ -91,10 +108,7 @@ bool ReadBufferFromS3::nextImpl() if (read_until_position) { if (read_until_position == offset) - { - read_all_range_successfully = true; return false; - } if (read_until_position < offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); @@ -384,10 +398,7 @@ ReadBufferFromS3::~ReadBufferFromS3() { try { - if (!read_all_range_successfully && read_result) - /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete - /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. - resetSession(*read_result); + resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); } catch (...) 
{ @@ -397,8 +408,7 @@ ReadBufferFromS3::~ReadBufferFromS3() std::unique_ptr ReadBufferFromS3::initialize() { - if (!read_all_range_successfully && read_result) - resetSession(*read_result); + resetSessionIfNeeded(readAllRangeSuccessfully(), read_result); read_all_range_successfully = false; /** @@ -463,6 +473,10 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin } } +bool ReadBufferFromS3::readAllRangeSuccessfully() const +{ + return read_until_position ? offset == read_until_position : read_all_range_successfully; +} } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 11299aa2c2a..d58971bea5b 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -95,6 +95,8 @@ private: Aws::S3::Model::GetObjectResult sendRequest(size_t range_begin, std::optional range_end_incl) const; + bool readAllRangeSuccessfully() const; + ReadSettings read_settings; bool use_external_buffer; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 754b1bfd5b8..d64ddf0ec38 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -261,6 +261,7 @@ void PocoHTTPClient::makeRequestInternal( Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { + /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. const auto request_configuration = per_request_configuration(request); if (http_connection_pool_size && request_configuration.proxy_host.empty()) makeRequestInternalImpl(request, request_configuration, response, readLimiter, writeLimiter); diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh new file mode 100755 index 00000000000..7a8b94a10a8 --- /dev/null +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE IF EXISTS test_s3; + +CREATE TABLE test_s3 (a UInt64, b UInt64) +ENGINE = MergeTree ORDER BY a +SETTINGS disk = 's3_disk', min_bytes_for_wide_part = 0; + +INSERT INTO test_s3 SELECT number, number FROM numbers_mt(1e7); +" +query="SELECT a, b FROM test_s3" +query_id=$(${CLICKHOUSE_CLIENT} --query "select queryID() from ($query) limit 1" 2>&1) +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -nm --query " +WITH + ProfileEvents['ReadBufferFromS3ResetSessions'] AS reset, + ProfileEvents['ReadBufferFromS3PreservedSessions'] AS preserved +SELECT preserved > reset +FROM system.query_log +WHERE type = 'QueryFinish' + AND current_database = currentDatabase() + AND query_id='$query_id'; +" From 76faacd23ca6137feb52741a7217432ab961aea3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 18 Jun 2023 14:59:10 +0200 Subject: [PATCH 065/522] try to fix some trash in Disks --- programs/disks/CommandCopy.cpp | 2 +- src/Disks/DiskEncrypted.cpp | 31 +------------------ src/Disks/DiskEncrypted.h | 2 -- src/Disks/DiskLocal.cpp | 19 +----------- src/Disks/DiskLocal.h | 2 -- src/Disks/IDisk.cpp | 18 ++++++----- src/Disks/IDisk.h | 3 -- .../ObjectStorages/DiskObjectStorage.cpp | 16 ++++++---- src/Disks/ObjectStorages/DiskObjectStorage.h | 6 +++- .../MergeTree/DataPartStorageOnDiskBase.cpp | 10 +++--- 10 files changed, 33 insertions(+), 76 deletions(-) diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 1cfce7fc022..5228b582d25 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -59,7 +59,7 @@ public: String relative_path_from = validatePathAndGetAsRelative(path_from); String relative_path_to = validatePathAndGetAsRelative(path_to); - disk_from->copy(relative_path_from, disk_to, relative_path_to); + disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to); } }; } diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 6b515b100c9..f24e06fdef0 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -310,32 +310,6 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes) return std::make_unique(std::static_pointer_cast(shared_from_this()), std::move(reservation)); } -void DiskEncrypted::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - /// Check if we can copy the file without deciphering. - if (isSameDiskType(*this, *to_disk)) - { - /// Disk type is the same, check if the key is the same too. - if (auto * to_disk_enc = typeid_cast(to_disk.get())) - { - auto from_settings = current_settings.get(); - auto to_settings = to_disk_enc->current_settings.get(); - if (from_settings->all_keys == to_settings->all_keys) - { - /// Keys are the same so we can simply copy the encrypted file. - auto wrapped_from_path = wrappedPath(from_path); - auto to_delegate = to_disk_enc->delegate; - auto wrapped_to_path = to_disk_enc->wrappedPath(to_path); - delegate->copy(wrapped_from_path, to_delegate, wrapped_to_path); - return; - } - } - } - - /// Copy the file through buffers with deciphering. 
- copyThroughBuffers(from_path, to_disk, to_path); -} - void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { @@ -359,11 +333,8 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha } } - if (!to_disk->exists(to_dir)) - to_disk->createDirectories(to_dir); - /// Copy the file through buffers with deciphering. - copyThroughBuffers(from_dir, to_disk, to_dir); + IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); } std::unique_ptr DiskEncrypted::readFile( diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index 69d051a9537..e085409cedf 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -110,8 +110,6 @@ public: delegate->listFiles(wrapped_path, file_names); } - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; std::unique_ptr readFile( diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index c76ea289101..72c9ccafc8d 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -417,29 +417,12 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another) return typeid(one) == typeid(another); } -void DiskLocal::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - if (isSameDiskType(*this, *to_disk)) - { - fs::path to = fs::path(to_disk->getPath()) / to_path; - fs::path from = fs::path(disk_path) / from_path; - if (from_path.ends_with('/')) - from = from.parent_path(); - if (fs::is_directory(from)) - to /= from.filename(); - - fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. - } - else - copyThroughBuffers(from_path, to_disk, to_path, /* copy_root_dir */ true); /// Base implementation. -} - void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { if (isSameDiskType(*this, *to_disk)) fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. else - copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); /// Base implementation. 
+ IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); } SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 3d340ae40b7..1eee58900bf 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -63,8 +63,6 @@ public: void replaceFile(const String & from_path, const String & to_path) override; - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; - void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) override; void listFiles(const String & path, std::vector & file_names) const override; diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index bca867fec76..46a2c5b30c6 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -85,9 +85,16 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p if (from_disk.isFile(from_path)) { auto result = exec.execute( - [&from_disk, from_path, &to_disk, to_path, &settings]() + [&from_disk, from_path, &to_disk, to_path, &settings, thread_group = CurrentThread::getGroup()]() { - setThreadName("DiskCopier"); + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + + if (thread_group) + CurrentThread::attachToGroup(thread_group); + from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); }); @@ -126,18 +133,13 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) -{ - copyThroughBuffers(from_path, to_disk, to_path, true); -} - void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { if (!to_disk->exists(to_dir)) to_disk->createDirectories(to_dir); - copyThroughBuffers(from_dir, to_disk, to_dir, false); + copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); } void IDisk::truncateFile(const String &, size_t) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 5d75f3b70e5..8a4a29c36fd 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -181,9 +181,6 @@ public: /// If a file with `to_path` path already exists, it will be replaced. virtual void replaceFile(const String & from_path, const String & to_path) = 0; - /// Recursively copy data containing at `from_path` to `to_path` located at `to_disk`. - virtual void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path); - /// Recursively copy files from from_dir to to_dir. Create to_dir if not exists. 
virtual void copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 005d115a277..3fae67e2e9d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -235,19 +235,23 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat transaction->commit(); } - -void DiskObjectStorage::copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) +void DiskObjectStorage::copyFile( + const String & from_file_path, + IDisk & to_disk, + const String & to_file_path, + const WriteSettings & settings) { - /// It's the same object storage disk - if (this == to_disk.get()) + if (this == &to_disk) { + /// It may use s3-server-side copy auto transaction = createObjectStorageTransaction(); - transaction->copyFile(from_path, to_path); + transaction->copyFile(from_file_path, to_file_path); transaction->commit(); } else { - IDisk::copy(from_path, to_disk, to_path); + /// Copy through buffers + IDisk::copyFile(from_file_path, to_disk, to_file_path, settings); } } diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index b7dfaf67cf2..b6e4252749a 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -152,7 +152,11 @@ public: Strings getBlobPath(const String & path) const override; void writeFileUsingBlobWritingFunction(const String & path, WriteMode mode, WriteBlobFunction && write_blob_function) override; - void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override; + void copyFile( /// NOLINT + const String & from_file_path, + IDisk & to_disk, + const String & to_file_path, + const WriteSettings & settings = {}) override; void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override; diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 30776a8bc50..545cb062fb7 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -456,18 +456,18 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, const DiskPtr & disk, - Poco::Logger * log) const + Poco::Logger *) const { String path_to_clone = fs::path(to) / dir_path / ""; if (disk->exists(path_to_clone)) { - LOG_WARNING(log, "Path {} already exists. 
Will remove it and clone again.", fullPath(disk, path_to_clone)); - disk->removeRecursive(path_to_clone); + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, + "Cannot clone part {} from '{}' to '{}': path '{}' already exists", + dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); } - disk->createDirectories(to); - volume->getDisk()->copy(getRelativePath(), disk, to); + volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); auto single_disk_volume = std::make_shared<SingleDiskVolume>(disk->getName(), disk, 0); From 25948cdd9e6ba2523cd293a66ac83f273c953a9d Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Mon, 19 Jun 2023 03:57:38 +0000 Subject: [PATCH 066/522] Added ASK_PASSWORD client constant --- programs/client/Client.cpp | 7 +++---- src/Client/ConnectionParameters.cpp | 3 +-- src/Client/ConnectionParameters.h | 4 ++++ src/Client/ConnectionString.cpp | 5 +++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 54b091700b2..506a3bdf895 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1394,10 +1394,9 @@ void Client::readArguments( else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-'))) { common_arguments.emplace_back(arg); - /// No password was provided by user. Add '\n' as implicit password, - /// which encodes that client should ask user for the password. - /// '\n' is used because there is hardly a chance that a user would use '\n' as a password. - common_arguments.emplace_back("\n"); + /// If the value of --password is omitted, the client will ask for the password before + /// the connection starts. + common_arguments.emplace_back(ConnectionParameters::ASK_PASSWORD); } else common_arguments.emplace_back(arg); diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index c47d217d432..18585ab7610 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -46,8 +46,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati else { password = config.getString("password", ""); - /// if the value of --password is omitted, the password will be set implicitly to "\n" - if (password == "\n") + if (password == ASK_PASSWORD) password_prompt = true; } if (password_prompt) diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 0ccd6b92290..86149471983 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -28,6 +28,10 @@ struct ConnectionParameters ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, std::optional<UInt16> port); static UInt16 getPortFromConfig(const Poco::Util::AbstractConfiguration & config); + + /// Ask the user to enter a password if the password option contains this value. 
+ static constexpr std::string_view ASK_PASSWORD = "\n"; }; } diff --git a/src/Client/ConnectionString.cpp b/src/Client/ConnectionString.cpp index f4a4e73c198..8150ae98c8b 100644 --- a/src/Client/ConnectionString.cpp +++ b/src/Client/ConnectionString.cpp @@ -1,6 +1,7 @@ #include "ConnectionString.h" #include +#include #include #include @@ -201,8 +202,8 @@ bool tryParseConnectionString( else { // in case of user_info == 'user:', ':' is specified, but password is empty - // then add password argument "\n" which means: Ask user for a password. - common_arguments.push_back("\n"); + // then ask user for a password. + common_arguments.emplace_back(ConnectionParameters::ASK_PASSWORD); } } else From dd43a186adca8b4480e9287127c740f6b05d743d Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 19 Jun 2023 09:51:29 +0000 Subject: [PATCH 067/522] Minor edit docs / add int256 test --- docs/en/interfaces/formats.md | 2 +- docs/en/operations/settings/settings-formats.md | 2 +- src/Core/Settings.h | 2 +- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 2 +- tests/queries/0_stateless/00301_csv.reference | 10 +++++----- tests/queries/0_stateless/00301_csv.sh | 10 +++++----- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 7a900ecd869..c5cd19f1743 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -471,7 +471,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. -- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if your file has more columns than expected). Default value - `false`. +- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if file has more columns than expected). Default value - `false`. - [input_format_csv_missing_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_missing_as_default) - treat missing fields in CSV input as default values. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index c17a24abccf..6b05f41666c 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -933,7 +933,7 @@ Result ``` ### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns} -Ignore extra columns in CSV input (if your file has more columns than expected). +Ignore extra columns in CSV input (if file has more columns than expected). Disabled by default. 
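To make the two CSV settings concrete, here is a minimal sketch in the style of the stateless tests above, assuming a running server reachable via a plain `clickhouse-client`; the table `csv_demo` and its data are invented for illustration:

```bash
#!/usr/bin/env bash
# Hedged sketch, not part of the patch: csv_demo is a placeholder table.
clickhouse-client --query "CREATE TABLE csv_demo (s String, n UInt64 DEFAULT 3) ENGINE = Memory"
# The first row carries two extra trailing fields (dropped when
# input_format_csv_ignore_extra_columns is on); the second row is missing the
# n field, which falls back to its DEFAULT when missing_as_default is on.
echo 'Hello, 1, extra, fields
World' | clickhouse-client \
    --input_format_csv_ignore_extra_columns=1 \
    --input_format_csv_missing_as_default=1 \
    --input_format_defaults_for_omitted_fields=1 \
    --query "INSERT INTO csv_demo FORMAT CSV"
clickhouse-client --query "SELECT * FROM csv_demo ORDER BY s"  # expected: Hello 1 / World 3
clickhouse-client --query "DROP TABLE csv_demo"
```

The expected output is `Hello 1` and `World 3`, mirroring the behavior that the 00301_csv reference changes below encode for the stateless test.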
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 76bb7ae9206..e60d2df933f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1005,7 +1005,7 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if your file has more columns than expected)", 0) \ + M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected)", 0) \ M(Bool, input_format_csv_missing_as_default, false, "Treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index c80887bde0a..a727a5bc490 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -346,7 +346,7 @@ bool CSVFormatReader::readField( while (checkChar(format_settings.csv.delimiter, *buf)) { skipField(); - skipWhitespacesAndTabs(*buf); + skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); } } return res; diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index fa85fd924e1..e3441a3a4b3 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -22,8 +22,8 @@ Hello 4 String4 Hello 5 String5 Hello 6 String6 === Test input_format_csv_missing_as_default -Hello 0 33 \N 55 Default -Hello 0 33 \N 55 Default -Hello 1 2 \N 55 Default -Hello 1 2 3 4 String -Hello 1 2 3 4 String +Hello 0 0 33 \N 55 Default +Hello 0 0 33 \N 55 Default +Hello 1 3 2 \N 55 Default +Hello 1 4 2 3 4 String +Hello 1 5 2 3 4 String diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 887a75b0ded..4555e0476d8 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -56,12 +56,12 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; echo === Test input_format_csv_missing_as_default -$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt64 Default 33, f4 Nullable(UInt64), f5 Nullable(UInt64) Default 55, f6 String DEFAULT 'Default') ENGINE = Memory"; +$CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, f4 UInt64 Default 33, f5 Nullable(UInt64), f6 Nullable(UInt64) Default 55, f7 String DEFAULT 'Default') ENGINE = Memory"; echo 'Hello Hello, -Hello, 1, 2 -Hello, 1, 2, 3, 4, String -Hello, 1, 2, 3, 4, String,'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4 NULLS FIRST, f5, f6"; +Hello, 1, 3, 2 +Hello, 1, 4, 2, 3, 4, String +Hello, 1, 5, 2, 3, 4, String,'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4, f5 NULLS FIRST, f6, f7"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From f81401db99e194c4a8d231a38918da53bd221e90 Mon Sep 
17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 19 Jun 2023 10:48:38 +0000 Subject: [PATCH 068/522] Add empty line test --- src/Processors/Formats/Impl/CSVRowInputFormat.h | 2 ++ tests/queries/0_stateless/00301_csv.reference | 2 ++ tests/queries/0_stateless/00301_csv.sh | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 3958c66bbc6..82e03c453e7 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -89,6 +89,8 @@ public: protected: PeekableReadBuffer * buf; + +private: bool current_row_has_missing_fields = false; }; diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index e3441a3a4b3..140bbda84e7 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -22,6 +22,8 @@ Hello 4 String4 Hello 5 String5 Hello 6 String6 === Test input_format_csv_missing_as_default + 0 0 33 \N 55 Default + 0 0 33 \N 55 Default Hello 0 0 33 \N 55 Default Hello 0 0 33 \N 55 Default Hello 1 3 2 \N 55 Default diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 4555e0476d8..aa019147bab 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -58,7 +58,9 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; echo === Test input_format_csv_missing_as_default $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, f4 UInt64 Default 33, f5 Nullable(UInt64), f6 Nullable(UInt64) Default 55, f7 String DEFAULT 'Default') ENGINE = Memory"; -echo 'Hello +echo ' +, +Hello Hello, Hello, 1, 3, 2 Hello, 1, 4, 2, 3, 4, String From a2833b206032a613b23ed503ebd983efc8d1dc53 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 19 Jun 2023 19:41:17 +0200 Subject: [PATCH 069/522] remove AsyncTrashPoolExecutor --- src/Common/CurrentMetrics.cpp | 2 + src/Disks/DiskEncrypted.cpp | 16 ++-- src/Disks/DiskEncrypted.h | 5 +- src/Disks/DiskLocal.cpp | 17 ++-- src/Disks/DiskLocal.h | 6 +- src/Disks/DiskSelector.cpp | 2 +- src/Disks/Executor.h | 42 ---------- src/Disks/IDisk.cpp | 25 +++--- src/Disks/IDisk.h | 19 +++-- .../registerDiskAzureBlobStorage.cpp | 7 +- .../ObjectStorages/DiskObjectStorage.cpp | 78 +++---------------- src/Disks/ObjectStorages/DiskObjectStorage.h | 7 +- .../DiskObjectStorageCommon.cpp | 2 +- ...jectStorageRemoteMetadataRestoreHelper.cpp | 53 ++++++------- ...ObjectStorageRemoteMetadataRestoreHelper.h | 2 +- .../ObjectStorages/HDFS/registerDiskHDFS.cpp | 5 +- .../Local/registerLocalObjectStorage.cpp | 2 +- .../ObjectStorages/S3/registerDiskS3.cpp | 7 +- .../Web/registerDiskWebServer.cpp | 4 +- src/Disks/loadLocalDiskConfig.cpp | 2 +- src/Interpreters/Context.cpp | 8 +- src/Storages/HDFS/StorageHDFS.cpp | 4 +- .../MergeTree/DataPartStorageOnDiskBase.cpp | 1 + 23 files changed, 112 insertions(+), 204 deletions(-) delete mode 100644 src/Disks/Executor.h diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index f2ddb7a84c0..c54541d6785 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -141,6 +141,8 @@ M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \ M(MergeTreePartsCleanerThreadsActive, "Number 
of threads in the MergeTree parts cleaner thread pool running a task.") \ + M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \ + M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \ M(SystemReplicasThreads, "Number of threads in the system.replicas thread pool.") \ M(SystemReplicasThreadsActive, "Number of threads in the system.replicas thread pool running a task.") \ M(RestartReplicaThreads, "Number of threads in the RESTART REPLICA thread pool.") \ diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index f24e06fdef0..bdc66ace7b3 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -285,19 +285,20 @@ private: }; DiskEncrypted::DiskEncrypted( - const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_) - : DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), use_fake_transaction_) + const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_) + : DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), config_, config_prefix_) { } -DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_, bool use_fake_transaction_) - : IDisk(name_) +DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_, + const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_) + : IDisk(name_, config_, config_prefix_) , delegate(settings_->wrapped_disk) , encrypted_name(name_) , disk_path(settings_->disk_path) , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path) , current_settings(std::move(settings_)) - , use_fake_transaction(use_fake_transaction_) + , use_fake_transaction(config_.getBool(config_prefix_ + ".use_fake_transaction", true)) { delegate->createDirectories(disk_path); } @@ -414,7 +415,7 @@ std::unordered_map DiskEncrypted::getSerializedMetadata(const st void DiskEncrypted::applyNewSettings( const Poco::Util::AbstractConfiguration & config, - ContextPtr /*context*/, + ContextPtr context, const String & config_prefix, const DisksMap & disk_map) { @@ -426,6 +427,7 @@ void DiskEncrypted::applyNewSettings( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. 
Disk {}", name); current_settings.set(std::move(new_settings)); + IDisk::applyNewSettings(config, context, config_prefix, disk_map); } void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check) @@ -438,7 +440,7 @@ void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check) const DisksMap & map) -> DiskPtr { bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); - DiskPtr disk = std::make_shared(name, config, config_prefix, map, config.getBool(config_prefix + ".use_fake_transaction", true)); + DiskPtr disk = std::make_shared(name, config, config_prefix, map); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index e085409cedf..ab5b7425f69 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -21,8 +21,9 @@ class WriteBufferFromFileBase; class DiskEncrypted : public IDisk { public: - DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_); - DiskEncrypted(const String & name_, std::unique_ptr settings_, bool use_fake_transaction_); + DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_); + DiskEncrypted(const String & name_, std::unique_ptr settings_, + const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_); const String & getName() const override { return encrypted_name; } const String & getPath() const override { return disk_absolute_path; } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 72c9ccafc8d..504e35abac7 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -431,7 +431,7 @@ SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const } -void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &) +void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & disk_map) { String new_disk_path; UInt64 new_keep_free_space_bytes; @@ -443,10 +443,13 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi if (keep_free_space_bytes != new_keep_free_space_bytes) keep_free_space_bytes = new_keep_free_space_bytes; + + IDisk::applyNewSettings(config, context, config_prefix, disk_map); } -DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_) - : IDisk(name_) +DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_, + const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + : IDisk(name_, config, config_prefix) , disk_path(path_) , keep_free_space_bytes(keep_free_space_bytes_) , logger(&Poco::Logger::get("DiskLocal")) @@ -455,9 +458,11 @@ DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_fre } DiskLocal::DiskLocal( - const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, UInt64 local_disk_check_period_ms) - : DiskLocal(name_, path_, keep_free_space_bytes_) + const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, + const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + : DiskLocal(name_, 
path_, keep_free_space_bytes_, config, config_prefix) { + auto local_disk_check_period_ms = config.getUInt("local_disk_check_period_ms", 0); if (local_disk_check_period_ms > 0) disk_checker = std::make_unique(this, context, local_disk_check_period_ms); } @@ -703,7 +708,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check) bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); std::shared_ptr disk - = std::make_shared(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0)); + = std::make_shared(name, path, keep_free_space_bytes, context, config, config_prefix); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 1eee58900bf..2306deeb619 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -19,13 +19,15 @@ public: friend class DiskLocalCheckThread; friend class DiskLocalReservation; - DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_); + DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_, + const Poco::Util::AbstractConfiguration & config, const String & config_prefix); DiskLocal( const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, - UInt64 local_disk_check_period_ms); + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix); const String & getPath() const override { return disk_path; } diff --git a/src/Disks/DiskSelector.cpp b/src/Disks/DiskSelector.cpp index 9894e4251a2..e51f79867b5 100644 --- a/src/Disks/DiskSelector.cpp +++ b/src/Disks/DiskSelector.cpp @@ -53,7 +53,7 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config, disks.emplace( default_disk_name, std::make_shared( - default_disk_name, context->getPath(), 0, context, config.getUInt("local_disk_check_period_ms", 0))); + default_disk_name, context->getPath(), 0, context, config, config_prefix)); } is_initialized = true; diff --git a/src/Disks/Executor.h b/src/Disks/Executor.h deleted file mode 100644 index 7330bcdd559..00000000000 --- a/src/Disks/Executor.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -/// Interface to run task asynchronously with possibility to wait for execution. -class Executor -{ -public: - virtual ~Executor() = default; - virtual std::future execute(std::function task) = 0; -}; - -/// Executes task synchronously in case when disk doesn't support async operations. -class SyncExecutor : public Executor -{ -public: - SyncExecutor() = default; - std::future execute(std::function task) override - { - auto promise = std::make_shared>(); - try - { - task(); - promise->set_value(); - } - catch (...) - { - try - { - promise->set_exception(std::current_exception()); - } - catch (...) 
{ } - } - return promise->get_future(); - } -}; - -} diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index 46a2c5b30c6..de61218d5a6 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -1,5 +1,4 @@ #include "IDisk.h" -#include "Disks/Executor.h" #include #include #include @@ -80,12 +79,15 @@ UInt128 IDisk::getEncryptedFileIV(const String &) const using ResultsCollector = std::vector>; -void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings) +void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, ThreadPool & pool, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings) { if (from_disk.isFile(from_path)) { - auto result = exec.execute( - [&from_disk, from_path, &to_disk, to_path, &settings, thread_group = CurrentThread::getGroup()]() + auto promise = std::make_shared>(); + auto future = promise->get_future(); + + pool.scheduleOrThrowOnError( + [&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]() { SCOPE_EXIT_SAFE( if (thread_group) @@ -96,9 +98,10 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p CurrentThread::attachToGroup(thread_group); from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); + promise->set_value(); }); - results.push_back(std::move(result)); + results.push_back(std::move(future)); } else { @@ -111,13 +114,12 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p } for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next()) - asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true, settings); + asyncCopy(from_disk, it->path(), to_disk, dest, pool, results, true, settings); } } void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr & to_disk, const String & to_path, bool copy_root_dir) { - auto & exec = to_disk->getExecutor(); ResultsCollector results; WriteSettings settings; @@ -125,10 +127,8 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr #include #include -#include +#include #include #include #include @@ -35,6 +35,12 @@ namespace Poco } } +namespace CurrentMetrics +{ + extern const Metric IDiskCopierThreads; + extern const Metric IDiskCopierThreadsActive; +} + namespace DB { @@ -110,9 +116,9 @@ class IDisk : public Space { public: /// Default constructor. - explicit IDisk(const String & name_, std::shared_ptr executor_ = std::make_shared()) + explicit IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) - , executor(executor_) + , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, config.getUInt(config_prefix + ".thread_pool_size", 16)) { } @@ -376,7 +382,7 @@ public: virtual SyncGuardPtr getDirectorySyncGuard(const String & path) const; /// Applies new settings for disk in runtime. - virtual void applyNewSettings(const Poco::Util::AbstractConfiguration &, ContextPtr, const String &, const DisksMap &) {} + virtual void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & map); /// Quite leaky abstraction. Some disks can use additional disk to store /// some parts of metadata. 
In general case we have only one disk itself and @@ -456,9 +462,6 @@ protected: const String name; - /// Returns executor to perform asynchronous operations. - virtual Executor & getExecutor() { return *executor; } - /// Base implementation of the function copy(). /// It just opens two files, reads data by portions from the first file, and writes it to the second one. /// A derived class may override copy() to provide a faster implementation. @@ -467,7 +470,7 @@ protected: virtual void checkAccessImpl(const String & path); private: - std::shared_ptr executor; + ThreadPool copying_thread_pool; bool is_custom_disk = false; /// Check access to the disk. diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp index 562b2b2fec0..a09befe84a8 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/registerDiskAzureBlobStorage.cpp @@ -31,9 +31,6 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context)); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); - bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); - auto metadata_storage = std::make_shared(metadata_disk, ""); std::shared_ptr azure_blob_storage_disk = std::make_shared( @@ -42,8 +39,8 @@ void registerDiskAzureBlobStorage(DiskFactory & factory, bool global_skip_access "DiskAzureBlobStorage", std::move(metadata_storage), std::move(azure_object_storage), - send_metadata, - copy_thread_pool_size + config, + config_prefix ); bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 3fae67e2e9d..e6f48f45827 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -38,55 +38,6 @@ namespace ErrorCodes extern const int DIRECTORY_DOESNT_EXIST; } -namespace -{ - -/// Runs tasks asynchronously using thread pool. -class AsyncThreadPoolExecutor : public Executor -{ -public: - AsyncThreadPoolExecutor(const String & name_, int thread_pool_size) - : name(name_) - , pool(CurrentMetrics::DiskObjectStorageAsyncThreads, CurrentMetrics::DiskObjectStorageAsyncThreadsActive, thread_pool_size) - {} - - std::future execute(std::function task) override - { - auto promise = std::make_shared>(); - pool.scheduleOrThrowOnError( - [promise, task]() - { - try - { - task(); - promise->set_value(); - } - catch (...) - { - tryLogCurrentException("Failed to run async task"); - - try - { - promise->set_exception(std::current_exception()); - } - catch (...) {} - } - }); - - return promise->get_future(); - } - - void setMaxThreads(size_t threads) - { - pool.setMaxThreads(threads); - } - -private: - String name; - ThreadPool pool; -}; - -} DiskTransactionPtr DiskObjectStorage::createTransaction() { @@ -106,27 +57,20 @@ DiskTransactionPtr DiskObjectStorage::createObjectStorageTransaction() send_metadata ? 
metadata_helper.get() : nullptr); } -std::shared_ptr DiskObjectStorage::getAsyncExecutor(const std::string & log_name, size_t size) -{ - static auto reader = std::make_shared(log_name, size); - return reader; -} - DiskObjectStorage::DiskObjectStorage( const String & name_, const String & object_storage_root_path_, const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, - bool send_metadata_, - uint64_t thread_pool_size_) - : IDisk(name_, getAsyncExecutor(log_name, thread_pool_size_)) + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix) + : IDisk(name_, config, config_prefix) , object_storage_root_path(object_storage_root_path_) , log (&Poco::Logger::get("DiskObjectStorage(" + log_name + ")")) , metadata_storage(std::move(metadata_storage_)) , object_storage(std::move(object_storage_)) - , send_metadata(send_metadata_) - , threadpool_size(thread_pool_size_) + , send_metadata(config.getBool(config_prefix + ".send_metadata", false)) , metadata_helper(std::make_unique(this, ReadSettings{})) {} @@ -235,7 +179,7 @@ void DiskObjectStorage::moveFile(const String & from_path, const String & to_pat transaction->commit(); } -void DiskObjectStorage::copyFile( +void DiskObjectStorage::copyFile( /// NOLINT const String & from_file_path, IDisk & to_disk, const String & to_file_path, @@ -524,14 +468,15 @@ bool DiskObjectStorage::isWriteOnce() const DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage() { + const auto config_prefix = "storage_configuration.disks." + name; return std::make_shared( getName(), object_storage_root_path, getName(), metadata_storage, object_storage, - send_metadata, - threadpool_size); + Context::getGlobalContextInstance()->getConfigRef(), + config_prefix); } void DiskObjectStorage::wrapWithCache(FileCachePtr cache, const FileCacheSettings & cache_settings, const String & layer_name) @@ -605,13 +550,10 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W } void DiskObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) + const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & config_prefix, const DisksMap & disk_map) { - const auto config_prefix = "storage_configuration.disks." + name; object_storage->applyNewSettings(config, config_prefix, context_); - - if (AsyncThreadPoolExecutor * exec = dynamic_cast(&getExecutor())) - exec->setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16)); + IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } void DiskObjectStorage::restoreMetadataIfNeeded( diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h index b6e4252749a..cd000ee705d 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.h +++ b/src/Disks/ObjectStorages/DiskObjectStorage.h @@ -33,8 +33,8 @@ public: const String & log_name, MetadataStoragePtr metadata_storage_, ObjectStoragePtr object_storage_, - bool send_metadata_, - uint64_t thread_pool_size_); + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix); /// Create fake transaction DiskTransactionPtr createTransaction() override; @@ -200,8 +200,6 @@ public: /// Get names of all cache layers. Name is how cache is defined in configuration file. 
NameSet getCacheLayersNames() const override; - static std::shared_ptr getAsyncExecutor(const std::string & log_name, size_t size); - bool supportsStat() const override { return metadata_storage->supportsStat(); } struct stat stat(const String & path) const override; @@ -227,7 +225,6 @@ private: std::optional tryReserve(UInt64 bytes); const bool send_metadata; - size_t threadpool_size; std::unique_ptr metadata_helper; }; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp index 5ac6128c3c0..cc9e4b0b712 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp @@ -25,7 +25,7 @@ std::pair prepareForLocalMetadata( /// where the metadata files are stored locally auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); fs::create_directories(metadata_path); - auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0); + auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0, config, config_prefix); return std::make_pair(metadata_path, metadata_disk); } diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp index 74d1698bf01..bbcdd40d85f 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -8,6 +8,14 @@ #include #include #include +#include + + +namespace CurrentMetrics +{ + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} namespace DB { @@ -101,7 +109,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateFileToRestorableSchema updateObjectMetadata(object.remote_path, metadata); } } -void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, Futures & results) +void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecursive(const String & path, ThreadPool & pool) { checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks. @@ -120,29 +128,26 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu /// The whole directory can be migrated asynchronously. 
if (dir_contains_only_files) { - auto result = disk->getExecutor().execute([this, path] + pool.scheduleOrThrowOnError([this, path] { for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) migrateFileToRestorableSchema(it->path()); }); - - results.push_back(std::move(result)); } else { for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) - if (!disk->isDirectory(it->path())) + { + if (disk->isDirectory(it->path())) { - auto source_path = it->path(); - auto result = disk->getExecutor().execute([this, source_path] - { - migrateFileToRestorableSchema(source_path); - }); - - results.push_back(std::move(result)); + migrateToRestorableSchemaRecursive(it->path(), pool); } else - migrateToRestorableSchemaRecursive(it->path(), results); + { + auto source_path = it->path(); + pool.scheduleOrThrowOnError([this, source_path] { migrateFileToRestorableSchema(source_path); }); + } + } } } @@ -153,16 +158,13 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchema() { LOG_INFO(disk->log, "Start migration to restorable schema for disk {}", disk->name); - Futures results; + ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive}; for (const auto & root : data_roots) if (disk->exists(root)) - migrateToRestorableSchemaRecursive(root + '/', results); + migrateToRestorableSchemaRecursive(root + '/', pool); - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); + pool.wait(); saveSchemaVersion(RESTORABLE_SCHEMA_VERSION); } @@ -355,8 +357,8 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * { LOG_INFO(disk->log, "Starting restore files for disk {}", disk->name); - std::vector> results; - auto restore_files = [this, &source_object_storage, &restore_information, &results](const RelativePathsWithMetadata & objects) + ThreadPool pool{CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive}; + auto restore_files = [this, &source_object_storage, &restore_information, &pool](const RelativePathsWithMetadata & objects) { std::vector keys_names; for (const auto & object : objects) @@ -378,12 +380,10 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * if (!keys_names.empty()) { - auto result = disk->getExecutor().execute([this, &source_object_storage, &restore_information, keys_names]() + pool.scheduleOrThrowOnError([this, &source_object_storage, &restore_information, keys_names]() { processRestoreFiles(source_object_storage, restore_information.source_path, keys_names); }); - - results.push_back(std::move(result)); } return true; @@ -394,10 +394,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::restoreFiles(IObjectStorage * restore_files(children); - for (auto & result : results) - result.wait(); - for (auto & result : results) - result.get(); + pool.wait(); LOG_INFO(disk->log, "Files are restored for disk {}", disk->name); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h index cb8d9b8a5af..e7de4afcaf3 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h @@ -75,7 +75,7 @@ private: void saveSchemaVersion(const int & version) const; void updateObjectMetadata(const String & key, const ObjectAttributes & metadata) const; void migrateFileToRestorableSchema(const String & path) const; - void 
migrateToRestorableSchemaRecursive(const String & path, Futures & results); + void migrateToRestorableSchemaRecursive(const String & path, ThreadPool & pool); void readRestoreInformation(RestoreInformation & restore_information); void restoreFiles(IObjectStorage * source_object_storage, const RestoreInformation & restore_information); diff --git a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp index 693b966caf2..e72e7028c4b 100644 --- a/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp +++ b/src/Disks/ObjectStorages/HDFS/registerDiskHDFS.cpp @@ -44,7 +44,6 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) auto [_, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); auto metadata_storage = std::make_shared(metadata_disk, uri); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false); DiskPtr disk = std::make_shared( @@ -53,8 +52,8 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check) "DiskHDFS", std::move(metadata_storage), std::move(hdfs_storage), - /* send_metadata = */ false, - copy_thread_pool_size); + config, + config_prefix); disk->startup(context, skip_access_check); return disk; diff --git a/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp index 251fc77d1f8..eb9039fed44 100644 --- a/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/registerLocalObjectStorage.cpp @@ -34,7 +34,7 @@ void registerDiskLocalObjectStorage(DiskFactory & factory, bool global_skip_acce metadata_storage = std::make_shared(metadata_disk, path); auto disk = std::make_shared( - name, path, "Local", metadata_storage, local_storage, false, /* threadpool_size */16); + name, path, "Local", metadata_storage, local_storage, config, config_prefix); disk->startup(context, global_skip_access_check); return disk; }; diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index f3a57069a30..fb125ae8517 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -150,17 +150,14 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) } } - bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); - uint64_t copy_thread_pool_size = config.getUInt(config_prefix + ".thread_pool_size", 16); - DiskObjectStoragePtr s3disk = std::make_shared( name, uri.key, type == "s3" ? 
"DiskS3" : "DiskS3Plain", std::move(metadata_storage), std::move(s3_storage), - send_metadata, - copy_thread_pool_size); + config, + config_prefix); s3disk->startup(context, skip_access_check); diff --git a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp index 8a54de81815..bc6c17863ef 100644 --- a/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp +++ b/src/Disks/ObjectStorages/Web/registerDiskWebServer.cpp @@ -52,8 +52,8 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check) "DiskWebServer", metadata_storage, object_storage, - /* send_metadata */false, - /* threadpool_size */16); + config, + config_prefix); disk->startup(context, skip_access_check); return disk; }; diff --git a/src/Disks/loadLocalDiskConfig.cpp b/src/Disks/loadLocalDiskConfig.cpp index 0e5eca17ca7..0a9cdae1ae3 100644 --- a/src/Disks/loadLocalDiskConfig.cpp +++ b/src/Disks/loadLocalDiskConfig.cpp @@ -56,7 +56,7 @@ void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0, config, config_prefix).getTotalSpace() * ratio); } } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 995e78d8f0b..a244b82b54a 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -875,9 +875,9 @@ catch (...) "It is ok to skip this exception as cleaning old temporary files is not necessary", path)); } -static VolumePtr createLocalSingleDiskVolume(const std::string & path) +static VolumePtr createLocalSingleDiskVolume(const std::string & path, const Poco::Util::AbstractConfiguration & config_) { - auto disk = std::make_shared("_tmp_default", path, 0); + auto disk = std::make_shared("_tmp_default", path, 0, config_, "storage_configuration.disks._tmp_default"); VolumePtr volume = std::make_shared("_tmp_default", disk, 0); return volume; } @@ -893,7 +893,7 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size) if (!shared->tmp_path.ends_with('/')) shared->tmp_path += '/'; - VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRef()); for (const auto & disk : volume->getDisks()) { @@ -966,7 +966,7 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t LOG_DEBUG(shared->log, "Using file cache ({}) for temporary files", file_cache->getBasePath()); shared->tmp_path = file_cache->getBasePath(); - VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path); + VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, getConfigRef()); shared->root_temp_data_on_disk = std::make_shared(volume, file_cache.get(), max_size); } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 3a96d68dc2e..ad92d7497c0 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -103,8 +103,8 @@ namespace { if (re2::RE2::FullMatch(file_name, matcher)) result.emplace_back( - String(ls.file_info[i].mName), - StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}); + StorageHDFS::PathWithInfo{String(ls.file_info[i].mName), + StorageHDFS::PathInfo{ls.file_info[i].mLastMod, static_cast(ls.file_info[i].mSize)}}); } else if (is_directory && looking_for_directory) { diff --git 
a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 545cb062fb7..c397a634db6 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -467,6 +467,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); } + disk->createDirectories(to); volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); From 4949dd05466fd6ef3dee6c74fd6006ba0c4d3707 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 19 Jun 2023 20:21:33 +0200 Subject: [PATCH 070/522] fix --- src/Common/CurrentMetrics.cpp | 4 ++-- src/Disks/IDisk.h | 1 - src/Disks/ObjectStorages/DiskObjectStorage.cpp | 6 ------ 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index c54541d6785..85e08b4ec8d 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -93,8 +93,8 @@ M(ThreadPoolFSReaderThreadsActive, "Number of threads in the thread pool for local_filesystem_read_method=threadpool running a task.") \ M(BackupsIOThreads, "Number of threads in the BackupsIO thread pool.") \ M(BackupsIOThreadsActive, "Number of threads in the BackupsIO thread pool running a task.") \ - M(DiskObjectStorageAsyncThreads, "Number of threads in the async thread pool for DiskObjectStorage.") \ - M(DiskObjectStorageAsyncThreadsActive, "Number of threads in the async thread pool for DiskObjectStorage running a task.") \ + M(DiskObjectStorageAsyncThreads, "Obsolete metric, shows nothing.") \ + M(DiskObjectStorageAsyncThreadsActive, "Obsolete metric, shows nothing.") \ M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \ M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \ M(TablesLoaderThreads, "Number of threads in the tables loader thread pool.") \ diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 42122ce4cb9..f935933bad7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index e6f48f45827..e3922b6c505 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -19,12 +19,6 @@ #include #include -namespace CurrentMetrics -{ - extern const Metric DiskObjectStorageAsyncThreads; - extern const Metric DiskObjectStorageAsyncThreadsActive; -} - namespace DB { From 792cdb6da5b390f953cb4e704f8de0a25e76633b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 20 Jun 2023 08:26:59 +0000 Subject: [PATCH 071/522] Add millisecond support to age() / add tests --- .../functions/date-time-functions.md | 4 +- .../functions/date-time-functions.md | 4 +- .../functions/date-time-functions.md | 1 + src/Core/DecimalFunctions.h | 2 +- src/Functions/DateTimeTransforms.h | 50 +++++++--- src/Functions/dateDiff.cpp | 93 ++++++++++++------ .../02477_age_datetime64.reference | 97 +++++++++++++++++++ .../0_stateless/02477_age_datetime64.sql | 59 +++++++++++ 8 files changed, 259 insertions(+), 51 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 
82f1a8aa237..9dca91668f2 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -691,7 +691,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d ## age -Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. For an alternative to `age`, see function `date\_diff`. @@ -707,6 +707,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: + - `microsecond` (possible abbreviations: `us`, `u`) + - `millisecond` (possible abbreviations: `ms`) - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 93ae750b10d..ddd3d2ca6f0 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -593,7 +593,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d ## age -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду. Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. **Синтаксис** @@ -607,6 +607,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). 
Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index e4b70322477..270fa44a421 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -625,6 +625,7 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01')); │ 2021-01-01 │ └───────────────────────────────────────────────┘ ``` +## age {#age} ## date_diff {#date_diff} diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index 357cff2c541..9b6b1e87e5f 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -33,7 +33,7 @@ template <> inline constexpr size_t max_precision = 38; template <> inline constexpr size_t max_precision = 76; template -inline auto scaleMultiplier(UInt32 scale) +constexpr inline auto scaleMultiplier(UInt32 scale) { if constexpr (std::is_same_v || std::is_same_v) return common::exp10_i32(scale); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 4d15078f2d7..192efb9820d 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1377,30 +1377,30 @@ struct ToRelativeSecondNumImpl using FactorTransform = ZeroTransform; }; -template +template struct ToRelativeSubsecondNumImpl { static constexpr auto name = "toRelativeSubsecondNumImpl"; static inline UInt64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) { - if (scale == second_divider) + if (scale == scale_multiplier) return t.value; - if (scale > second_divider) - return t.value / (scale / second_divider); - return t.value * (second_divider / scale); + if (scale > scale_multiplier) + return t.value / (scale / scale_multiplier); + return t.value * (scale_multiplier / scale); } static inline UInt64 execute(UInt32 t, const DateLUTImpl &) { - return t * second_divider; + return t * scale_multiplier; } static inline UInt64 execute(Int32 d, const DateLUTImpl & time_zone) { - return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))) * second_divider; + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier; } static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) { - return static_cast(time_zone.fromDayNum(DayNum(d)) * second_divider); + return static_cast(time_zone.fromDayNum(DayNum(d)) * scale_multiplier); } using FactorTransform = ZeroTransform; @@ -1505,25 +1505,43 @@ struct ToYYYYMMDDhhmmssImpl using FactorTransform = ZeroTransform; }; +struct DateTimeComponentsWithFractionalPart +{ + DateLUTImpl::DateTimeComponents datetime; + UInt16 millisecond = 0; + UInt16 microsecond = 0; +}; + struct ToDateTimeComponentsImpl { static constexpr auto name = "toDateTimeComponents"; - static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone) + static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { - return time_zone.toDateTimeComponents(t); + const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); + const auto multiplier = DecimalUtils::scaleMultiplier(6); + Int64 fractional = components.fractional; + + if (scale_multiplier > multiplier) + fractional = 
fractional / (scale_multiplier / multiplier); + else if (scale_multiplier < multiplier) + fractional = fractional * (multiplier / scale_multiplier); + + UInt16 millisecond = static_cast(fractional / 1000); + UInt16 microsecond = static_cast(fractional % 1000); + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond}; } - static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone) + static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone) { - return time_zone.toDateTimeComponents(static_cast(t)); + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast(t)), 0, 0}; } - static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone) + static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone) { - return time_zone.toDateTimeComponents(ExtendedDayNum(d)); + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0}; } - static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone) + static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone) { - return time_zone.toDateTimeComponents(DayNum(d)); + return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0}; } using FactorTransform = ZeroTransform; diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 62f01274476..5805526ba1a 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -174,12 +174,15 @@ public: { auto res = static_cast(transform_y.execute(y, timezone_y)) - static_cast(transform_x.execute(x, timezone_x)); - DateLUTImpl::DateTimeComponents a_comp; - DateLUTImpl::DateTimeComponents b_comp; + DateTimeComponentsWithFractionalPart a_comp; + DateTimeComponentsWithFractionalPart b_comp; + Int64 adjust_value; - auto x_seconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_seconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); - if (x_seconds <= y_seconds) + const auto multiplier = DecimalUtils::scaleMultiplier(6); + auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); + + if (x_microseconds <= y_microseconds) { a_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); b_comp = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); @@ -191,36 +194,43 @@ public: b_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); adjust_value = 1; } + const auto & a_date = a_comp.datetime.date; + const auto & b_date = b_comp.datetime.date; + const auto & a_time = a_comp.datetime.time; + const auto & b_time = b_comp.datetime.time; if constexpr (std::is_same_v>>) { - if ((a_comp.date.month > b_comp.date.month) - || ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day) - || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) - || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) - || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) - ))))) + if ((a_date.month > b_date.month) + || ((a_date.month == 
b_date.month) && ((a_date.day > b_date.day) + || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) + || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) + || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - auto x_month_in_quarter = (a_comp.date.month - 1) % 3; - auto y_month_in_quarter = (b_comp.date.month - 1) % 3; + auto x_month_in_quarter = (a_date.month - 1) % 3; + auto y_month_in_quarter = (b_date.month - 1) % 3; if ((x_month_in_quarter > y_month_in_quarter) - || ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day) - || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) - || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) - || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) - ))))) + || ((x_month_in_quarter == y_month_in_quarter) && ((a_date.day > b_date.day) + || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) + || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) + || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_comp.date.day > b_comp.date.day) - || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) - || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) - || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))) - ))) + if ((a_date.day > b_date.day) + || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) + || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) + || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) @@ -228,27 +238,46 @@ public: auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x); auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y); if ((x_day_of_week > y_day_of_week) - || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) - || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) - || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) + || ((x_day_of_week == y_day_of_week) && (a_time.hour > b_time.hour)) + || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) + || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_comp.time.hour > b_comp.time.hour) - || 
((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) - || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))) + if ((a_time.hour > b_time.hour) + || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) + || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_comp.time.minute > b_comp.time.minute) - || ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))) + if ((a_time.minute > b_time.minute) + || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if (a_comp.time.second > b_comp.time.second) + if ((a_time.second > b_time.second) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if ((a_comp.millisecond > b_comp.millisecond) + || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))) + res += adjust_value; + } + else if constexpr (std::is_same_v>>) + { + if (a_comp.microsecond > b_comp.microsecond) res += adjust_value; } return res; diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference index 3b4459dd26d..b732794eef7 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -111,3 +111,100 @@ SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), ma 1 SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); 1 +-- DateTime64 vs DateTime64 with fractional part +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); +5100200 +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); +5100200 +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550299', 6, 'UTC')); +5100 +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550298', 6, 'UTC')); +5099 +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999003', 6, 'UTC'), toDateTime64('2023-03-01 19:18:41.999002', 6, 'UTC')); +4 +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999', 3, 'UTC'), toDateTime64('2023-03-01 19:18:41.001', 3, 'UTC')); +4 +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.300', 3, 'UTC')); +5 +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.100', 3, 'UTC')); +4 +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 20:35:36.200100', 6, 
'UTC')); +4 +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +3 +SELECT age('hour', toDateTime64('2015-01-01 20:31:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +2 +SELECT age('hour', toDateTime64('2015-01-01 20:30:37.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +2 +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.300', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +2 +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 23:30:36.200100', 6, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.200', 3, 'UTC')); +3 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 19:30:36.200', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:28:36.200', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:35.200', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.199', 3, 'UTC')); +2 +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-04 20:30:36.200100', 6, 'UTC')); +2 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.200', 3, 'UTC')); +2 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 19:30:36.200', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:29:36.200', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:35.200', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.100', 3, 'UTC')); +1 +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-15 20:30:36.200100', 6, 'UTC')); +1 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.200', 3, 'UTC')); +16 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-01 20:30:36.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 19:30:36.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:29:36.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:35.200', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.100', 3, 'UTC')); +15 +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-05-02 20:30:36.200100', 6, 'UTC')); +15 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.200', 3, 'UTC')); +5 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-01 20:30:36.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 19:30:36.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 
20:29:36.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:35.200', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.100', 3, 'UTC')); +4 +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-04-02 20:30:36.200100', 6, 'UTC')); +4 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.200', 3, 'UTC')); +8 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-01-02 20:30:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-01 20:30:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 19:30:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:29:36.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:35.200', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.100', 3, 'UTC')); +7 +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); +7 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql index 1bed93991ca..809270f4cce 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -75,3 +75,62 @@ SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); + +-- DateTime64 vs DateTime64 with fractional part +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400005', 9, 'UTC')); +SELECT age('microsecond', toDateTime64('2015-08-18 20:30:36.100200005', 9, 'UTC'), toDateTime64('2015-08-18 20:30:41.200400004', 9, 'UTC')); + +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550299', 6, 'UTC')); +SELECT age('millisecond', toDateTime64('2015-08-18 20:30:36.450299', 6, 'UTC'), toDateTime64('2015-08-18 20:30:41.550298', 6, 'UTC')); + +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999003', 6, 'UTC'), toDateTime64('2023-03-01 19:18:41.999002', 6, 'UTC')); +SELECT age('second', toDateTime64('2023-03-01 19:18:36.999', 3, 'UTC'), toDateTime64('2023-03-01 19:18:41.001', 3, 'UTC')); + +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.300', 3, 'UTC')); +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 20:35:36.100', 3, 'UTC')); +SELECT age('minute', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 20:35:36.200100', 6, 'UTC')); + +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 
'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:31:36.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:30:37.200', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.300', 3, 'UTC'), toDateTime64('2015-01-01 23:30:36.200', 3, 'UTC')); +SELECT age('hour', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-01 23:30:36.200100', 6, 'UTC')); + +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 19:30:36.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:28:36.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:35.200', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-04 20:30:36.199', 3, 'UTC')); +SELECT age('day', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-04 20:30:36.200100', 6, 'UTC')); + +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 19:30:36.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:29:36.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:35.200', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200', 3, 'UTC'), toDateTime64('2015-01-15 20:30:36.100', 3, 'UTC')); +SELECT age('week', toDateTime64('2015-01-01 20:30:36.200101', 6, 'UTC'), toDateTime64('2015-01-15 20:30:36.200100', 6, 'UTC')); + +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-01 20:30:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 19:30:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:29:36.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:35.200', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-05-02 20:30:36.100', 3, 'UTC')); +SELECT age('month', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-05-02 20:30:36.200100', 6, 'UTC')); + +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-01 20:30:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 19:30:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:29:36.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:35.200', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 
20:30:36.200', 3, 'UTC'), toDateTime64('2016-04-02 20:30:36.100', 3, 'UTC')); +SELECT age('quarter', toDateTime64('2015-01-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2016-04-02 20:30:36.200100', 6, 'UTC')); + +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-01-02 20:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-01 20:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 19:30:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:29:36.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:35.200', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.100', 3, 'UTC')); +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); \ No newline at end of file From d1cb371d8d9834a596ab50ee198693bc1569bb51 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 20 Jun 2023 08:53:25 +0000 Subject: [PATCH 072/522] Style fix (whitespaces) --- src/Functions/dateDiff.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 5805526ba1a..c8ee899d8fb 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -176,7 +176,6 @@ public: - static_cast(transform_x.execute(x, timezone_x)); DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; - Int64 adjust_value; const auto multiplier = DecimalUtils::scaleMultiplier(6); auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); @@ -219,7 +218,7 @@ public: || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) res += adjust_value; } @@ -229,7 +228,7 @@ public: || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))) res += adjust_value; } @@ -241,7 +240,7 @@ public: || ((x_day_of_week == y_day_of_week) && (a_time.hour > b_time.hour)) || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && 
(a_comp.microsecond > b_comp.microsecond))))))))) res += adjust_value; } @@ -250,7 +249,7 @@ public: if ((a_time.hour > b_time.hour) || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) res += adjust_value; } @@ -403,9 +402,9 @@ public: else if (unit == "second" || unit == "ss" || unit == "s") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "millisecond" || unit == "ms") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns(3)>>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "microsecond" || unit == "us" || unit == "u") - impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns(6)>>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); From 7ab660acfbcbc722beabea32d45524d6294ea5e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 16 Jun 2023 12:20:40 +0200 Subject: [PATCH 073/522] Extract logging functions outside of executeQueryImpl --- src/Interpreters/AsynchronousInsertQueue.cpp | 2 + src/Interpreters/executeQuery.cpp | 647 ++++++++++--------- src/Interpreters/executeQuery.h | 45 +- 3 files changed, 384 insertions(+), 310 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dc2310cfebf..94171298780 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -395,6 +395,8 @@ try SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size())); + /// query_start_time context + settings query_for_logging + const auto * log = &Poco::Logger::get("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); auto insert_context = Context::createCopy(global_context); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index de2e2b9ad92..122c8e0570a 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -155,7 +155,6 @@ static void logQuery(const String & query, ContextPtr context, bool internal, Qu } } - /// Call this inside catch block. 
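// Reviewer note (editorial sketch, not part of the patch): the additions below
// hoist the logging lambdas that previously lived inside executeQueryImpl into
// free functions, so the regular query path and, in the follow-up patch, the
// asynchronous-insert flush path can share one implementation. With the names
// introduced here, the intended call shape is roughly (error handling and the
// executor wiring elided):
//
//     QueryLogElement elem = logQueryStart(
//         query_start_time, context, query_for_logging, ast,
//         pipeline, interpreter, internal, query_database, query_table);
//     try
//     {
//         /* ... drain the pipeline ... */
//         logQueryFinish(elem, context, ast, pipeline,
//                        /*pulling_pipeline=*/ pipeline.pulling(), query_span, internal);
//     }
//     catch (...)
//     {
//         logQueryException(elem, context, start_watch, ast, query_span,
//                           internal, /*log_error=*/ true);
//         throw;
//     }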
static void setExceptionStackTrace(QueryLogElement & elem) { @@ -208,7 +207,331 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er LOG_INFO(&Poco::Logger::get("executeQuery"), message); } -static void onExceptionBeforeStart( +static void +addStatusInfoToQueryElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) +{ + const auto time_now = std::chrono::system_clock::now(); + UInt64 elapsed_microseconds = info.elapsed_microseconds; + element.event_time = timeInSeconds(time_now); + element.event_time_microseconds = timeInMicroseconds(time_now); + element.query_duration_ms = elapsed_microseconds / 1000; + + ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); + if (query_ast->as() || query_ast->as()) + { + ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, elapsed_microseconds); + } + else if (query_ast->as()) + { + ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); + } + else + { + ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); + } + + element.read_rows = info.read_rows; + element.read_bytes = info.read_bytes; + + element.written_rows = info.written_rows; + element.written_bytes = info.written_bytes; + + element.memory_usage = info.peak_memory_usage > 0 ? info.peak_memory_usage : 0; + + element.thread_ids = info.thread_ids; + element.profile_counters = info.profile_counters; + + /// We need to refresh the access info since dependent views might have added extra information, either during + /// creation of the view (PushingToViews chain) or while executing its internal SELECT + const auto & access_info = context_ptr->getQueryAccessInfo(); + element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); + element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); + element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); + element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); + element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); + element.query_views.insert(access_info.views.begin(), access_info.views.end()); + + const auto & factories_info = context_ptr->getQueryFactoriesInfo(); + element.used_aggregate_functions = factories_info.aggregate_functions; + element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators; + element.used_database_engines = factories_info.database_engines; + element.used_data_type_families = factories_info.data_type_families; + element.used_dictionaries = factories_info.dictionaries; + element.used_formats = factories_info.formats; + element.used_functions = factories_info.functions; + element.used_storages = factories_info.storages; + element.used_table_functions = factories_info.table_functions; + + element.async_read_counters = context_ptr->getAsyncReadCounters(); +} + + +QueryLogElement logQueryStart( + const std::chrono::time_point & query_start_time, + const ContextMutablePtr & context, + const String & query_for_logging, + const ASTPtr & query_ast, + const QueryPipeline & pipeline, + const std::unique_ptr & interpreter, + bool internal, + const String & query_database, + const String & query_table) +{ + const Settings & settings = context->getSettingsRef(); + + QueryLogElement elem; + + elem.type = QueryLogElementType::QUERY_START; + 
elem.event_time = timeInSeconds(query_start_time); + elem.event_time_microseconds = timeInMicroseconds(query_start_time); + elem.query_start_time = timeInSeconds(query_start_time); + elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); + + elem.current_database = context->getCurrentDatabase(); + elem.query = query_for_logging; + if (settings.log_formatted_queries) + elem.formatted_query = queryToString(query_ast); + elem.normalized_query_hash = normalizedQueryHash(query_for_logging); + elem.query_kind = query_ast->getQueryKind(); + + elem.client_info = context->getClientInfo(); + + if (auto txn = context->getCurrentTransaction()) + elem.tid = txn->tid; + + bool log_queries = settings.log_queries && !internal; + + /// Log into system table start of query execution, if need. + if (log_queries) + { + /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. + if (pipeline.initialized()) + { + const auto & info = context->getQueryAccessInfo(); + elem.query_databases = info.databases; + elem.query_tables = info.tables; + elem.query_columns = info.columns; + elem.query_partitions = info.partitions; + elem.query_projections = info.projections; + elem.query_views = info.views; + } + + if (settings.async_insert) + InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); + else if (interpreter) + interpreter->extendQueryLogElem(elem, query_ast, context, query_database, query_table); + + if (settings.log_query_settings) + elem.query_settings = std::make_shared(context->getSettingsRef()); + + elem.log_comment = settings.log_comment; + if (elem.log_comment.size() > settings.max_query_size) + elem.log_comment.resize(settings.max_query_size); + + if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + } + + return elem; +} + +void logQueryFinish( + QueryLogElement & elem, + const ContextMutablePtr & context, + const ASTPtr & query_ast, + const QueryPipeline & query_pipeline, + bool pulling_pipeline, + std::shared_ptr query_span, + bool internal) +{ + const Settings & settings = context->getSettingsRef(); + auto log_queries = settings.log_queries && !internal; + auto log_queries_min_type = settings.log_queries_min_type; + auto log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(); + auto log_processors_profiles = settings.log_processors_profiles; + + QueryStatusPtr process_list_elem = context->getProcessListElement(); + if (process_list_elem) + { + /// Update performance counters before logging to query_log + CurrentThread::finalizePerformanceCounters(); + + QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); + elem.type = QueryLogElementType::QUERY_FINISH; + + addStatusInfoToQueryElement(elem, info, query_ast, context); + + if (pulling_pipeline) + { + query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); + } + else /// will be used only for ordinary INSERT queries + { + auto progress_out = process_list_elem->getProgressOut(); + elem.result_rows = progress_out.written_rows; + elem.result_bytes = progress_out.written_bytes; + } + + auto progress_callback = context->getProgressCallback(); + if (progress_callback) + { + Progress p; + p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); + progress_callback(p); + } + + if (elem.read_rows != 0) + { + double 
elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; + double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; + LOG_DEBUG( + &Poco::Logger::get("executeQuery"), + "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", + elem.read_rows, + ReadableSize(elem.read_bytes), + elapsed_seconds, + rows_per_second, + ReadableSize(elem.read_bytes / elapsed_seconds)); + } + + if (log_queries && elem.type >= log_queries_min_type + && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + if (log_processors_profiles) + { + if (auto processors_profile_log = context->getProcessorsProfileLog()) + { + ProcessorProfileLogElement processor_elem; + processor_elem.event_time = elem.event_time; + processor_elem.event_time_microseconds = elem.event_time_microseconds; + processor_elem.initial_query_id = elem.client_info.initial_query_id; + processor_elem.query_id = elem.client_info.current_query_id; + + auto get_proc_id = [](const IProcessor & proc) -> UInt64 { return reinterpret_cast(&proc); }; + + for (const auto & processor : query_pipeline.getProcessors()) + { + std::vector parents; + for (const auto & port : processor->getOutputs()) + { + if (!port.isConnected()) + continue; + const IProcessor & next = port.getInputPort().getProcessor(); + parents.push_back(get_proc_id(next)); + } + + processor_elem.id = get_proc_id(*processor); + processor_elem.parent_ids = std::move(parents); + + processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); + processor_elem.plan_group = processor->getQueryPlanStepGroup(); + + processor_elem.processor_name = processor->getName(); + + /// NOTE: convert this to UInt64 + processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); + + auto stats = processor->getProcessorDataStats(); + processor_elem.input_rows = stats.input_rows; + processor_elem.input_bytes = stats.input_bytes; + processor_elem.output_rows = stats.output_rows; + processor_elem.output_bytes = stats.output_bytes; + + processors_profile_log->add(processor_elem); + } + } + } + } + + if (query_span) + { + query_span->addAttribute("db.statement", elem.query); + query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); + query_span->addAttribute("clickhouse.query_status", "QueryFinish"); + query_span->addAttributeIfNotEmpty("clickhouse.tracestate", OpenTelemetry::CurrentContext().tracestate); + query_span->addAttributeIfNotZero("clickhouse.read_rows", elem.read_rows); + query_span->addAttributeIfNotZero("clickhouse.read_bytes", elem.read_bytes); + query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows); + query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes); + query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage); + query_span->finish(); + } +} + +void logQueryException( + QueryLogElement & elem, + const ContextMutablePtr & context, + const Stopwatch & start_watch, + const ASTPtr & query_ast, + std::shared_ptr query_span, + bool internal, + bool log_error) +{ + const Settings & settings = context->getSettingsRef(); + auto log_queries = settings.log_queries && !internal; + auto log_queries_min_type = settings.log_queries_min_type; + auto log_queries_min_query_duration_ms = 
settings.log_queries_min_query_duration_ms.totalMilliseconds(); + + elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; + elem.exception_code = getCurrentExceptionCode(); + auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); + elem.exception = std::move(exception_message.text); + elem.exception_format_string = exception_message.format_string; + + QueryStatusPtr process_list_elem = context->getProcessListElement(); + + /// Update performance counters before logging to query_log + CurrentThread::finalizePerformanceCounters(); + const auto time_now = std::chrono::system_clock::now(); + elem.event_time = timeInSeconds(time_now); + elem.event_time_microseconds = timeInMicroseconds(time_now); + + if (process_list_elem) + { + QueryStatusInfo info = process_list_elem->getInfo(true, settings.log_profile_events, false); + addStatusInfoToQueryElement(elem, info, query_ast, context); + } + else + { + elem.query_duration_ms = start_watch.elapsedMilliseconds(); + } + + if (settings.calculate_text_stack_trace && log_error) + setExceptionStackTrace(elem); + logException(context, elem, log_error); + + /// In case of exception we log internal queries also + if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + + ProfileEvents::increment(ProfileEvents::FailedQuery); + if (query_ast->as() || query_ast->as()) + ProfileEvents::increment(ProfileEvents::FailedSelectQuery); + else if (query_ast->as()) + ProfileEvents::increment(ProfileEvents::FailedInsertQuery); + + if (query_span) + { + query_span->addAttribute("db.statement", elem.query); + query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); + query_span->addAttribute("clickhouse.exception", elem.exception); + query_span->addAttribute("clickhouse.exception_code", elem.exception_code); + query_span->finish(); + } +} + +void logExceptionBeforeStart( const String & query_for_logging, ContextPtr context, ASTPtr ast, @@ -431,7 +754,7 @@ static std::tuple executeQueryImpl( logQuery(query_for_logging, context, internal, stage); if (!internal) - onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); + logExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } @@ -810,134 +1133,17 @@ static std::tuple executeQueryImpl( /// Everything related to query log. { - QueryLogElement elem; - - elem.type = QueryLogElementType::QUERY_START; - - elem.event_time = timeInSeconds(query_start_time); - elem.event_time_microseconds = timeInMicroseconds(query_start_time); - elem.query_start_time = timeInSeconds(query_start_time); - elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); - - elem.current_database = context->getCurrentDatabase(); - elem.query = query_for_logging; - if (settings.log_formatted_queries) - elem.formatted_query = queryToString(ast); - elem.normalized_query_hash = normalizedQueryHash(query_for_logging); - elem.query_kind = ast->getQueryKind(); - - elem.client_info = client_info; - - if (auto txn = context->getCurrentTransaction()) - elem.tid = txn->tid; - - bool log_queries = settings.log_queries && !internal; - - /// Log into system table start of query execution, if need. - if (log_queries) - { - /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. 
- if (pipeline.initialized()) - { - const auto & info = context->getQueryAccessInfo(); - elem.query_databases = info.databases; - elem.query_tables = info.tables; - elem.query_columns = info.columns; - elem.query_partitions = info.partitions; - elem.query_projections = info.projections; - elem.query_views = info.views; - } - - if (async_insert) - InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); - else if (interpreter) - interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); - - if (settings.log_query_settings) - elem.query_settings = std::make_shared(context->getSettingsRef()); - - elem.log_comment = settings.log_comment; - if (elem.log_comment.size() > settings.max_query_size) - elem.log_comment.resize(settings.max_query_size); - - if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - } - - /// Common code for finish and exception callbacks - auto status_info_to_query_log - = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable - { - const auto time_now = std::chrono::system_clock::now(); - UInt64 elapsed_microseconds = info.elapsed_microseconds; - element.event_time = timeInSeconds(time_now); - element.event_time_microseconds = timeInMicroseconds(time_now); - element.query_duration_ms = elapsed_microseconds / 1000; - - ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); - if (query_ast->as() || query_ast->as()) - { - ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, elapsed_microseconds); - } - else if (query_ast->as()) - { - ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); - } - else - { - ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); - } - - element.read_rows = info.read_rows; - element.read_bytes = info.read_bytes; - - element.written_rows = info.written_rows; - element.written_bytes = info.written_bytes; - - element.memory_usage = info.peak_memory_usage > 0 ? 
info.peak_memory_usage : 0; - - element.thread_ids = info.thread_ids; - element.profile_counters = info.profile_counters; - - /// We need to refresh the access info since dependent views might have added extra information, either during - /// creation of the view (PushingToViews chain) or while executing its internal SELECT - const auto & access_info = context_ptr->getQueryAccessInfo(); - element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); - element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); - element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); - element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); - element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); - element.query_views.insert(access_info.views.begin(), access_info.views.end()); - - const auto & factories_info = context_ptr->getQueryFactoriesInfo(); - element.used_aggregate_functions = factories_info.aggregate_functions; - element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators; - element.used_database_engines = factories_info.database_engines; - element.used_data_type_families = factories_info.data_type_families; - element.used_dictionaries = factories_info.dictionaries; - element.used_formats = factories_info.formats; - element.used_functions = factories_info.functions; - element.used_storages = factories_info.storages; - element.used_table_functions = factories_info.table_functions; - - element.async_read_counters = context_ptr->getAsyncReadCounters(); - }; - + QueryLogElement elem = logQueryStart( + query_start_time, context, query_for_logging, ast, pipeline, interpreter, internal, query_database, query_table); /// Also make possible for caller to log successful query finish and exception during execution. 
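// Reviewer note (illustrative sketch, assuming the usual BlockIO contract):
// the two callbacks built below are stored on the returned BlockIO, and the
// caller that drains the pipeline is responsible for firing them, e.g.:
//
//     BlockIO io = executeQuery(query, context, /*internal=*/ false, stage);
//     try
//     {
//         CompletedPipelineExecutor executor(io.pipeline);
//         executor.execute();   // or a pulling/pushing executor, per pipeline kind
//         io.onFinish();        // runs finish_callback -> logQueryFinish(...)
//     }
//     catch (...)
//     {
//         io.onException();     // runs exception_callback -> logQueryException(...)
//         throw;
//     }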
auto finish_callback = [elem, context, ast, my_can_use_query_cache = can_use_query_cache, enable_writes_to_query_cache = settings.enable_writes_to_query_cache, - query_cache_store_results_of_queries_with_nondeterministic_functions = settings.query_cache_store_results_of_queries_with_nondeterministic_functions, - log_queries, - log_queries_min_type = settings.log_queries_min_type, - log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - log_processors_profiles = settings.log_processors_profiles, - status_info_to_query_log, + query_cache_store_results_of_queries_with_nondeterministic_functions + = settings.query_cache_store_results_of_queries_with_nondeterministic_functions, + internal, implicit_txn_control, execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), @@ -954,137 +1160,15 @@ static std::tuple executeQueryImpl( query_pipeline.finalizeWriteInQueryCache(); } - QueryStatusPtr process_list_elem = context->getProcessListElement(); + logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, internal); - if (process_list_elem) - { - /// Update performance counters before logging to query_log - CurrentThread::finalizePerformanceCounters(); - - QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); - elem.type = QueryLogElementType::QUERY_FINISH; - - status_info_to_query_log(elem, info, ast, context); - - if (pulling_pipeline) - { - query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); - } - else /// will be used only for ordinary INSERT queries - { - auto progress_out = process_list_elem->getProgressOut(); - elem.result_rows = progress_out.written_rows; - elem.result_bytes = progress_out.written_bytes; - } - - auto progress_callback = context->getProgressCallback(); - if (progress_callback) - { - Progress p; - p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); - progress_callback(p); - } - - if (elem.read_rows != 0) - { - double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; - double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; - LOG_DEBUG( - &Poco::Logger::get("executeQuery"), - "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - elem.read_rows, - ReadableSize(elem.read_bytes), - elapsed_seconds, - rows_per_second, - ReadableSize(elem.read_bytes / elapsed_seconds)); - } - - if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - if (log_processors_profiles) - { - if (auto processors_profile_log = context->getProcessorsProfileLog()) - { - ProcessorProfileLogElement processor_elem; - processor_elem.event_time = elem.event_time; - processor_elem.event_time_microseconds = elem.event_time_microseconds; - processor_elem.initial_query_id = elem.client_info.initial_query_id; - processor_elem.query_id = elem.client_info.current_query_id; - - auto get_proc_id = [](const IProcessor & proc) -> UInt64 - { - return reinterpret_cast(&proc); - }; - - for (const auto & processor : query_pipeline.getProcessors()) - { - std::vector parents; - for (const auto & port : processor->getOutputs()) - { - if (!port.isConnected()) - continue; - const IProcessor & next = port.getInputPort().getProcessor(); - parents.push_back(get_proc_id(next)); - } - - processor_elem.id = get_proc_id(*processor); - processor_elem.parent_ids 
= std::move(parents); - - processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); - processor_elem.plan_group = processor->getQueryPlanStepGroup(); - - processor_elem.processor_name = processor->getName(); - - /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); - - auto stats = processor->getProcessorDataStats(); - processor_elem.input_rows = stats.input_rows; - processor_elem.input_bytes = stats.input_bytes; - processor_elem.output_rows = stats.output_rows; - processor_elem.output_bytes = stats.output_bytes; - - processors_profile_log->add(processor_elem); - } - } - } - - if (*implicit_txn_control) - execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); - } - - if (query_span) - { - query_span->addAttribute("db.statement", elem.query); - query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); - query_span->addAttribute("clickhouse.query_status", "QueryFinish"); - query_span->addAttributeIfNotEmpty("clickhouse.tracestate", OpenTelemetry::CurrentContext().tracestate); - query_span->addAttributeIfNotZero("clickhouse.read_rows", elem.read_rows); - query_span->addAttributeIfNotZero("clickhouse.read_bytes", elem.read_bytes); - query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows); - query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes); - query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage); - query_span->finish(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); }; - auto exception_callback = [start_watch, - elem, - context, - ast, - log_queries, - log_queries_min_type = settings.log_queries_min_type, - log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - my_quota(quota), - status_info_to_query_log, - implicit_txn_control, - execute_implicit_tcl_query, - query_span](bool log_error) mutable + auto exception_callback = + [start_watch, elem, context, ast, internal, my_quota(quota), implicit_txn_control, execute_implicit_tcl_query, query_span]( + bool log_error) mutable { if (*implicit_txn_control) execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); @@ -1094,60 +1178,7 @@ static std::tuple executeQueryImpl( if (my_quota) my_quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); - elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; - elem.exception_code = getCurrentExceptionCode(); - auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - elem.exception = std::move(exception_message.text); - elem.exception_format_string = exception_message.format_string; - - QueryStatusPtr process_list_elem = context->getProcessListElement(); - const Settings & current_settings = context->getSettingsRef(); - - /// Update performance counters before logging to query_log - CurrentThread::finalizePerformanceCounters(); - const auto time_now = std::chrono::system_clock::now(); - elem.event_time = timeInSeconds(time_now); - elem.event_time_microseconds = timeInMicroseconds(time_now); - - if (process_list_elem) - { - QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false); - status_info_to_query_log(elem, info, ast, context); - } - 
else - { - elem.query_duration_ms = start_watch.elapsedMilliseconds(); - } - - if (current_settings.calculate_text_stack_trace && log_error) - setExceptionStackTrace(elem); - logException(context, elem, log_error); - - /// In case of exception we log internal queries also - if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - - ProfileEvents::increment(ProfileEvents::FailedQuery); - if (ast->as() || ast->as()) - { - ProfileEvents::increment(ProfileEvents::FailedSelectQuery); - } - else if (ast->as()) - { - ProfileEvents::increment(ProfileEvents::FailedInsertQuery); - } - - if (query_span) - { - query_span->addAttribute("db.statement", elem.query); - query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); - query_span->addAttribute("clickhouse.exception", elem.exception); - query_span->addAttribute("clickhouse.exception_code", elem.exception_code); - query_span->finish(); - } + logQueryException(elem, context, start_watch, ast, query_span, internal, log_error); }; res.finish_callback = std::move(finish_callback); @@ -1162,7 +1193,7 @@ static std::tuple executeQueryImpl( txn->onException(); if (!internal) - onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); + logExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 93152cc1de6..3fa76dbfcb1 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -1,15 +1,21 @@ #pragma once #include -#include -#include #include +#include +#include +#include + +#include +#include namespace DB { +class IInterpreter; class ReadBuffer; class WriteBuffer; +struct QueryStatusInfo; struct QueryResultDetails { @@ -66,4 +72,39 @@ BlockIO executeQuery( /// if built pipeline does not require any input and does not produce any output. 
void executeTrivialBlockIO(BlockIO & streams, ContextPtr context); +/// Prepares a QueryLogElement and, if enabled, logs it to system.query_log +QueryLogElement logQueryStart( + const std::chrono::time_point & query_start_time, + const ContextMutablePtr & context, + const String & query_for_logging, + const ASTPtr & query_ast, + const QueryPipeline & pipeline, + const std::unique_ptr & interpreter, + bool internal, + const String & query_database, + const String & query_table); + +void logQueryFinish( + QueryLogElement & elem, + const ContextMutablePtr & context, + const ASTPtr & query_ast, + const QueryPipeline & query_pipeline, + bool pulling_pipeline, + std::shared_ptr query_span, + bool internal); + +void logQueryException( + QueryLogElement & elem, + const ContextMutablePtr & context, + const Stopwatch & start_watch, + const ASTPtr & query_ast, + std::shared_ptr query_span, + bool internal); + +void logExceptionBeforeStart( + const String & query_for_logging, + ContextPtr context, + ASTPtr ast, + const std::shared_ptr & query_span, + UInt64 elapsed_millliseconds); } From cf74aee6c9992a65f88a1a91f920227fa0c55bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Jun 2023 13:39:30 +0200 Subject: [PATCH 074/522] Log async query flushes to query_log --- src/Interpreters/AsynchronousInsertLog.cpp | 19 +-- src/Interpreters/AsynchronousInsertLog.h | 5 +- src/Interpreters/AsynchronousInsertQueue.cpp | 133 ++++++++++++++----- src/Interpreters/ProcessList.cpp | 18 ++- src/Interpreters/ProcessList.h | 9 +- src/Interpreters/executeQuery.cpp | 13 +- src/Interpreters/executeQuery.h | 9 +- src/Parsers/IAST.h | 1 + 8 files changed, 144 insertions(+), 63 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index eeccd9ad92e..d7c9059d9de 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -55,21 +55,10 @@ void AsynchronousInsertLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); - const auto & insert_query = assert_cast(*query); - columns[i++]->insert(queryToString(insert_query)); - - if (insert_query.table_id) - { - columns[i++]->insert(insert_query.table_id.getDatabaseName()); - columns[i++]->insert(insert_query.table_id.getTableName()); - } - else - { - columns[i++]->insertDefault(); - columns[i++]->insertDefault(); - } - - columns[i++]->insert(insert_query.format); + columns[i++]->insert(query_for_logging); + columns[i++]->insert(database); + columns[i++]->insert(table); + columns[i++]->insert(format); columns[i++]->insert(query_id); columns[i++]->insert(bytes); columns[i++]->insert(rows); diff --git a/src/Interpreters/AsynchronousInsertLog.h b/src/Interpreters/AsynchronousInsertLog.h index 372d1cf5a1b..a76db78d3ea 100644 --- a/src/Interpreters/AsynchronousInsertLog.h +++ b/src/Interpreters/AsynchronousInsertLog.h @@ -21,8 +21,11 @@ struct AsynchronousInsertLogElement time_t event_time{}; Decimal64 event_time_microseconds{}; - ASTPtr query; String query_id; + String query_for_logging; + String database; + String table; + String format; UInt64 bytes{}; UInt64 rows{}; String exception; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 94171298780..996a92cef6f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -1,33 +1,37 @@ #include -#include -#include 
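// Reviewer note (sketch): the AsynchronousInsertLog change above replaces the
// stored ASTPtr with pre-rendered strings, so appendToBlock no longer has to
// re-serialize the query on every flush. Per this patch, the flush path fills
// the element along these lines:
//
//     AsynchronousInsertLogElement elem;
//     elem.query_for_logging = query_for_logging; // already wiped of secrets and truncated
//     elem.database = query_database;
//     elem.table = query_table;
//     elem.format = insert_query.format;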
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include #include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics @@ -395,11 +399,15 @@ try SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size())); - /// query_start_time context + settings query_for_logging - const auto * log = &Poco::Logger::get("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); auto insert_context = Context::createCopy(global_context); + DB::CurrentThread::QueryScope query_scope_holder(insert_context); + bool internal = false; // To enable logging this query + bool async_insert = true; + + /// Disabled query spans. Could be activated by initializing this to a SpanHolder + std::shared_ptr query_span{nullptr}; /// 'resetParser' doesn't work for parallel parsing. key.settings.set("input_format_parallel_parsing", false); @@ -407,12 +415,67 @@ try insert_context->setSettings(key.settings); /// Set initial_query_id, because it's used in InterpreterInsertQuery for table lock. - insert_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; insert_context->setCurrentQueryId(""); - InterpreterInsertQuery interpreter(key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); - auto pipeline = interpreter.execute().pipeline; - assert(pipeline.pushing()); + auto insert_query_id = insert_context->getCurrentQueryId(); + auto query_start_time = std::chrono::system_clock::now(); + Stopwatch start_watch{CLOCK_MONOTONIC}; + ClientInfo & client_info = insert_context->getClientInfo(); + client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + client_info.initial_query_start_time = timeInSeconds(query_start_time); + client_info.initial_query_start_time_microseconds = timeInMicroseconds(query_start_time); + client_info.current_query_id = insert_query_id; + client_info.initial_query_id = insert_query_id; + size_t log_queries_cut_to_length = insert_context->getSettingsRef().log_queries_cut_to_length; + String query_for_logging = insert_query.hasSecretParts() + ?
insert_query.formatForLogging(log_queries_cut_to_length) + : wipeSensitiveDataAndCutToLength(serializeAST(insert_query), log_queries_cut_to_length); + + /// We add it to the process list so + /// a) it appears in system.processes + /// b) can be cancelled if we want to + /// c) has an associated process list element where runtime metrics are stored + auto process_list_entry = insert_context->getProcessList().insert( + query_for_logging, key.query.get(), insert_context, start_watch.getStart(), IAST::QueryKind::AsyncInsertFlush); + auto query_status = process_list_entry->getQueryStatus(); + insert_context->setProcessListElement(std::move(query_status)); + + String query_database{}; + String query_table{}; + if (insert_query.table_id) + { + query_database = insert_query.table_id.getDatabaseName(); + query_table = insert_query.table_id.getTableName(); + insert_context->setInsertionTable(insert_query.table_id); + } + std::unique_ptr interpreter; + QueryPipeline pipeline; + QueryLogElement query_log_elem; + + try + { + interpreter = std::make_unique( + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + pipeline = interpreter->execute().pipeline; + chassert(pipeline.pushing()); + + query_log_elem = logQueryStart( + query_start_time, + insert_context, + query_for_logging, + key.query, + pipeline, + interpreter, + internal, + query_database, + query_table, + async_insert); + } + catch (...) + { + logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds(), async_insert); + throw; + } auto header = pipeline.getHeader(); auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr); @@ -472,7 +535,10 @@ try AsynchronousInsertLogElement elem; elem.event_time = timeInSeconds(entry->create_time); elem.event_time_microseconds = timeInMicroseconds(entry->create_time); - elem.query = key.query; + elem.query_for_logging = query_for_logging; + elem.database = query_database; + elem.table = query_table; + elem.format = insert_query.format; elem.query_id = entry->query_id; elem.bytes = bytes_size; elem.rows = num_rows; @@ -495,7 +561,6 @@ try } format->addBuffer(std::move(last_buffer)); - auto insert_query_id = insert_context->getCurrentQueryId(); ProfileEvents::increment(ProfileEvents::AsyncInsertRows, total_rows); auto finish_entries = [&] @@ -533,9 +598,15 @@ try LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", total_rows, total_bytes, key.query_str); + + bool pulling_pipeline = false; + /// TODO: Not working. Is the context missing its getProcessListElement ??? + logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, internal); } catch (...) 
{ + bool log_error = true; + logQueryException(query_log_elem, insert_context, start_watch, key.query, query_span, internal, log_error); if (!log_elements.empty()) { auto exception = getCurrentExceptionMessage(false); diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index aca474bf152..766bdba0cef 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -32,13 +32,13 @@ namespace ErrorCodes /// Should we execute the query even if max_concurrent_queries limit is exhausted -static bool isUnlimitedQuery(const IAST * ast) +static bool isUnlimitedQuery(const IAST * ast, IAST::QueryKind query_kind) { if (!ast) return false; - /// It is KILL QUERY - if (ast->as()) + /// It is KILL QUERY or an async insert flush query + if (ast->as() || query_kind == IAST::QueryKind::AsyncInsertFlush) return true; /// It is SELECT FROM system.processes @@ -65,8 +65,12 @@ static bool isUnlimitedQuery(const IAST * ast) } -ProcessList::EntryPtr -ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds) +ProcessList::EntryPtr ProcessList::insert( + const String & query_, + const IAST * ast, + ContextMutablePtr query_context, + UInt64 watch_start_nanoseconds, + IAST::QueryKind force_query_kind) { EntryPtr res; @@ -76,11 +80,11 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (client_info.current_query_id.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Query id cannot be empty"); - bool is_unlimited_query = isUnlimitedQuery(ast); + bool is_unlimited_query = isUnlimitedQuery(ast, force_query_kind); { auto [lock, overcommit_blocker] = safeLock(); // To avoid deadlock in case of OOM - IAST::QueryKind query_kind = ast->getQueryKind(); + IAST::QueryKind query_kind = force_query_kind != IAST::QueryKind::None ? force_query_kind : ast->getQueryKind(); const auto queue_max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index b593bcef395..8fb92afc8c8 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -392,9 +392,14 @@ public: /** Register running query. Returns refcounted object, that will remove element from list in destructor. * If too many running queries - wait for not more than specified (see settings) amount of time. * If timeout is passed - throw an exception. - * Don't count KILL QUERY queries. + * Don't count KILL QUERY queries or async insert flush queries. */ - EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds); + EntryPtr insert( + const String & query_, + const IAST * ast, + ContextMutablePtr query_context, + UInt64 watch_start_nanoseconds, + IAST::QueryKind force_query_kind = IAST::QueryKind::None);
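// Reviewer note (sketch): logQueryFinish() from the previous patch is in effect
// a no-op when the context carries no process list element -- it begins with
//
//     QueryStatusPtr process_list_elem = context->getProcessListElement();
//     if (process_list_elem) { /* fill and emit the QUERY_FINISH row */ }
//
// which is why the flush path registers itself through the extended insert()
// above, roughly:
//
//     auto entry = insert_context->getProcessList().insert(
//         query_for_logging, key.query.get(), insert_context,
//         start_watch.getStart(), IAST::QueryKind::AsyncInsertFlush);
//     insert_context->setProcessListElement(entry->getQueryStatus());
//
// The "TODO: Not working" next to logQueryFinish() in the flush path hints that
// this wiring was still being debugged when the patch was posted.

 /// Number of currently executing queries.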
size_t size() const { return processes.size(); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 122c8e0570a..c276842ea5e 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -275,7 +275,8 @@ QueryLogElement logQueryStart( const std::unique_ptr & interpreter, bool internal, const String & query_database, - const String & query_table) + const String & query_table, + bool async_insert) { const Settings & settings = context->getSettingsRef(); @@ -292,7 +293,7 @@ QueryLogElement logQueryStart( if (settings.log_formatted_queries) elem.formatted_query = queryToString(query_ast); elem.normalized_query_hash = normalizedQueryHash(query_for_logging); - elem.query_kind = query_ast->getQueryKind(); + elem.query_kind = async_insert ? IAST::QueryKind::AsyncInsertFlush : query_ast->getQueryKind(); elem.client_info = context->getClientInfo(); @@ -316,7 +317,7 @@ QueryLogElement logQueryStart( elem.query_views = info.views; } - if (settings.async_insert) + if (settings.async_insert || async_insert) InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); else if (interpreter) interpreter->extendQueryLogElem(elem, query_ast, context, query_database, query_table); @@ -536,7 +537,8 @@ void logExceptionBeforeStart( ContextPtr context, ASTPtr ast, const std::shared_ptr & query_span, - UInt64 elapsed_millliseconds) + UInt64 elapsed_millliseconds, + bool async_insert) { auto query_end_time = std::chrono::system_clock::now(); @@ -570,6 +572,9 @@ void logExceptionBeforeStart( elem.formatted_query = queryToString(ast); } + if (async_insert) + elem.query_kind = IAST::QueryKind::AsyncInsertFlush; + // We don't calculate databases, tables and columns when the query isn't able to start elem.exception_code = getCurrentExceptionCode(); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 3fa76dbfcb1..f1fe6b86b79 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -82,7 +82,8 @@ QueryLogElement logQueryStart( const std::unique_ptr & interpreter, bool internal, const String & query_database, - const String & query_table); + const String & query_table, + bool async_insert = false); void logQueryFinish( QueryLogElement & elem, @@ -99,12 +100,14 @@ void logQueryException( const Stopwatch & start_watch, const ASTPtr & query_ast, std::shared_ptr query_span, - bool internal); + bool internal, + bool log_error); void logExceptionBeforeStart( const String & query_for_logging, ContextPtr context, ASTPtr ast, const std::shared_ptr & query_span, - UInt64 elapsed_millliseconds); + UInt64 elapsed_millliseconds, + bool async_insert = false); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index aa5302a15b9..7a8ab36518d 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -305,6 +305,7 @@ public: Commit, Rollback, SetTransactionSnapshot, + AsyncInsertFlush }; /// Return QueryKind of this AST query. 
virtual QueryKind getQueryKind() const { return QueryKind::None; } From e60f0e96cc9e59fe20679780ec42d7302536b2af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Jun 2023 17:06:32 +0200 Subject: [PATCH 075/522] Add tests for async insert flush queries in query_log --- ...02790_async_queries_in_query_log.reference | 135 ++++++++++++++++++ .../02790_async_queries_in_query_log.sh | 74 ++++++++++ 2 files changed, 209 insertions(+) create mode 100644 tests/queries/0_stateless/02790_async_queries_in_query_log.reference create mode 100755 tests/queries/0_stateless/02790_async_queries_in_query_log.sh diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference new file mode 100644 index 00000000000..e202a38f068 --- /dev/null +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference @@ -0,0 +1,135 @@ + +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 + +Row 2: +────── +type: QueryFinish +read_rows: 0 +read_bytes: 0 +written_rows: 4 +written_bytes: 16 +result_rows: 4 +result_bytes: 16 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 + +system.query_views_log + +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: [] +views: ['default.async_insert_mv'] +exception_code: 0 + +Row 2: +────── +type: QueryFinish +read_rows: 3 +read_bytes: 12 +written_rows: 6 +written_bytes: 12 +result_rows: 6 +result_bytes: 12 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: ['default.async_insert_landing.id'] +views: ['default.async_insert_mv'] +exception_code: 0 + +system.query_views_log +Row 1: +────── +view_name: default.async_insert_mv +view_type: Materialized +view_query: SELECT id + throwIf(id = 42) FROM default.async_insert_landing +view_target: default.async_insert_target +read_rows: 3 +read_bytes: 12 +written_rows: 3 +written_bytes: 0 +status: QueryFinish +exception_code: 0 + +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: [] +views: ['default.async_insert_mv'] +exception_code: 0 + +Row 2: +────── +type: Exc*****onWhileProcessing 
+read_rows: 3 +read_bytes: 12 +written_rows: 3 +written_bytes: 12 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: ['default.async_insert_landing.id'] +views: ['default.async_insert_mv'] +exception_code: 395 + +system.query_views_log +Row 1: +────── +view_name: default.async_insert_mv +view_type: Materialized +view_query: SELECT id + throwIf(id = 42) FROM default.async_insert_landing +view_target: default.async_insert_target +read_rows: 3 +read_bytes: 12 +written_rows: 0 +written_bytes: 0 +status: Exc*****onWhileProcessing +exception_code: 395 diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh new file mode 100755 index 00000000000..f5553f16e7a --- /dev/null +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function print_flush_query_logs() +{ + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + echo "" + echo "system.query_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + replace(type::String, 'Exception', 'Exc*****on') as type, + read_rows, + read_bytes, + written_rows, + written_bytes, + result_rows, + result_bytes, + query, + query_kind, + databases, + tables, + columns, + views, + exception_code + FROM system.query_log + WHERE + event_date >= yesterday() + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + ORDER BY type DESC + FORMAT Vertical" + + echo "" + echo "system.query_views_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + view_name, + view_type, + view_query, + view_target, + read_rows, + read_bytes, + written_rows, + written_bytes, + replace(status::String, 'Exception', 'Exc*****on') as status, + exception_code + FROM system.query_views_log + WHERE + event_date >= yesterday() + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + FORMAT Vertical" +} + + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_insert_landing (id UInt32) ENGINE = MergeTree ORDER BY id" + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=1, async_insert=1 values (1), (2), (3), (4);" +print_flush_query_logs ${query_id} + + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_insert_target (id UInt32) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} -q "CREATE MATERIALIZED VIEW async_insert_mv TO async_insert_target AS SELECT id + throwIf(id = 42) FROM async_insert_landing" + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=1, async_insert=1 values (11), (12), (13);" +print_flush_query_logs ${query_id} + + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=1, async_insert=1 values (42), (12), (13)" 2>/dev/null || true +print_flush_query_logs ${query_id} From 5ac7d95bafd61e7720f55ca5d7cd7c8b006ccec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Jun 2023 17:12:33 +0200 Subject: [PATCH 076/522] Cleanup --- 
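The test added in the previous patch pins down how a flushed async INSERT is
correlated with the client query: system.asynchronous_insert_log maps the
client query_id to the flush_query_id of the internal flush query. A minimal
sketch of that lookup outside the test harness, assuming a known client
query id (the {client_query_id:String} parameter binding is illustrative
and not part of the test):

    -- Find the flush query spawned for an async INSERT and inspect it.
    -- Table and column names are the ones the test above already uses;
    -- the parameter binding is an assumption for this sketch.
    SELECT query_kind, type, written_rows, written_bytes
    FROM system.query_log
    WHERE event_date >= yesterday()
      AND initial_query_id = (
          SELECT flush_query_id
          FROM system.asynchronous_insert_log
          WHERE query_id = {client_query_id:String})
    ORDER BY type DESC;

Each flush shows up twice: once as QueryStart and once as QueryFinish (or
ExceptionWhileProcessing on failure), both carrying the new AsyncInsertFlush
query kind introduced by this series.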
src/Interpreters/AsynchronousInsertQueue.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 996a92cef6f..590632c77f0 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -600,7 +600,6 @@ try total_rows, total_bytes, key.query_str); bool pulling_pipeline = false; - /// TODO: Not working. Is the context missing its getProcessListElement ??? logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, internal); } catch (...) From 5ee813b8d037e55edea54aead2074658cc7b8aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 19 Jun 2023 17:37:11 +0200 Subject: [PATCH 077/522] Fix style --- src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- tests/queries/0_stateless/02790_async_queries_in_query_log.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 590632c77f0..9a4ba0d2793 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -406,7 +406,7 @@ try bool internal = false; // To enable logging this query bool async_insert = true; - /// Disabled query spans. Could be activated by initializating this to a SpanHolder + /// Disabled query spans. Could be activated by initializing this to a SpanHolder std::shared_ptr query_span{nullptr}; /// 'resetParser' doesn't work for parallel parsing. diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh index f5553f16e7a..d1c8fe122cb 100755 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh @@ -29,6 +29,7 @@ function print_flush_query_logs() WHERE event_date >= yesterday() AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + -- AND current_database = currentDatabase() -- Just to silence style check: this is not ok for this test since the query uses default values ORDER BY type DESC FORMAT Vertical" From 9b899b935158a074e744b6cce095d249c9f437fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Jun 2023 09:52:11 +0200 Subject: [PATCH 078/522] Fix async insert logs with setting on mergetree --- src/Interpreters/executeQuery.cpp | 17 +++++++++++++---- src/Interpreters/executeQuery.h | 4 ++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c276842ea5e..77c8a6f1bf4 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -317,7 +317,7 @@ QueryLogElement logQueryStart( elem.query_views = info.views; } - if (settings.async_insert || async_insert) + if (async_insert) InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); else if (interpreter) interpreter->extendQueryLogElem(elem, query_ast, context, query_database, query_table); @@ -538,7 +538,7 @@ void logExceptionBeforeStart( ASTPtr ast, const std::shared_ptr & query_span, UInt64 elapsed_millliseconds, - bool async_insert) + bool async_insert_flush) { auto query_end_time = std::chrono::system_clock::now(); @@ -572,7 +572,7 @@ void logExceptionBeforeStart( elem.formatted_query = queryToString(ast); } - if (async_insert) + if (async_insert_flush) 
elem.query_kind = IAST::QueryKind::AsyncInsertFlush; // We don't calculate databases, tables and columns when the query isn't able to start @@ -1139,7 +1139,16 @@ static std::tuple executeQueryImpl( /// Everything related to query log. { QueryLogElement elem = logQueryStart( - query_start_time, context, query_for_logging, ast, pipeline, interpreter, internal, query_database, query_table); + query_start_time, + context, + query_for_logging, + ast, + pipeline, + interpreter, + internal, + query_database, + query_table, + async_insert); /// Also make possible for caller to log successful query finish and exception during execution. auto finish_callback = [elem, context, diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index f1fe6b86b79..94b106bb5d8 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -83,7 +83,7 @@ QueryLogElement logQueryStart( bool internal, const String & query_database, const String & query_table, - bool async_insert = false); + bool async_insert); void logQueryFinish( QueryLogElement & elem, @@ -109,5 +109,5 @@ void logExceptionBeforeStart( ASTPtr ast, const std::shared_ptr & query_span, UInt64 elapsed_millliseconds, - bool async_insert = false); + bool async_insert_flush = false); } From b1cedf0e42548011e95c3ec09e8245fb44d3e1ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 20 Jun 2023 11:37:56 +0200 Subject: [PATCH 079/522] Simplify handling of AsyncInsertFlush --- src/Interpreters/AsynchronousInsertQueue.cpp | 7 ++++--- src/Interpreters/ProcessList.cpp | 16 ++++++---------- src/Interpreters/ProcessList.h | 7 +------ src/Interpreters/executeQuery.cpp | 8 ++------ src/Interpreters/executeQuery.h | 3 +-- src/Parsers/ASTInsertQuery.h | 4 +++- 6 files changed, 17 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 9a4ba0d2793..eae9c925bc2 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -206,6 +206,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) query = query->clone(); const auto & settings = query_context->getSettingsRef(); auto & insert_query = query->as(); + insert_query.async_insert_flush = true; InterpreterInsertQuery interpreter(query, query_context, settings.insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); @@ -435,8 +436,8 @@ try /// a) it appears in system.processes /// b) can be cancelled if we want to /// c) has an associated process list element where runtime metrics are stored - auto process_list_entry = insert_context->getProcessList().insert( - query_for_logging, key.query.get(), insert_context, start_watch.getStart(), IAST::QueryKind::AsyncInsertFlush); + auto process_list_entry + = insert_context->getProcessList().insert(query_for_logging, key.query.get(), insert_context, start_watch.getStart()); auto query_status = process_list_entry->getQueryStatus(); insert_context->setProcessListElement(std::move(query_status)); @@ -473,7 +474,7 @@ try } catch (...) 
{ - logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds(), async_insert); + logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds()); throw; } diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 766bdba0cef..dfac92ecc1e 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -32,13 +32,13 @@ namespace ErrorCodes /// Should we execute the query even if max_concurrent_queries limit is exhausted -static bool isUnlimitedQuery(const IAST * ast, IAST::QueryKind query_kind) +static bool isUnlimitedQuery(const IAST * ast) { if (!ast) return false; /// It is KILL QUERY or an async insert flush query - if (ast->as() || query_kind == IAST::QueryKind::AsyncInsertFlush) + if (ast->as() || ast->getQueryKind() == IAST::QueryKind::AsyncInsertFlush) return true; /// It is SELECT FROM system.processes @@ -65,12 +65,8 @@ static bool isUnlimitedQuery(const IAST * ast, IAST::QueryKind query_kind) } -ProcessList::EntryPtr ProcessList::insert( - const String & query_, - const IAST * ast, - ContextMutablePtr query_context, - UInt64 watch_start_nanoseconds, - IAST::QueryKind force_query_kind) +ProcessList::EntryPtr +ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds) { EntryPtr res; @@ -80,11 +76,11 @@ ProcessList::EntryPtr ProcessList::insert( if (client_info.current_query_id.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Query id cannot be empty"); - bool is_unlimited_query = isUnlimitedQuery(ast, force_query_kind); + bool is_unlimited_query = isUnlimitedQuery(ast); { auto [lock, overcommit_blocker] = safeLock(); // To avoid deadlock in case of OOM - IAST::QueryKind query_kind = force_query_kind != IAST::QueryKind::None ? force_query_kind : ast->getQueryKind(); + IAST::QueryKind query_kind = ast->getQueryKind(); const auto queue_max_wait_ms = settings.queue_max_wait_ms.totalMilliseconds(); if (!is_unlimited_query && max_size && processes.size() >= max_size) diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 8fb92afc8c8..829457ed578 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -394,12 +394,7 @@ public: * If timeout is passed - throw an exception. * Don't count KILL QUERY queries or async insert flush queries */ - EntryPtr insert( - const String & query_, - const IAST * ast, - ContextMutablePtr query_context, - UInt64 watch_start_nanoseconds, - IAST::QueryKind force_query_kind = IAST::QueryKind::None); + EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds); /// Number of currently executing queries. size_t size() const { return processes.size(); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 77c8a6f1bf4..09aa0b7900b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -293,7 +293,7 @@ QueryLogElement logQueryStart( if (settings.log_formatted_queries) elem.formatted_query = queryToString(query_ast); elem.normalized_query_hash = normalizedQueryHash(query_for_logging); - elem.query_kind = async_insert ? 
IAST::QueryKind::AsyncInsertFlush : query_ast->getQueryKind(); + elem.query_kind = query_ast->getQueryKind(); elem.client_info = context->getClientInfo(); @@ -537,8 +537,7 @@ void logExceptionBeforeStart( ContextPtr context, ASTPtr ast, const std::shared_ptr & query_span, - UInt64 elapsed_millliseconds, - bool async_insert_flush) + UInt64 elapsed_millliseconds) { auto query_end_time = std::chrono::system_clock::now(); @@ -572,9 +571,6 @@ void logExceptionBeforeStart( elem.formatted_query = queryToString(ast); } - if (async_insert_flush) - elem.query_kind = IAST::QueryKind::AsyncInsertFlush; - // We don't calculate databases, tables and columns when the query isn't able to start elem.exception_code = getCurrentExceptionCode(); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 94b106bb5d8..53624f8c812 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -108,6 +108,5 @@ void logExceptionBeforeStart( ContextPtr context, ASTPtr ast, const std::shared_ptr & query_span, - UInt64 elapsed_millliseconds, - bool async_insert_flush = false); + UInt64 elapsed_millliseconds); } diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index 43780e27114..45fd3d97950 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -35,6 +35,8 @@ public: /// Data from buffer to insert after inlined one - may be nullptr. ReadBuffer * tail = nullptr; + bool async_insert_flush = false; + String getDatabase() const; String getTable() const; @@ -66,7 +68,7 @@ public: return res; } - QueryKind getQueryKind() const override { return QueryKind::Insert; } + QueryKind getQueryKind() const override { return async_insert_flush ? QueryKind::AsyncInsertFlush : QueryKind::Insert; } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; From c3e56a5214fae7031e73381bc8e26850203dbec2 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 20 Jun 2023 15:20:21 +0200 Subject: [PATCH 080/522] Pass ENV into inner docker run --- tests/integration/ci-runner.py | 5 +++++ tests/integration/runner | 15 ++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index d6d17abe725..59c3c82499c 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -239,6 +239,8 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) + self.use_analyzer = os.environ.get("CLICKHOUSE_USE_NEW_ANALYZER") is not None + if "run_by_hash_total" in self.params: self.run_by_hash_total = self.params["run_by_hash_total"] self.run_by_hash_num = self.params["run_by_hash_num"] @@ -398,6 +400,9 @@ class ClickhouseIntegrationTestsRunner: result.append("--tmpfs") if self.disable_net_host: result.append("--disable-net-host") + if self.use_analyzer: + result.append("--analyzer") + return " ".join(result) def _get_all_tests(self, repo_path): diff --git a/tests/integration/runner b/tests/integration/runner index f658bac412b..a62195504d5 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -283,6 +283,14 @@ if __name__ == "__main__": help="Use tmpfs for dockerd files", ) + parser.add_argument( + "--analyzer", + action="store_true", + default=False, + dest="analyzer", + help="Use new analyzer infrastructure", + ) + parser.add_argument( "--cleanup-containers", action="store_true", @@ -393,6 
+401,10 @@ if __name__ == "__main__": if args.keyword_expression: args.pytest_args += ["-k", args.keyword_expression] + use_analyzer = "" + if args.analyzer: + use_analyzer = "-e CLICKHOUSE_USE_NEW_ANALYZER=1" + cmd_base = "docker run {net} {tty} --rm --name {name} --privileged \ --volume={odbc_bridge_bin}:/clickhouse-odbc-bridge --volume={bin}:/clickhouse \ --volume={library_bridge_bin}:/clickhouse-library-bridge \ @@ -400,7 +412,7 @@ if __name__ == "__main__": --volume={src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos \ --volume=/run:/run/host:ro \ {dockerd_internal_volume} -e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 \ - -e XTABLES_LOCKFILE=/run/host/xtables.lock \ + -e XTABLES_LOCKFILE=/run/host/xtables.lock {use_analyzer_var}\ -e PYTHONUNBUFFERED=1 \ {env_tags} {env_cleanup} -e PYTEST_OPTS='{parallel} {opts} {tests_list} {rand} -vvv' {img}".format( net=net, @@ -420,6 +432,7 @@ if __name__ == "__main__": dockerd_internal_volume=dockerd_internal_volume, img=DIND_INTEGRATION_TESTS_IMAGE_NAME + ":" + args.docker_image_version, name=CONTAINER_NAME, + use_analyzer_var=use_analyzer, ) cmd = cmd_base + " " + args.command From 60a0843f58b311d6fccb827cf2f2d3261ef92ecf Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Jun 2023 17:47:25 +0200 Subject: [PATCH 081/522] fix --- src/Backups/tests/gtest_backup_entries.cpp | 4 ++-- src/Disks/DiskEncrypted.cpp | 12 ++++++++++++ src/Disks/DiskEncrypted.h | 1 + src/Disks/DiskLocal.cpp | 11 ++++++++++- src/Disks/DiskLocal.h | 2 ++ src/Disks/IDisk.h | 8 +++++++- .../tests/gtest_cascade_and_memory_write_buffer.cpp | 2 +- src/Disks/tests/gtest_disk.cpp | 2 +- src/Disks/tests/gtest_disk_encrypted.cpp | 4 ++-- 9 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/Backups/tests/gtest_backup_entries.cpp b/src/Backups/tests/gtest_backup_entries.cpp index ca603d20787..75972b35ba4 100644 --- a/src/Backups/tests/gtest_backup_entries.cpp +++ b/src/Backups/tests/gtest_backup_entries.cpp @@ -24,7 +24,7 @@ protected: /// Make local disk. temp_dir = std::make_unique(); temp_dir->createDirectories(); - local_disk = std::make_shared("local_disk", temp_dir->path() + "/", 0); + local_disk = std::make_shared("local_disk", temp_dir->path() + "/"); /// Make encrypted disk. 
auto settings = std::make_unique(); @@ -38,7 +38,7 @@ protected: settings->current_key = key; settings->current_key_fingerprint = fingerprint; - encrypted_disk = std::make_shared("encrypted_disk", std::move(settings), true); + encrypted_disk = std::make_shared("encrypted_disk", std::move(settings)); } void TearDown() override diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index bdc66ace7b3..677dd73cc00 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -303,6 +303,18 @@ DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptrcreateDirectories(disk_path); } +DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr settings_) + : IDisk(name_) + , delegate(settings_->wrapped_disk) + , encrypted_name(name_) + , disk_path(settings_->disk_path) + , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path) + , current_settings(std::move(settings_)) + , use_fake_transaction(true) +{ + delegate->createDirectories(disk_path); +} + ReservationPtr DiskEncrypted::reserve(UInt64 bytes) { auto reservation = delegate->reserve(bytes); diff --git a/src/Disks/DiskEncrypted.h b/src/Disks/DiskEncrypted.h index ab5b7425f69..9963770bd1c 100644 --- a/src/Disks/DiskEncrypted.h +++ b/src/Disks/DiskEncrypted.h @@ -24,6 +24,7 @@ public: DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_); DiskEncrypted(const String & name_, std::unique_ptr settings_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_); + DiskEncrypted(const String & name_, std::unique_ptr settings_); const String & getName() const override { return encrypted_name; } const String & getPath() const override { return disk_absolute_path; } diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 504e35abac7..9a61c176cf6 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -420,7 +420,7 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another) void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr & to_disk, const String & to_dir) { if (isSameDiskType(*this, *to_disk)) - fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. + fs::copy(fs::path(disk_path) / from_dir, fs::path(to_disk->getPath()) / to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way. 
else IDisk::copyDirectoryContent(from_dir, to_disk, to_dir); } @@ -467,6 +467,15 @@ DiskLocal::DiskLocal( disk_checker = std::make_unique(this, context, local_disk_check_period_ms); } +DiskLocal::DiskLocal(const String & name_, const String & path_) + : IDisk(name_) + , disk_path(path_) + , keep_free_space_bytes(0) + , logger(&Poco::Logger::get("DiskLocal")) + , data_source_description(getLocalDataSourceDescription(disk_path)) +{ +} + DataSourceDescription DiskLocal::getDataSourceDescription() const { return data_source_description; diff --git a/src/Disks/DiskLocal.h b/src/Disks/DiskLocal.h index 2306deeb619..b30732b67fd 100644 --- a/src/Disks/DiskLocal.h +++ b/src/Disks/DiskLocal.h @@ -29,6 +29,8 @@ public: const Poco::Util::AbstractConfiguration & config, const String & config_prefix); + DiskLocal(const String & name_, const String & path_); + const String & getPath() const override { return disk_path; } ReservationPtr reserve(UInt64 bytes) override; diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index f935933bad7..ccef3db2dac 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -115,12 +115,18 @@ class IDisk : public Space { public: /// Default constructor. - explicit IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + IDisk(const String & name_, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) : name(name_) , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, config.getUInt(config_prefix + ".thread_pool_size", 16)) { } + explicit IDisk(const String & name_) + : name(name_) + , copying_thread_pool(CurrentMetrics::IDiskCopierThreads, CurrentMetrics::IDiskCopierThreadsActive, 16) + { + } + /// This is a disk. bool isDisk() const override { return true; } diff --git a/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp b/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp index a24056a141f..482a38d8fc2 100644 --- a/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp +++ b/src/Disks/tests/gtest_cascade_and_memory_write_buffer.cpp @@ -33,7 +33,7 @@ public: void SetUp() override { fs::create_directories(tmp_root); - disk = std::make_shared("local_disk", tmp_root, 0); + disk = std::make_shared("local_disk", tmp_root); } void TearDown() override diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp index 1f33f536399..d57ca7bd81b 100644 --- a/src/Disks/tests/gtest_disk.cpp +++ b/src/Disks/tests/gtest_disk.cpp @@ -10,7 +10,7 @@ namespace fs = std::filesystem; DB::DiskPtr createDisk() { fs::create_directory("tmp/"); - return std::make_shared("local_disk", "tmp/", 0); + return std::make_shared("local_disk", "tmp/"); } void destroyDisk(DB::DiskPtr & disk) diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index ee9e284d409..b61b6140b0c 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -23,7 +23,7 @@ protected: /// Make local disk. 
temp_dir = std::make_unique(); temp_dir->createDirectories(); - local_disk = std::make_shared("local_disk", getDirectory(), 0); + local_disk = std::make_shared("local_disk", getDirectory()); } void TearDown() override @@ -42,7 +42,7 @@ protected: settings->current_key = key; settings->current_key_fingerprint = fingerprint; settings->disk_path = path; - encrypted_disk = std::make_shared("encrypted_disk", std::move(settings), true); + encrypted_disk = std::make_shared("encrypted_disk", std::move(settings)); } String getFileNames() From 876d5ae0a71dff7724bd665076a2d681a651829e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:27:43 +0200 Subject: [PATCH 082/522] fix ReadBufferFromS3 --- src/IO/ReadBufferFromS3.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 0b320ed86ff..fdbe1a4ba57 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,6 +1,4 @@ #include -#include -#include #include "config.h" #if USE_AWS_S3 From cda37f4d733e1cc0a86f7e211b18e3d540640f3e Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 20 Jun 2023 18:31:04 +0000 Subject: [PATCH 083/522] Fix negative support for date_diff/age(), fix whole int TransformDateTime64 --- src/Core/DecimalFunctions.h | 6 +- src/Functions/DateTimeTransforms.h | 24 ++++--- src/Functions/TransformDateTime64.h | 5 +- src/Functions/dateDiff.cpp | 67 +++++++++---------- src/Functions/toStartOfInterval.cpp | 1 - ...0479_date_and_datetime_to_number.reference | 1 + .../00479_date_and_datetime_to_number.sql | 1 + .../02160_special_functions.reference | 2 + .../0_stateless/02160_special_functions.sql | 3 + .../02477_age_datetime64.reference | 9 +++ .../0_stateless/02477_age_datetime64.sql | 8 ++- 11 files changed, 78 insertions(+), 49 deletions(-) diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index 9b6b1e87e5f..6d855498f6c 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -48,7 +48,11 @@ constexpr inline auto scaleMultiplier(UInt32 scale) /** Components of DecimalX value: * whole - represents whole part of decimal, can be negative or positive. - * fractional - for fractional part of decimal, always positive. + * fractional - for fractional part of decimal. 
+ * + * 0.123 represent 0 / 0.123 + * -0.123 represent 0 / -0.123 + * -1.123 represent -1 / 0.123 */ template struct DecimalComponents diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 192efb9820d..a0b649bbd9b 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1382,7 +1382,7 @@ struct ToRelativeSubsecondNumImpl { static constexpr auto name = "toRelativeSubsecondNumImpl"; - static inline UInt64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) + static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) { if (scale == scale_multiplier) return t.value; @@ -1390,17 +1390,17 @@ struct ToRelativeSubsecondNumImpl return t.value / (scale / scale_multiplier); return t.value * (scale_multiplier / scale); } - static inline UInt64 execute(UInt32 t, const DateLUTImpl &) + static inline Int64 execute(UInt32 t, const DateLUTImpl &) { return t * scale_multiplier; } - static inline UInt64 execute(Int32 d, const DateLUTImpl & time_zone) + static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone) { - return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier; + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier; } - static inline UInt64 execute(UInt16 d, const DateLUTImpl & time_zone) + static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone) { - return static_cast(time_zone.fromDayNum(DayNum(d)) * scale_multiplier); + return static_cast(time_zone.fromDayNum(DayNum(d)) * scale_multiplier); } using FactorTransform = ZeroTransform; @@ -1505,9 +1505,8 @@ struct ToYYYYMMDDhhmmssImpl using FactorTransform = ZeroTransform; }; -struct DateTimeComponentsWithFractionalPart +struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents { - DateLUTImpl::DateTimeComponents datetime; UInt16 millisecond = 0; UInt16 microsecond = 0; }; @@ -1518,10 +1517,15 @@ struct ToDateTimeComponentsImpl static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { - const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); + auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); const auto multiplier = DecimalUtils::scaleMultiplier(6); - Int64 fractional = components.fractional; + if (t.value < 0 && components.fractional) + { + components.fractional = scale_multiplier + (components.whole ? 
Int64(-1) : Int64(1)) * components.fractional; + --components.whole; + } + Int64 fractional = components.fractional; if (scale_multiplier > multiplier) fractional = fractional / (scale_multiplier / multiplier); else if (scale_multiplier < multiplier) diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 1a1e732ae40..fcee2753066 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -80,7 +80,10 @@ public: } else { - const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); + auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); + if (t.value < 0 && components.fractional) + --components.whole; + return wrapped_transform.execute(static_cast(components.whole), std::forward(args)...); } } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index c8ee899d8fb..e41fe91818a 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -193,42 +193,39 @@ public: b_comp = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); adjust_value = 1; } - const auto & a_date = a_comp.datetime.date; - const auto & b_date = b_comp.datetime.date; - const auto & a_time = a_comp.datetime.time; - const auto & b_time = b_comp.datetime.time; + if constexpr (std::is_same_v>>) { - if ((a_date.month > b_date.month) - || ((a_date.month == b_date.month) && ((a_date.day > b_date.day) - || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) - || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) - || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + if ((a_comp.date.month > b_comp.date.month) + || ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - auto x_month_in_quarter = (a_date.month - 1) % 3; - auto y_month_in_quarter = (b_date.month - 1) % 3; + auto x_month_in_quarter = (a_comp.date.month - 1) % 3; + auto y_month_in_quarter = (b_comp.date.month - 1) % 3; if ((x_month_in_quarter > y_month_in_quarter) - || ((x_month_in_quarter == y_month_in_quarter) && ((a_date.day > b_date.day) - || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) - || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) - || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == 
b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_date.day > b_date.day) - || ((a_date.day == b_date.day) && ((a_time.hour > b_time.hour) - || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) - || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + if ((a_comp.date.day > b_comp.date.day) + || ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))))) res += adjust_value; } @@ -237,34 +234,34 @@ public: auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x); auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y); if ((x_day_of_week > y_day_of_week) - || ((x_day_of_week == y_day_of_week) && (a_time.hour > b_time.hour)) - || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) - || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_time.hour > b_time.hour) - || ((a_time.hour == b_time.hour) && ((a_time.minute > b_time.minute) - || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + if ((a_comp.time.hour > b_comp.time.hour) + || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_time.minute > b_time.minute) - || ((a_time.minute == b_time.minute) && ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > b_comp.millisecond) + if ((a_comp.time.minute > b_comp.time.minute) + || ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))))) res += adjust_value; } else if constexpr (std::is_same_v>>) { - if ((a_time.second > b_time.second) - || ((a_time.second == b_time.second) && ((a_comp.millisecond > 
b_comp.millisecond) + if ((a_comp.time.second > b_comp.time.second) + || ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond) || ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond))))) res += adjust_value; } diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 649242d0d86..48bf88cb14c 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include diff --git a/tests/queries/0_stateless/00479_date_and_datetime_to_number.reference b/tests/queries/0_stateless/00479_date_and_datetime_to_number.reference index 1375ccb1542..168b733d702 100644 --- a/tests/queries/0_stateless/00479_date_and_datetime_to_number.reference +++ b/tests/queries/0_stateless/00479_date_and_datetime_to_number.reference @@ -4,3 +4,4 @@ 201707 20170721 20170721112233 +19691231235959 diff --git a/tests/queries/0_stateless/00479_date_and_datetime_to_number.sql b/tests/queries/0_stateless/00479_date_and_datetime_to_number.sql index 71151690028..1e35e99a802 100644 --- a/tests/queries/0_stateless/00479_date_and_datetime_to_number.sql +++ b/tests/queries/0_stateless/00479_date_and_datetime_to_number.sql @@ -4,3 +4,4 @@ SELECT toYYYYMMDDhhmmss(toDate('2017-07-21')); SELECT toYYYYMM(toDateTime('2017-07-21T11:22:33')); SELECT toYYYYMMDD(toDateTime('2017-07-21T11:22:33')); SELECT toYYYYMMDDhhmmss(toDateTime('2017-07-21T11:22:33')); +SELECT toYYYYMMDDhhmmss(toDateTime64('1969-12-31 23:59:59.900', 3)); diff --git a/tests/queries/0_stateless/02160_special_functions.reference b/tests/queries/0_stateless/02160_special_functions.reference index 37278d4b5dc..5e7e3383d8d 100644 --- a/tests/queries/0_stateless/02160_special_functions.reference +++ b/tests/queries/0_stateless/02160_special_functions.reference @@ -41,4 +41,6 @@ Hello 701 800 60200201 +60 +10 1 diff --git a/tests/queries/0_stateless/02160_special_functions.sql b/tests/queries/0_stateless/02160_special_functions.sql index 6002f793601..64919536be3 100644 --- a/tests/queries/0_stateless/02160_special_functions.sql +++ b/tests/queries/0_stateless/02160_special_functions.sql @@ -50,4 +50,7 @@ SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.299999'::DateTime64(6), '2021- SELECT DATEDIFF(millisecond, '2021-01-01 23:59:59.2'::DateTime64(1), '2021-01-02'::Date); SELECT DATEDIFF(microsecond, '2021-01-01 23:59:59.899999'::DateTime64(6), '2021-01-02 00:01:00.100200300'::DateTime64(9)); +SELECT DATEDIFF(microsecond, '1969-12-31 23:59:59.999950'::DateTime64(6), '1970-01-01 00:00:00.000010'::DateTime64(6)); +SELECT DATEDIFF(second, '1969-12-31 23:59:59.123'::DateTime64(6), '1970-01-01 00:00:09.123'::DateTime64(6)); + SELECT EXISTS (SELECT 1); diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference index b732794eef7..e2ac97cbcd9 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -208,3 +208,12 @@ SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime 7 SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); 7 +-- DateTime64 vs DateTime64 with negative time +SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); +2349 +SELECT age('second', toDateTime64('1969-12-31 
23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); +2 +SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +5 +SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +4 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql index 809270f4cce..2992f73a2c1 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -133,4 +133,10 @@ SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:29:36.200', 3, 'UTC')); SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:35.200', 3, 'UTC')); SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime64('2023-02-02 20:30:36.100', 3, 'UTC')); -SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); \ No newline at end of file +SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); + +-- DateTime64 vs DateTime64 with negative time +SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); \ No newline at end of file From 6050813d2e43d09a39fa2cb4b1d6daf1a4a8e27d Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 20 Jun 2023 18:56:07 +0000 Subject: [PATCH 084/522] Remove trailing whitespaces --- src/Core/DecimalFunctions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index 6d855498f6c..648c09d3d72 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -49,7 +49,7 @@ constexpr inline auto scaleMultiplier(UInt32 scale) /** Components of DecimalX value: * whole - represents whole part of decimal, can be negative or positive. * fractional - for fractional part of decimal. - * + * * 0.123 represent 0 / 0.123 * -0.123 represent 0 / -0.123 * -1.123 represent -1 / 0.123 From 51a5ef33e1285c13d5d4967635dbbf3ead5a908c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Jun 2023 23:20:37 +0200 Subject: [PATCH 085/522] fix --- src/Common/Exception.cpp | 12 ++++++++++++ src/Disks/IDisk.cpp | 24 +++++++++++++++--------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 20206b76225..3fd0a929d6f 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -400,6 +400,18 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")"; } catch (...) {} + +#ifdef ABORT_ON_LOGICAL_ERROR + try + { + throw; + } + catch (const std::logic_error &) + { + abortOnFailedAssertion(stream.str()); + } + catch (...) {} +#endif } catch (...) 
{ diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp index de61218d5a6..544ba014fde 100644 --- a/src/Disks/IDisk.cpp +++ b/src/Disks/IDisk.cpp @@ -89,16 +89,20 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p pool.scheduleOrThrowOnError( [&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]() { - SCOPE_EXIT_SAFE( + try + { + SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached();); + if (thread_group) - CurrentThread::detachFromGroupIfNotDetached(); - ); + CurrentThread::attachToGroup(thread_group); - if (thread_group) - CurrentThread::attachToGroup(thread_group); - - from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); - promise->set_value(); + from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings); + promise->set_value(); + } + catch (...) + { + promise->set_exception(std::current_exception()); + } }); results.push_back(std::move(future)); @@ -130,7 +134,9 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr Date: Wed, 21 Jun 2023 06:25:11 +0000 Subject: [PATCH 086/522] Add line without spaces --- tests/queries/0_stateless/00301_csv.reference | 1 + tests/queries/0_stateless/00301_csv.sh | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 140bbda84e7..804ccf0c713 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -28,4 +28,5 @@ Hello 0 0 33 \N 55 Default Hello 0 0 33 \N 55 Default Hello 1 3 2 \N 55 Default Hello 1 4 2 3 4 String +Hello 1 4 2 3 4 String Hello 1 5 2 3 4 String diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index aa019147bab..c598be44261 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -60,10 +60,11 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, echo ' , -Hello -Hello, -Hello, 1, 3, 2 -Hello, 1, 4, 2, 3, 4, String -Hello, 1, 5, 2, 3, 4, String,'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; +"Hello" +"Hello", +"Hello", 1, 3, 2 +"Hello",1,4,2,3,4,"String" +"Hello", 1, 4, 2, 3, 4, "String" +"Hello", 1, 5, 2, 3, 4, "String",'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4, f5 NULLS FIRST, f6, f7"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From d58152cb82cf2abb6112077fc4f79c656380f582 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 21 Jun 2023 08:04:40 +0000 Subject: [PATCH 087/522] Add constants / fix tests --- src/Core/DecimalFunctions.h | 6 +++--- src/Functions/DateTimeTransforms.h | 14 +++++++++----- src/Functions/dateDiff.cpp | 6 +++--- .../0_stateless/02477_age_datetime64.reference | 8 ++++---- tests/queries/0_stateless/02477_age_datetime64.sql | 8 ++++---- 5 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index 648c09d3d72..defc21a5f43 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -50,9 +50,9 @@ constexpr inline auto scaleMultiplier(UInt32 scale) * whole - represents 
whole part of decimal, can be negative or positive. * fractional - for fractional part of decimal. * - * 0.123 represent 0 / 0.123 - * -0.123 represent 0 / -0.123 - * -1.123 represent -1 / 0.123 + * 0.123 represents 0 / 0.123 + * -0.123 represents 0 / -0.123 + * -1.123 represents -1 / 0.123 */ template struct DecimalComponents diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a0b649bbd9b..afff8d6523d 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -19,6 +19,9 @@ namespace DB { +static constexpr auto microsecond_scale = 6; +static constexpr auto millisecond_scale = 3; + namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; @@ -1507,8 +1510,8 @@ struct ToYYYYMMDDhhmmssImpl struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents { - UInt16 millisecond = 0; - UInt16 microsecond = 0; + UInt16 millisecond; + UInt16 microsecond; }; struct ToDateTimeComponentsImpl @@ -1518,7 +1521,7 @@ struct ToDateTimeComponentsImpl static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); - const auto multiplier = DecimalUtils::scaleMultiplier(6); + constexpr auto multiplier = DecimalUtils::scaleMultiplier(microsecond_scale); if (t.value < 0 && components.fractional) { @@ -1531,8 +1534,9 @@ struct ToDateTimeComponentsImpl else if (scale_multiplier < multiplier) fractional = fractional * (multiplier / scale_multiplier); - UInt16 millisecond = static_cast(fractional / 1000); - UInt16 microsecond = static_cast(fractional % 1000); + constexpr auto divider = DecimalUtils::scaleMultiplier(microsecond_scale - millisecond_scale); + UInt16 millisecond = static_cast(fractional / divider); + UInt16 microsecond = static_cast(fractional % divider); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond}; } static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index e41fe91818a..79be3059b2a 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -177,7 +177,7 @@ public: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; - const auto multiplier = DecimalUtils::scaleMultiplier(6); + constexpr auto multiplier = DecimalUtils::scaleMultiplier(microsecond_scale); auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); @@ -399,9 +399,9 @@ public: else if (unit == "second" || unit == "ss" || unit == "s") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "millisecond" || unit == "ms") - impl.template dispatchForColumns(3)>>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns(millisecond_scale)>>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "microsecond" || unit == "us" || unit == "u") - impl.template dispatchForColumns(6)>>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns(microsecond_scale)>>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support 
'{}' unit", getName(), unit); diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference index e2ac97cbcd9..c8c716e1e9a 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ b/tests/queries/0_stateless/02477_age_datetime64.reference @@ -209,11 +209,11 @@ SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); 7 -- DateTime64 vs DateTime64 with negative time -SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); +SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); 2349 -SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); 2 -SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); 5 -SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); 4 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql index 2992f73a2c1..889137395a3 100644 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ b/tests/queries/0_stateless/02477_age_datetime64.sql @@ -136,7 +136,7 @@ SELECT age('year', toDateTime64('2015-02-02 20:30:36.200', 3, 'UTC'), toDateTime SELECT age('year', toDateTime64('2015-02-02 20:30:36.200101', 6, 'UTC'), toDateTime64('2023-02-02 20:30:36.200100', 6, 'UTC')); -- DateTime64 vs DateTime64 with negative time -SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); -SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); -SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); -SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); \ No newline at end of file +SELECT age('millisecond', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.350', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:58.001', 3, 'UTC'), toDateTime64('1970-01-01 00:00:00.35', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.001', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); +SELECT age('second', toDateTime64('1969-12-31 23:59:50.003', 3, 'UTC'), toDateTime64('1969-12-31 23:59:55.002', 3, 'UTC')); \ No newline at end of file From 8bd53cad7849816a6bd6591eddebd9ba19fa7272 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 21 Jun 2023 14:01:05 +0000 Subject: [PATCH 088/522] Add quotes to test --- tests/queries/0_stateless/00301_csv.sh | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh 
index c598be44261..dc354433af9 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -44,12 +44,12 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; echo === Test input_format_csv_ignore_extra_columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; -echo 'Hello, 1, String1 -Hello, 2, String2, -Hello, 3, String3, 2016-01-13 -Hello, 4, , 2016-01-14 -Hello, 5, String5, 2016-01-15, 2016-01-16 -Hello, 6, String6, "line with a +echo '"Hello", 1, "String1" +"Hello", 2, "String2", +"Hello", 3, "String3", "2016-01-13" +"Hello", 4, , "2016-01-14" +"Hello", 5, "String5", "2016-01-15", "2016-01-16" +"Hello", 6, "String6", "line with a break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 4a570a05c9714c8ee94e68e4dda58efa759c8780 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 19 Jun 2023 23:35:35 +0000 Subject: [PATCH 089/522] Decrease default timeouts for S3 and HTTP requests --- docs/en/operations/settings/settings.md | 4 +- src/Backups/BackupIO_S3.cpp | 1 + src/Coordination/KeeperSnapshotManagerS3.cpp | 6 +-- src/Core/Defines.h | 2 +- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 2 + .../ObjectStorages/S3/S3ObjectStorage.cpp | 48 ++++++++++--------- src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 16 +++++-- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 +- src/IO/S3/Client.cpp | 33 +++++++++---- src/IO/S3/Client.h | 22 +++++++-- src/IO/S3/tests/gtest_aws_s3_client.cpp | 1 + src/IO/WriteBufferFromS3.cpp | 4 +- src/IO/WriteBufferFromS3.h | 3 ++ src/IO/tests/gtest_writebuffer_s3.cpp | 1 + src/Storages/StorageS3.cpp | 5 +- src/Storages/StorageS3.h | 1 + src/Storages/StorageS3Settings.cpp | 5 +- src/Storages/StorageS3Settings.h | 3 +- 19 files changed, 109 insertions(+), 51 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index baefbb2cf6f..4916dfaaf7d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3501,7 +3501,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 180. +Default value: 30. ## http_receive_timeout {#http_receive_timeout} @@ -3512,7 +3512,7 @@ Possible values: - Any positive integer. - 0 - Disabled (infinite timeout). -Default value: 180. +Default value: 30. 
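Both settings bound a single blocking send or receive call on the socket, not the whole HTTP exchange: a transfer that keeps moving bytes can run far longer than the timeout, while a stalled peer fails after one interval. The sketch below is an illustration of that per-call semantic only, not ClickHouse code; the helper name and the use of raw POSIX socket options are assumptions made for the example.

#include <sys/socket.h>
#include <sys/time.h>

/// Hypothetical helper: make any single recv() on `fd` fail once it has been
/// idle for `seconds`, mirroring http_receive_timeout. As long as bytes keep
/// arriving, the call is never interrupted, however long the transfer takes.
static bool setReceiveTimeout(int fd, long seconds)
{
    timeval tv{};
    tv.tv_sec = seconds;
    tv.tv_usec = 0;
    return setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == 0;
}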
## check_query_single_value_result {#check_query_single_value_result} diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 967beba4bf5..9a2a457e13e 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -253,6 +253,7 @@ std::unique_ptr BackupWriterS3::writeFile(const String & file_name) { return std::make_unique( client, + client, // already has long timeout s3_uri.bucket, fs::path(s3_uri.key) / file_name, DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 1afe0b352c5..bf437f03ae3 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -144,14 +144,14 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa const auto create_writer = [&](const auto & key) { - return WriteBufferFromS3 - { + return WriteBufferFromS3( + s3_client->client, s3_client->client, s3_client->uri.bucket, key, DBMS_DEFAULT_BUFFER_SIZE, request_settings_1 - }; + ); }; LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_path); diff --git a/src/Core/Defines.h b/src/Core/Defines.h index e9b84b71cae..efe14b93a3d 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -41,7 +41,7 @@ /// The boundary on which the blocks for asynchronous file operations should be aligned. #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096 -#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 180 +#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 30 #define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1 /// Maximum number of http-connections between two endpoints /// the number is unmotivated diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 204a27483df..5162e0f273e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -102,6 +102,7 @@ class IColumn; M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \ M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \ M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \ + M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \ M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \ M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 9fd45ac16d6..2886cdd288d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,8 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. 
Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e48924326e1..e46ca3d0828 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -149,7 +149,7 @@ private: bool S3ObjectStorage::exists(const StoredObject & object) const { auto settings_ptr = s3_settings.get(); - return S3::objectExists(*client.get(), bucket, object.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); + return S3::objectExists(*clients.get()->client, bucket, object.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); } std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT @@ -168,7 +168,7 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT (const std::string & path, size_t read_until_position) -> std::unique_ptr { return std::make_unique( - client.get(), + clients.get()->client, bucket, path, version_id, @@ -218,7 +218,7 @@ std::unique_ptr S3ObjectStorage::readObject( /// NOLINT { auto settings_ptr = s3_settings.get(); return std::make_unique( - client.get(), + clients.get()->client, bucket, object.remote_path, version_id, @@ -243,8 +243,10 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN if (write_settings.s3_allow_parallel_part_upload) scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "VFSWrite"); + auto clients_ = clients.get(); return std::make_unique( - client.get(), + clients_->client, + clients_->client_with_long_timeout, bucket, object.remote_path, buf_size, @@ -258,7 +260,7 @@ std::unique_ptr S3ObjectStorage::writeObject( /// NOLIN ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefix) const { auto settings_ptr = s3_settings.get(); - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; return std::make_shared(bucket, path_prefix, client_ptr, settings_ptr->list_object_keys_size); } @@ -266,7 +268,7 @@ ObjectStorageIteratorPtr S3ObjectStorage::iterate(const std::string & path_prefi void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const { auto settings_ptr = s3_settings.get(); - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; S3::ListObjectsV2Request request; request.SetBucket(bucket); @@ -307,7 +309,7 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exists) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; ProfileEvents::increment(ProfileEvents::S3DeleteObjects); ProfileEvents::increment(ProfileEvents::DiskS3DeleteObjects); @@ -333,7 +335,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e } else { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto 
settings_ptr = s3_settings.get(); size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; @@ -394,7 +396,7 @@ void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects) std::optional S3ObjectStorage::tryGetObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); + auto object_info = S3::getObjectInfo(*clients.get()->client, bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true, /* throw_on_error= */ false); if (object_info.size == 0 && object_info.last_modification_time == 0 && object_info.metadata.empty()) return {}; @@ -410,7 +412,7 @@ std::optional S3ObjectStorage::tryGetObjectMetadata(const std::s ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const { auto settings_ptr = s3_settings.get(); - auto object_info = S3::getObjectInfo(*client.get(), bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); + auto object_info = S3::getObjectInfo(*clients.get()->client, bucket, path, {}, settings_ptr->request_settings, /* with_metadata= */ true, /* for_disk_s3= */ true); ObjectMetadata result; result.size_bytes = object_info.size; @@ -429,7 +431,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT /// Shortcut for S3 if (auto * dest_s3 = dynamic_cast(&object_storage_to); dest_s3 != nullptr) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); @@ -445,7 +447,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT void S3ObjectStorage::copyObject( // NOLINT const StoredObject & object_from, const StoredObject & object_to, std::optional object_to_attributes) { - auto client_ptr = client.get(); + auto client_ptr = clients.get()->client; auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); @@ -458,35 +460,33 @@ void S3ObjectStorage::setNewSettings(std::unique_ptr && s3_settings.set(std::move(s3_settings_)); } -void S3ObjectStorage::setNewClient(std::unique_ptr && client_) -{ - client.set(std::move(client_)); -} - void S3ObjectStorage::shutdown() { - auto client_ptr = client.get(); + auto clients_ptr = clients.get(); /// This call stops any next retry attempts for ongoing S3 requests. /// If S3 request is failed and the method below is executed S3 client immediately returns the last failed S3 request outcome. /// If S3 is healthy nothing wrong will be happened and S3 requests will be processed in a regular way without errors. /// This should significantly speed up shutdown process if S3 is unhealthy. 
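For context, a sketch of the versioned-snapshot pattern that makes this pair swap safe (assumptions: a simplified mutex-based stand-in, not the real MultiVersion class). Readers always observe a matched {client, client_with_long_timeout} pair, and a concurrent applyNewSettings() can never hand out half of an update:

#include <memory>
#include <mutex>

template <typename T>
class VersionedPtr
{
public:
    /// Readers take an immutable snapshot; it remains valid even if a writer
    /// publishes a newer version while the snapshot is still in use.
    std::shared_ptr<const T> get() const
    {
        std::lock_guard lock(mutex);
        return current;
    }

    /// Writers publish a complete new version in one step, so a partially
    /// updated state (say, only one of two clients replaced) is never seen.
    void set(std::unique_ptr<T> next)
    {
        std::shared_ptr<const T> fresh = std::move(next);
        std::lock_guard lock(mutex);
        current = std::move(fresh);
    }

private:
    mutable std::mutex mutex;
    std::shared_ptr<const T> current;
};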
- const_cast(*client_ptr).DisableRequestProcessing(); + const_cast(*clients_ptr->client).DisableRequestProcessing(); + const_cast(*clients_ptr->client_with_long_timeout).DisableRequestProcessing(); } void S3ObjectStorage::startup() { - auto client_ptr = client.get(); + auto clients_ptr = clients.get(); /// Need to be enabled if it was disabled during shutdown() call. - const_cast(*client_ptr).EnableRequestProcessing(); + const_cast(*clients_ptr->client).EnableRequestProcessing(); + const_cast(*clients_ptr->client_with_long_timeout).EnableRequestProcessing(); } void S3ObjectStorage::applyNewSettings(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, ContextPtr context) { auto new_s3_settings = getSettings(config, config_prefix, context); auto new_client = getClient(config, config_prefix, context, *new_s3_settings); + auto new_clients = std::make_unique(std::move(new_client), *new_s3_settings); s3_settings.set(std::move(new_s3_settings)); - client.set(std::move(new_client)); + clients.set(std::move(new_clients)); } std::unique_ptr S3ObjectStorage::cloneObjectStorage( @@ -501,7 +501,9 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( endpoint); } +S3ObjectStorage::Clients::Clients(std::shared_ptr client_, const S3ObjectStorageSettings & settings) + : client(std::move(client_)), client_with_long_timeout(client->clone(std::nullopt, settings.request_settings.long_request_timeout_ms)) {} + } - #endif diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 072e1354d38..527b1479d89 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -39,6 +39,16 @@ struct S3ObjectStorageSettings class S3ObjectStorage : public IObjectStorage { +public: + struct Clients + { + std::shared_ptr client; + std::shared_ptr client_with_long_timeout; + + Clients() = default; + Clients(std::shared_ptr client, const S3ObjectStorageSettings & settings); + }; + private: friend class S3PlainObjectStorage; @@ -51,7 +61,7 @@ private: String bucket_, String connection_string) : bucket(bucket_) - , client(std::move(client_)) + , clients(std::make_unique(std::move(client_), *s3_settings_)) , s3_settings(std::move(s3_settings_)) , s3_capabilities(s3_capabilities_) , version_id(std::move(version_id_)) @@ -159,14 +169,12 @@ public: private: void setNewSettings(std::unique_ptr && s3_settings_); - void setNewClient(std::unique_ptr && client_); - void removeObjectImpl(const StoredObject & object, bool if_exists); void removeObjectsImpl(const StoredObjects & objects, bool if_exists); std::string bucket; - MultiVersion client; + MultiVersion clients; MultiVersion s3_settings; S3Capabilities s3_capabilities; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 409eb2a3dc3..cbf0392aae9 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -129,7 +129,7 @@ std::unique_ptr getClient( throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must ends with '/', but '{}' doesn't.", uri.key); client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 1000); - client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 30000); + client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 3000); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 
100); client_configuration.endpointOverride = uri.endpoint; diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 668b1a3959d..7e20b1a9e8f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -100,7 +100,7 @@ std::unique_ptr Client::create( size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider, - const Aws::Client::ClientConfiguration & client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing) { @@ -109,9 +109,16 @@ std::unique_ptr Client::create( new Client(max_redirects_, std::move(sse_kms_config_), credentials_provider, client_configuration, sign_payloads, use_virtual_addressing)); } -std::unique_ptr Client::create(const Client & other) +std::unique_ptr Client::clone( + std::optional> override_retry_strategy, + std::optional override_request_timeout_ms) const { - return std::unique_ptr(new Client(other)); + PocoHTTPClientConfiguration new_configuration = client_configuration; + if (override_retry_strategy.has_value()) + new_configuration.retryStrategy = *override_retry_strategy; + if (override_request_timeout_ms.has_value()) + new_configuration.requestTimeoutMs = *override_request_timeout_ms; + return std::unique_ptr(new Client(*this, new_configuration)); } namespace @@ -134,11 +141,14 @@ Client::Client( size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider_, - const Aws::Client::ClientConfiguration & client_configuration, - Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, - bool use_virtual_addressing) - : Aws::S3::S3Client(credentials_provider_, client_configuration, std::move(sign_payloads), use_virtual_addressing) + const PocoHTTPClientConfiguration & client_configuration_, + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads_, + bool use_virtual_addressing_) + : Aws::S3::S3Client(credentials_provider_, client_configuration_, sign_payloads_, use_virtual_addressing_) , credentials_provider(credentials_provider_) + , client_configuration(client_configuration_) + , sign_payloads(sign_payloads_) + , use_virtual_addressing(use_virtual_addressing_) , max_redirects(max_redirects_) , sse_kms_config(std::move(sse_kms_config_)) , log(&Poco::Logger::get("S3Client")) @@ -175,10 +185,15 @@ Client::Client( ClientCacheRegistry::instance().registerClient(cache); } -Client::Client(const Client & other) - : Aws::S3::S3Client(other) +Client::Client( + const Client & other, const PocoHTTPClientConfiguration & client_configuration_) + : Aws::S3::S3Client(other.credentials_provider, client_configuration_, other.sign_payloads, + other.use_virtual_addressing) , initial_endpoint(other.initial_endpoint) , credentials_provider(other.credentials_provider) + , client_configuration(client_configuration_) + , sign_payloads(other.sign_payloads) + , use_virtual_addressing(other.use_virtual_addressing) , explicit_region(other.explicit_region) , detect_region(other.detect_region) , provider_type(other.provider_type) diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index e1b99c893a6..8904c850553 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -105,6 +105,8 @@ private: class Client : private Aws::S3::S3Client { public: + class RetryStrategy; + /// we use a factory method to verify arguments before creating a client because /// there are certain requirements on arguments for it to work correctly /// e.g. 
Client::RetryStrategy should be used @@ -112,11 +114,19 @@ public: size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider, - const Aws::Client::ClientConfiguration & client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing); - static std::unique_ptr create(const Client & other); + /// Create a client with adjusted settings: + /// * override_retry_strategy can be used to disable retries to avoid nested retries when we have + /// a retry loop outside of S3 client. Specifically, for read and write buffers. Currently not + /// actually used. + /// * override_request_timeout_ms is used to increase timeout for CompleteMultipartUploadRequest + /// because it often sits idle for 10 seconds: https://github.com/ClickHouse/ClickHouse/pull/42321 + std::unique_ptr clone( + std::optional> override_retry_strategy = std::nullopt, + std::optional override_request_timeout_ms = std::nullopt) const; Client & operator=(const Client &) = delete; @@ -211,11 +221,12 @@ private: Client(size_t max_redirects_, ServerSideEncryptionKMSConfig sse_kms_config_, const std::shared_ptr & credentials_provider_, - const Aws::Client::ClientConfiguration& client_configuration, + const PocoHTTPClientConfiguration & client_configuration, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads, bool use_virtual_addressing); - Client(const Client & other); + Client( + const Client & other, const PocoHTTPClientConfiguration & client_configuration); /// Leave regular functions private so we don't accidentally use them /// otherwise region and endpoint redirection won't work @@ -251,6 +262,9 @@ private: String initial_endpoint; std::shared_ptr credentials_provider; + PocoHTTPClientConfiguration client_configuration; + Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy sign_payloads; + bool use_virtual_addressing; std::string explicit_region; mutable bool detect_region = true; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index a9b5fa03f30..5731e9061d6 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -89,6 +89,7 @@ void doWriteRequest(std::shared_ptr client, const DB::S3:: DB::S3Settings::RequestSettings request_settings; request_settings.max_unexpected_write_error_retries = max_unexpected_write_error_retries; DB::WriteBufferFromS3 write_buffer( + client, client, uri.bucket, uri.key, diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 990505adfb3..900861a7831 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -77,6 +77,7 @@ struct WriteBufferFromS3::PartData WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, + std::shared_ptr client_with_long_timeout_ptr_, const String & bucket_, const String & key_, size_t buf_size_, @@ -91,6 +92,7 @@ WriteBufferFromS3::WriteBufferFromS3( , upload_settings(request_settings.getUploadSettings()) , write_settings(write_settings_) , client_ptr(std::move(client_ptr_)) + , client_with_long_timeout_ptr(std::move(client_with_long_timeout_ptr_)) , object_metadata(std::move(object_metadata_)) , buffer_allocation_policy(ChooseBufferPolicy(upload_settings)) , task_tracker( @@ -551,7 +553,7 @@ void WriteBufferFromS3::completeMultipartUpload() ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload); Stopwatch watch; - auto outcome = 
client_ptr->CompleteMultipartUpload(req); + auto outcome = client_with_long_timeout_ptr->CompleteMultipartUpload(req); watch.stop(); ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index f4200b0a646..32f4867a439 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -29,6 +29,8 @@ class WriteBufferFromS3 final : public WriteBufferFromFileBase public: WriteBufferFromS3( std::shared_ptr client_ptr_, + /// for CompleteMultipartUploadRequest, because it blocks on recv() for a few seconds on big uploads + std::shared_ptr client_with_long_timeout_ptr_, const String & bucket_, const String & key_, size_t buf_size_, @@ -86,6 +88,7 @@ private: const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const WriteSettings write_settings; const std::shared_ptr client_ptr; + const std::shared_ptr client_with_long_timeout_ptr; const std::optional> object_metadata; Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index cd38291fb31..44c0ee67669 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -526,6 +526,7 @@ public: getAsyncPolicy().setAutoExecute(false); return std::make_unique( + client, client, bucket, file_name, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f4791e45e2b..135722dbce2 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -150,7 +150,7 @@ public: KeysWithInfo * read_keys_, const S3Settings::RequestSettings & request_settings_) : WithContext(context_) - , client(S3::Client::create(client_)) + , client(client_.clone()) , globbed_uri(globbed_uri_) , query(query_) , virtual_header(virtual_header_) @@ -783,6 +783,7 @@ public: write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique( configuration_.client, + configuration_.client_with_long_timeout, bucket, key, DBMS_DEFAULT_BUFFER_SIZE, @@ -1296,6 +1297,8 @@ void StorageS3::Configuration::connect(ContextPtr context) context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), }); + + client_with_long_timeout = client->clone(std::nullopt, request_settings.long_request_timeout_ms); } void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 13053833623..8d571dd796f 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -274,6 +274,7 @@ public: HTTPHeaderEntries headers_from_ast; std::shared_ptr client; + std::shared_ptr client_with_long_timeout; std::vector keys; }; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 23b4630707c..89e6ee46b4d 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -199,7 +199,7 @@ S3Settings::RequestSettings::RequestSettings( list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); - request_timeout_ms = config.getUInt64(key + 
"request_timeout_ms", request_timeout_ms); + request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. @@ -255,6 +255,9 @@ void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settin if (!if_changed || settings.s3_retry_attempts.changed) retry_attempts = settings.s3_retry_attempts; + + if (!if_changed || settings.s3_request_timeout_ms.changed) + request_timeout_ms = settings.s3_request_timeout_ms; } void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 41489927e7f..991e323acb6 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -69,7 +69,8 @@ struct S3Settings ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; size_t retry_attempts = 10; - size_t request_timeout_ms = 30000; + size_t request_timeout_ms = 3000; + size_t long_request_timeout_ms = 30000; // TODO: Take this from config like request_timeout_ms bool throw_on_zero_files_match = false; From 9157314b2a8d03a87bc467e716c3557b7d9d768f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 21 Jun 2023 20:29:32 +0200 Subject: [PATCH 090/522] fix --- .../ObjectStorages/DiskObjectStorage.cpp | 4 ++- .../MergeTree/DataPartStorageOnDiskBase.cpp | 27 +++++++++++++------ .../MergeTree/DataPartStorageOnDiskBase.h | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +++-- tests/integration/helpers/cluster.py | 2 +- .../configs/config.d/storage_conf.xml | 2 ++ .../test_merge_tree_s3_failover/test.py | 3 ++- 7 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index e3922b6c505..90eb87a56f1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -544,8 +544,10 @@ void DiskObjectStorage::writeFileUsingBlobWritingFunction(const String & path, W } void DiskObjectStorage::applyNewSettings( - const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & config_prefix, const DisksMap & disk_map) + const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String & /*config_prefix*/, const DisksMap & disk_map) { + /// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name + const auto config_prefix = "storage_configuration.disks." 
+ name; object_storage->applyNewSettings(config, config_prefix, context_); IDisk::applyNewSettings(config, context_, config_prefix, disk_map); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index c397a634db6..01fcc2698eb 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -455,23 +455,34 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( const std::string & to, const std::string & dir_path, - const DiskPtr & disk, - Poco::Logger *) const + const DiskPtr & dst_disk, + Poco::Logger * log) const { String path_to_clone = fs::path(to) / dir_path / ""; + auto src_disk = volume->getDisk(); - if (disk->exists(path_to_clone)) + if (dst_disk->exists(path_to_clone)) { throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Cannot clone part {} from '{}' to '{}': path '{}' already exists", - dir_path, getRelativePath(), path_to_clone, fullPath(disk, path_to_clone)); + dir_path, getRelativePath(), path_to_clone, fullPath(dst_disk, path_to_clone)); } - disk->createDirectories(to); - volume->getDisk()->copyDirectoryContent(getRelativePath(), disk, path_to_clone); - volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); + try + { + dst_disk->createDirectories(to); + src_disk->copyDirectoryContent(getRelativePath(), dst_disk, path_to_clone); + } + catch (...) + { + /// It's safe to remove it recursively (even with zero-copy-replication) + /// because we've just did full copy through copyDirectoryContent + LOG_WARNING(log, "Removing directory {} after failed attempt to move a data part", path_to_clone); + dst_disk->removeRecursive(path_to_clone); + throw; + } - auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + auto single_disk_volume = std::make_shared(dst_disk->getName(), dst_disk, 0); return create(single_disk_volume, to, dir_path, /*initialize=*/ true); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 043953eb20c..59f29b76b75 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -71,7 +71,7 @@ public: MutableDataPartStoragePtr clonePart( const std::string & to, const std::string & dir_path, - const DiskPtr & disk, + const DiskPtr & dst_disk, Poco::Logger * log) const override; void rename( diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index e1e64b82ea3..c5754f70265 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -502,8 +502,10 @@ void IMergeTreeDataPart::removeIfNeeded() throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name); - const auto part_parent_directory = directoryPath(part_directory); - bool is_moving_part = part_parent_directory.ends_with("moving/"); + fs::path part_directory_path = getDataPartStorage().getRelativePath(); + if (part_directory_path.filename().empty()) + part_directory_path = part_directory_path.parent_path(); + bool is_moving_part = part_directory_path.parent_path().filename() == "moving"; if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj") && !is_moving_part) { LOG_ERROR( diff --git a/tests/integration/helpers/cluster.py 
b/tests/integration/helpers/cluster.py index f57ebf40e54..2b14b2eeb25 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -36,6 +36,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) + from .hdfs_api import HDFSApi # imports requests_kerberos except Exception as e: logging.warning(f"Cannot import some modules, some tests may not work: {e}") @@ -51,7 +52,6 @@ from helpers.client import QueryRuntimeException import docker from .client import Client -from .hdfs_api import HDFSApi from .config_cluster import * diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml index 4480327c4b5..235b9a7b7a1 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/storage_conf.xml @@ -72,4 +72,6 @@ + + true diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 05aeeff2ec1..90dda631924 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -183,7 +183,8 @@ def test_move_failover(cluster): ) ENGINE=MergeTree() ORDER BY id TTL dt + INTERVAL 4 SECOND TO VOLUME 'external' - SETTINGS storage_policy='s3_cold' + SETTINGS storage_policy='s3_cold', temporary_directories_lifetime=1, + merge_tree_clear_old_temporary_directories_interval_seconds=1 """ ) From fc5ed7ffd7b6594beed5b3ed172ea79055358862 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 21 Jun 2023 18:45:28 +0000 Subject: [PATCH 091/522] Automatic style fix --- tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 2b14b2eeb25..4c356219537 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -36,7 +36,7 @@ try: from confluent_kafka.avro.cached_schema_registry_client import ( CachedSchemaRegistryClient, ) - from .hdfs_api import HDFSApi # imports requests_kerberos + from .hdfs_api import HDFSApi # imports requests_kerberos except Exception as e: logging.warning(f"Cannot import some modules, some tests may not work: {e}") From 1419bb7adbac4603439c02d8e8b68d1338437c48 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:31:23 +0200 Subject: [PATCH 092/522] rollback changes in test --- tests/integration/test_merge_tree_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 22805eb6e94..2ccd517923a 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -923,7 +923,7 @@ def test_merge_canceled_by_s3_errors_when_move(cluster, broken_s3, node_name): @pytest.mark.parametrize("node_name", ["node"]) @pytest.mark.parametrize( - "in_flight_memory", [(10, 288044299), (5, 193557290), (1, 128348733)] + "in_flight_memory", [(10, 245918115), (5, 156786752), (1, 106426187)] ) def test_s3_engine_heavy_write_check_mem( cluster, broken_s3, node_name, in_flight_memory From 2c3a4cb90de34569277edb3e4cf9f50fa9e5d5a2 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 10:47:07 +0000 Subject: [PATCH 093/522] Style fix --- 
src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index a727a5bc490..59b0f25f0bf 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -156,8 +156,7 @@ void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - bool res = checkChar(format_settings.csv.delimiter, *buf); - if (!res) + if (!checkChar(format_settings.csv.delimiter, *buf)) { if (!format_settings.csv.missing_as_default) { @@ -165,9 +164,7 @@ void CSVFormatReader::skipFieldDelimiter() throwAtAssertionFailed(err, *buf); } else - { current_row_has_missing_fields = true; - } } } From a0fde6a55b3ddb9cac0b3914fc18af58f6419eac Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 10:50:14 +0000 Subject: [PATCH 094/522] Style fix --- .../Formats/Impl/CSVRowInputFormat.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 59b0f25f0bf..edbc33fb3c3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -156,16 +156,17 @@ void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - if (!checkChar(format_settings.csv.delimiter, *buf)) + bool res = checkChar(format_settings.csv.delimiter, *buf); + if (res) + return; + + if (!format_settings.csv.missing_as_default) { - if (!format_settings.csv.missing_as_default) - { - char err[2] = {format_settings.csv.delimiter, '\0'}; - throwAtAssertionFailed(err, *buf); - } - else - current_row_has_missing_fields = true; + char err[2] = {format_settings.csv.delimiter, '\0'}; + throwAtAssertionFailed(err, *buf); } + else + current_row_has_missing_fields = true; } template From 65e5d40cae52b6cf9a2f0408f6ccb373f23a0e42 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 19:38:41 +0000 Subject: [PATCH 095/522] Fix formatDateTime() with fractional negative datetime64 --- src/Functions/formatDateTime.cpp | 10 +++++++++- .../0_stateless/00718_format_datetime.reference | 5 +++++ tests/queries/0_stateless/00718_format_datetime.sql | 5 +++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index aac7ed1ad4d..c849b0dd933 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -943,7 +943,15 @@ public: { if constexpr (std::is_same_v) { - const auto c = DecimalUtils::split(vec[i], scale); + auto c = DecimalUtils::split(vec[i], scale); + + if (vec[i].value < 0 && c.fractional) + { + c.fractional = DecimalUtils::scaleMultiplier(scale) + + (c.whole ? 
DataType::FieldType(-1) : DataType::FieldType(1)) * c.fractional; + --c.whole; + } + for (auto & instruction : instructions) instruction.perform(pos, static_cast(c.whole), c.fractional, scale, time_zone); } diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index 50874ac9b2e..a51134348cc 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -61,6 +61,11 @@ no formatting pattern no formatting pattern 2022-12-08 18:11:29.123400000 2022-12-08 18:11:29.1 2022-12-08 18:11:29.000000 +1900-01-01 00:00:00.000 +1962-12-08 18:11:29.123 +1969-12-31 23:59:59.999 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00.001 2022-12-08 18:11:29.000000 2022-12-08 00:00:00.000000 2022-12-08 00:00:00.000000 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index c0db6a4f64e..14e43c31d9c 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -87,6 +87,11 @@ select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456789', 9), '%f') SE select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1900-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1962-12-08 18:11:29.123', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.001', 3, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); From 220520c516bea15399396b5f82aa3ab2d6cd9ca3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 22 Jun 2023 21:45:10 +0200 Subject: [PATCH 096/522] fix --- src/Common/Exception.cpp | 22 +++++++++---------- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreePartsMover.cpp | 10 +++++++-- src/Storages/StorageReplicatedMergeTree.cpp | 8 +++---- src/Storages/StorageReplicatedMergeTree.h | 6 ++--- .../test_s3_zero_copy_ttl/configs/s3.xml | 2 ++ .../integration/test_s3_zero_copy_ttl/test.py | 2 +- 7 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 3fd0a929d6f..9757c24a8ec 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -401,17 +401,17 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b } catch (...) {} -#ifdef ABORT_ON_LOGICAL_ERROR - try - { - throw; - } - catch (const std::logic_error &) - { - abortOnFailedAssertion(stream.str()); - } - catch (...) {} -#endif +// #ifdef ABORT_ON_LOGICAL_ERROR +// try +// { +// throw; +// } +// catch (const std::logic_error &) +// { +// abortOnFailedAssertion(stream.str()); +// } +// catch (...) {} +// #endif } catch (...) 
{ diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..f6f241c1e89 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -1040,7 +1040,7 @@ public: /// Fetch part only if some replica has it on shared storage like S3 /// Overridden in StorageReplicatedMergeTree - virtual MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } + virtual MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart &, const DiskPtr &, const String &) { return nullptr; } /// Check shared data usage on other replicas for detached/freezed part /// Remove local files and remote files if needed diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index 8fa4ac6c78a..59784935c7b 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -233,9 +233,15 @@ MergeTreePartsMover::TemporaryClonedPart MergeTreePartsMover::clonePart(const Me disk->createDirectories(path_to_clone); - cloned_part_storage = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name); + auto zero_copy_part = data->tryToFetchIfShared(*part, disk, fs::path(path_to_clone) / part->name); - if (!cloned_part_storage) + if (zero_copy_part) + { + /// FIXME for some reason we cannot just use this part, we have to re-create it through MergeTreeDataPartBuilder + zero_copy_part->is_temp = false; /// Do not remove it in dtor + cloned_part_storage = zero_copy_part->getDataPartStoragePtr(); + } + else { LOG_INFO(log, "Part {} was not fetched, we are the first who move it to another disk, so we will copy it", part->name); cloned_part_storage = part->getDataPartStorage().clonePart(path_to_clone, part->getDataPartStorage().getPartDirectory(), disk, log); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bb99e21e4ab..e96049a456a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1972,7 +1972,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che } -MutableDataPartStoragePtr StorageReplicatedMergeTree::executeFetchShared( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared( const String & source_replica, const String & new_part_name, const DiskPtr & disk, @@ -4444,7 +4444,7 @@ bool StorageReplicatedMergeTree::fetchPart( } -MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & source_replica_path, @@ -4550,7 +4550,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); LOG_DEBUG(log, "Fetched part {} from {}:{}", part_name, zookeeper_name, source_replica_path); - return part->getDataPartStoragePtr(); + return part; } void StorageReplicatedMergeTree::startup() @@ -8868,7 +8868,7 @@ std::pair StorageReplicatedMergeTree::unlockSharedDataByID( } -MutableDataPartStoragePtr StorageReplicatedMergeTree::tryToFetchIfShared( +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::tryToFetchIfShared( const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) diff --git a/src/Storages/StorageReplicatedMergeTree.h 
b/src/Storages/StorageReplicatedMergeTree.h index c08e05090b1..3ba5c61d1b0 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -243,7 +243,7 @@ public: bool canExecuteFetch(const ReplicatedMergeTreeLogEntry & entry, String & disable_reason) const; /// Fetch part only when it stored on shared storage like S3 - MutableDataPartStoragePtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); + MutableDataPartPtr executeFetchShared(const String & source_replica, const String & new_part_name, const DiskPtr & disk, const String & path); /// Lock part in zookeeper for use shared data in several nodes void lockSharedData(const IMergeTreeDataPart & part, bool replace_existing_lock, std::optional hardlinked_files) const override; @@ -285,7 +285,7 @@ public: MergeTreeDataFormatVersion data_format_version); /// Fetch part only if some replica has it on shared storage like S3 - MutableDataPartStoragePtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; + MutableDataPartPtr tryToFetchIfShared(const IMergeTreeDataPart & part, const DiskPtr & disk, const String & path) override; /// Get best replica having this partition on a same type remote disk String getSharedDataReplica(const IMergeTreeDataPart & part, DataSourceType data_source_type) const; @@ -716,7 +716,7 @@ private: * Used for replace local part on the same s3-shared part in hybrid storage. * Returns false if part is already fetching right now. */ - MutableDataPartStoragePtr fetchExistsPart( + MutableDataPartPtr fetchExistsPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, const String & replica_path, diff --git a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml index 5ffeb0c0d01..e179c848be1 100644 --- a/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml +++ b/tests/integration/test_s3_zero_copy_ttl/configs/s3.xml @@ -33,4 +33,6 @@ true + + true diff --git a/tests/integration/test_s3_zero_copy_ttl/test.py b/tests/integration/test_s3_zero_copy_ttl/test.py index 7dcf3734653..04bff4a44fb 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test.py +++ b/tests/integration/test_s3_zero_copy_ttl/test.py @@ -35,7 +35,7 @@ def test_ttl_move_and_s3(started_cluster): ORDER BY id PARTITION BY id TTL date TO DISK 's3_disk' - SETTINGS storage_policy='s3_and_default' + SETTINGS storage_policy='s3_and_default', temporary_directories_lifetime=1 """.format( i ) From 9231bd9f9d544cb6b82e52b0327f25323aa644ab Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 22 Jun 2023 19:48:13 +0000 Subject: [PATCH 097/522] Process broken tests in report --- tests/analyzer_integration_broken_tests.txt | 138 ++++++++++++++++++++ tests/integration/ci-runner.py | 41 ++++-- 2 files changed, 169 insertions(+), 10 deletions(-) create mode 100644 tests/analyzer_integration_broken_tests.txt diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt new file mode 100644 index 00000000000..3aa3b0dff2a --- /dev/null +++ b/tests/analyzer_integration_broken_tests.txt @@ -0,0 +1,138 @@ +test_access_for_functions/test.py::test_access_rights_for_function +test_backward_compatibility/test_normalized_count_comparison.py::test_select_aggregate_alias_column +test_concurrent_backups_s3/test.py::test_concurrent_backups +test_distributed_ddl/test.py::test_default_database[configs] 
+test_distributed_ddl/test.py::test_default_database[configs_secure]
+test_distributed_ddl/test.py::test_on_server_fail[configs]
+test_distributed_ddl/test.py::test_on_server_fail[configs_secure]
+test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple
+test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[default-]
+test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[nopass-]
+test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[pass-foo]
+test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[default-]
+test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[nopass-]
+test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[pass-foo]
+test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[default-]
+test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[nopass-]
+test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[pass-foo]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster[default-]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster[nopass-]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster[pass-foo]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[default-]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[nopass-]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[pass-foo]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[default-]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[nopass-]
+test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[pass-foo]
+test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors
+test_distributed_load_balancing/test.py::test_load_balancing_default
+test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority]
+test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority_negative]
+test_distributed_load_balancing/test.py::test_load_balancing_round_robin
+test_backward_compatibility/test.py::test_backward_compatability1
+test_backward_compatibility/test_aggregate_fixed_key.py::test_two_level_merge
+test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_avg
+test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[1000]
+test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact[500000]
+test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[1000]
+test_backward_compatibility/test_aggregate_function_state.py::test_backward_compatability_for_uniq_exact_variadic[500000]
+test_backward_compatibility/test_ip_types_binary_compatibility.py::test_ip_types_binary_compatibility
+test_backward_compatibility/test_select_aggregate_alias_column.py::test_select_aggregate_alias_column
+test_backward_compatibility/test_short_strings_aggregation.py::test_backward_compatability
+test_mask_sensitive_info/test.py::test_encryption_functions
+test_merge_table_over_distributed/test.py::test_global_in
+test_merge_table_over_distributed/test.py::test_select_table_name_from_merge_over_distributed
+test_mutations_with_merge_tree/test.py::test_mutations_with_merge_background_task
+test_passing_max_partitions_to_read_remotely/test.py::test_default_database_on_cluster
+test_row_policy/test.py::test_change_of_users_xml_changes_row_policies
+test_row_policy/test.py::test_change_of_users_xml_changes_row_policies
+test_row_policy/test.py::test_dcl_introspection
+test_row_policy/test.py::test_dcl_introspection
+test_row_policy/test.py::test_dcl_management
+test_row_policy/test.py::test_dcl_management
+test_row_policy/test.py::test_dcl_users_with_policies_from_users_xml
+test_row_policy/test.py::test_dcl_users_with_policies_from_users_xml
+test_row_policy/test.py::test_grant_create_row_policy
+test_row_policy/test.py::test_grant_create_row_policy
+test_row_policy/test.py::test_introspection
+test_row_policy/test.py::test_introspection
+test_row_policy/test.py::test_join
+test_row_policy/test.py::test_join
+test_row_policy/test.py::test_miscellaneous_engines
+test_row_policy/test.py::test_miscellaneous_engines
+test_row_policy/test.py::test_policy_from_users_xml_affects_only_user_assigned
+test_row_policy/test.py::test_policy_from_users_xml_affects_only_user_assigned
+test_row_policy/test.py::test_policy_on_distributed_table_via_role
+test_row_policy/test.py::test_policy_on_distributed_table_via_role
+test_row_policy/test.py::test_reload_users_xml_by_timer
+test_row_policy/test.py::test_reload_users_xml_by_timer
+test_row_policy/test.py::test_row_policy_filter_with_subquery
+test_row_policy/test.py::test_row_policy_filter_with_subquery
+test_row_policy/test.py::test_smoke
+test_row_policy/test.py::test_smoke
+test_row_policy/test.py::test_some_users_without_policies
+test_row_policy/test.py::test_some_users_without_policies
+test_row_policy/test.py::test_tags_with_db_and_table_names
+test_row_policy/test.py::test_tags_with_db_and_table_names
+test_row_policy/test.py::test_throwif_error_in_prewhere_with_same_condition_as_filter
+test_row_policy/test.py::test_throwif_error_in_prewhere_with_same_condition_as_filter
+test_row_policy/test.py::test_throwif_error_in_where_with_same_condition_as_filter
+test_row_policy/test.py::test_throwif_error_in_where_with_same_condition_as_filter
+test_row_policy/test.py::test_throwif_in_prewhere_doesnt_expose_restricted_data
+test_row_policy/test.py::test_throwif_in_prewhere_doesnt_expose_restricted_data
+test_row_policy/test.py::test_throwif_in_where_doesnt_expose_restricted_data
+test_row_policy/test.py::test_throwif_in_where_doesnt_expose_restricted_data
+test_row_policy/test.py::test_users_xml_is_readonly
+test_row_policy/test.py::test_users_xml_is_readonly
+test_row_policy/test.py::test_with_prewhere
+test_row_policy/test.py::test_with_prewhere
+test_settings_constraints_distributed/test.py::test_select_clamps_settings
+test_backward_compatibility/test_cte_distributed.py::test_cte_distributed
+test_compression_codec_read/test.py::test_default_codec_read
+test_dictionaries_update_and_reload/test.py::test_reload_after_fail_in_cache_dictionary
+test_distributed_type_object/test.py::test_distributed_type_object
+test_materialized_mysql_database/test.py::test_select_without_columns_5_7
+test_materialized_mysql_database/test.py::test_select_without_columns_8_0
+test_shard_level_const_function/test.py::test_remote
+test_storage_postgresql/test.py::test_postgres_select_insert
+test_storage_rabbitmq/test.py::test_rabbitmq_materialized_view
+test_system_merges/test.py::test_mutation_simple[] +test_system_merges/test.py::test_mutation_simple[replicated] +test_backward_compatibility/test_insert_profile_events.py::test_new_client_compatible +test_backward_compatibility/test_insert_profile_events.py::test_old_client_compatible +test_backward_compatibility/test_vertical_merges_from_compact_parts.py::test_vertical_merges_from_compact_parts +test_disk_over_web_server/test.py::test_cache[node2] +test_disk_over_web_server/test.py::test_incorrect_usage +test_disk_over_web_server/test.py::test_replicated_database +test_disk_over_web_server/test.py::test_unavailable_server +test_disk_over_web_server/test.py::test_usage[node2] +test_distributed_backward_compatability/test.py::test_distributed_in_tuple +test_executable_table_function/test.py::test_executable_function_input_python +test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_different_version_nodes +test_groupBitmapAnd_on_distributed/test_groupBitmapAndState_on_distributed_table.py::test_groupBitmapAndState_on_distributed_table +test_settings_profile/test.py::test_show_profiles +test_sql_user_defined_functions_on_cluster/test.py::test_sql_user_defined_functions_on_cluster +test_backward_compatibility/test_functions.py::test_aggregate_states +test_backward_compatibility/test_functions.py::test_string_functions +test_default_compression_codec/test.py::test_default_codec_for_compact_parts +test_default_compression_codec/test.py::test_default_codec_multiple +test_default_compression_codec/test.py::test_default_codec_single +test_default_compression_codec/test.py::test_default_codec_version_update +test_postgresql_protocol/test.py::test_python_client +test_quota/test.py::test_add_remove_interval +test_quota/test.py::test_add_remove_quota +test_quota/test.py::test_consumption_of_show_clusters +test_quota/test.py::test_consumption_of_show_databases +test_quota/test.py::test_consumption_of_show_privileges +test_quota/test.py::test_consumption_of_show_processlist +test_quota/test.py::test_consumption_of_show_tables +test_quota/test.py::test_dcl_introspection +test_quota/test.py::test_dcl_management +test_quota/test.py::test_exceed_quota +test_quota/test.py::test_query_inserts +test_quota/test.py::test_quota_from_users_xml +test_quota/test.py::test_reload_users_xml_by_timer +test_quota/test.py::test_simpliest_quota +test_quota/test.py::test_tracking_quota +test_quota/test.py::test_users_xml_is_readonly +test_replicated_merge_tree_compatibility/test.py::test_replicated_merge_tree_defaults_compatibili \ No newline at end of file diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 59c3c82499c..5b986251c57 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -485,7 +485,7 @@ class ClickhouseIntegrationTestsRunner: result[test_file].append(test) return result - def _update_counters(self, main_counters, current_counters): + def _update_counters(self, main_counters, current_counters, broken_tests): for test in current_counters["PASSED"]: if ( test not in main_counters["PASSED"] @@ -498,10 +498,17 @@ class ClickhouseIntegrationTestsRunner: if test in main_counters["ERROR"]: main_counters["ERROR"].remove(test) is_flaky = True + if test in main_counters["BROKEN"]: + main_counters["BROKEN"].remove(test) + is_flaky = True + if is_flaky: main_counters["FLAKY"].append(test) else: - main_counters["PASSED"].append(test) + if test not in broken_tests: + 
main_counters["PASSED"].append(test) + else: + main_counters["NOT_FAILED"].append(test) for state in ("ERROR", "FAILED"): for test in current_counters[state]: @@ -511,8 +518,12 @@ class ClickhouseIntegrationTestsRunner: main_counters["PASSED"].remove(test) main_counters["FLAKY"].append(test) continue - if test not in main_counters[state]: - main_counters[state].append(test) + if test not in broken_tests: + if test not in main_counters[state]: + main_counters[state].append(test) + else: + if test not in main_counters["BROKEN"]: + main_counters["BROKEN"].append(test) for state in ("SKIPPED",): for test in current_counters[state]: @@ -570,11 +581,11 @@ class ClickhouseIntegrationTestsRunner: return res def try_run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers + self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests ): try: return self.run_test_group( - repo_path, test_group, tests_in_group, num_tries, num_workers + repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests ) except Exception as e: logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) @@ -592,7 +603,7 @@ class ClickhouseIntegrationTestsRunner: return counters, tests_times, [] def run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers + self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests ): counters = { "ERROR": [], @@ -600,6 +611,8 @@ class ClickhouseIntegrationTestsRunner: "FAILED": [], "SKIPPED": [], "FLAKY": [], + "BROKEN": [], + "NOT_FAILED": [], } tests_times = defaultdict(float) @@ -705,7 +718,7 @@ class ClickhouseIntegrationTestsRunner: ) times_lines = parse_test_times(info_path) new_tests_times = get_test_times(times_lines) - self._update_counters(counters, new_counters) + self._update_counters(counters, new_counters, broken_tests) for test_name, test_time in new_tests_times.items(): tests_times[test_name] = test_time @@ -778,7 +791,7 @@ class ClickhouseIntegrationTestsRunner: final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( - repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1 + repo_path, "bugfix" if should_fail else "flaky", tests_to_run, 1, 1, [] ) logs += log_paths if counters["FAILED"]: @@ -899,6 +912,8 @@ class ClickhouseIntegrationTestsRunner: "FAILED": [], "SKIPPED": [], "FLAKY": [], + "BROKEN": [], + "NOT_FAILED": [], } tests_times = defaultdict(float) tests_log_paths = defaultdict(list) @@ -910,10 +925,16 @@ class ClickhouseIntegrationTestsRunner: logging.info("Shuffling test groups") random.shuffle(items_to_run) + broken_tests = list() + if self.use_analyzer: + with open(f"{repo_path}/tests/analyzer_integration_broken_tests.txt") as f: + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") + for group, tests in items_to_run: logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( - repo_path, group, tests, MAX_RETRY, NUM_WORKERS + repo_path, group, tests, MAX_RETRY, NUM_WORKERS, broken_tests ) total_tests = 0 for counter, value in group_counters.items(): From 532eba18a0f2e3c4d15d3334405756dc3aae1637 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Jun 2023 19:58:58 +0000 Subject: [PATCH 098/522] Automatic style fix --- tests/integration/ci-runner.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 
3 deletions(-) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 5b986251c57..6a6134d7204 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -581,11 +581,22 @@ class ClickhouseIntegrationTestsRunner: return res def try_run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests + self, + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ): try: return self.run_test_group( - repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ) except Exception as e: logging.info("Failed to run {}:\n{}".format(str(test_group), str(e))) @@ -603,7 +614,13 @@ class ClickhouseIntegrationTestsRunner: return counters, tests_times, [] def run_test_group( - self, repo_path, test_group, tests_in_group, num_tries, num_workers, broken_tests + self, + repo_path, + test_group, + tests_in_group, + num_tries, + num_workers, + broken_tests, ): counters = { "ERROR": [], From ab903d395e6979f3885f2689bdb216986a3a4ffd Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 22 Jun 2023 20:43:26 +0000 Subject: [PATCH 099/522] Place new test into separate sql --- tests/queries/0_stateless/00718_format_datetime.reference | 5 ----- tests/queries/0_stateless/00718_format_datetime.sql | 5 ----- tests/queries/0_stateless/00718_format_datetime_1.reference | 5 +++++ tests/queries/0_stateless/00718_format_datetime_1.sql | 5 +++++ 4 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/00718_format_datetime_1.reference create mode 100644 tests/queries/0_stateless/00718_format_datetime_1.sql diff --git a/tests/queries/0_stateless/00718_format_datetime.reference b/tests/queries/0_stateless/00718_format_datetime.reference index a51134348cc..50874ac9b2e 100644 --- a/tests/queries/0_stateless/00718_format_datetime.reference +++ b/tests/queries/0_stateless/00718_format_datetime.reference @@ -61,11 +61,6 @@ no formatting pattern no formatting pattern 2022-12-08 18:11:29.123400000 2022-12-08 18:11:29.1 2022-12-08 18:11:29.000000 -1900-01-01 00:00:00.000 -1962-12-08 18:11:29.123 -1969-12-31 23:59:59.999 -1970-01-01 00:00:00.000 -1970-01-01 00:00:00.001 2022-12-08 18:11:29.000000 2022-12-08 00:00:00.000000 2022-12-08 00:00:00.000000 diff --git a/tests/queries/0_stateless/00718_format_datetime.sql b/tests/queries/0_stateless/00718_format_datetime.sql index 14e43c31d9c..c0db6a4f64e 100644 --- a/tests/queries/0_stateless/00718_format_datetime.sql +++ b/tests/queries/0_stateless/00718_format_datetime.sql @@ -87,11 +87,6 @@ select formatDateTime(toDateTime64('2010-01-04 12:34:56.123456789', 9), '%f') SE select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 9, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 1, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime64('2022-12-08 18:11:29.1234', 0, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1900-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1962-12-08 18:11:29.123', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1970-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); -select formatDateTime(toDateTime64('1970-01-01 00:00:00.001', 3, 'UTC'), '%F %T.%f'); select formatDateTime(toDateTime('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); 
select formatDateTime(toDate32('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); select formatDateTime(toDate('2022-12-08 18:11:29', 'UTC'), '%F %T.%f'); diff --git a/tests/queries/0_stateless/00718_format_datetime_1.reference b/tests/queries/0_stateless/00718_format_datetime_1.reference new file mode 100644 index 00000000000..e495b69ddfc --- /dev/null +++ b/tests/queries/0_stateless/00718_format_datetime_1.reference @@ -0,0 +1,5 @@ +1900-01-01 00:00:00.000 +1962-12-08 18:11:29.123 +1969-12-31 23:59:59.999 +1970-01-01 00:00:00.000 +1970-01-01 00:00:00.001 diff --git a/tests/queries/0_stateless/00718_format_datetime_1.sql b/tests/queries/0_stateless/00718_format_datetime_1.sql new file mode 100644 index 00000000000..855b0506f44 --- /dev/null +++ b/tests/queries/0_stateless/00718_format_datetime_1.sql @@ -0,0 +1,5 @@ +select formatDateTime(toDateTime64('1900-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1962-12-08 18:11:29.123', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1969-12-31 23:59:59.999', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.000', 3, 'UTC'), '%F %T.%f'); +select formatDateTime(toDateTime64('1970-01-01 00:00:00.001', 3, 'UTC'), '%F %T.%f'); From 415749f64c6eb6e49fa95ac5038fc689766902ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 05:09:20 +0200 Subject: [PATCH 100/522] Add a test for #44816 --- ...793_implicit_pretty_format_settings.expect | 27 +++++++++++++++++++ ..._implicit_pretty_format_settings.reference | 0 2 files changed, 27 insertions(+) create mode 100755 tests/queries/0_stateless/02793_implicit_pretty_format_settings.expect create mode 100644 tests/queries/0_stateless/02793_implicit_pretty_format_settings.reference diff --git a/tests/queries/0_stateless/02793_implicit_pretty_format_settings.expect b/tests/queries/0_stateless/02793_implicit_pretty_format_settings.expect new file mode 100755 index 00000000000..569cbc7330e --- /dev/null +++ b/tests/queries/0_stateless/02793_implicit_pretty_format_settings.expect @@ -0,0 +1,27 @@ +#!/usr/bin/expect -f + +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 +set history_file $env(CLICKHOUSE_TMP)/$basename.history + +log_user 0 +set timeout 60 +match_max 100000 +expect_after { + # Do not ignore eof from expect + -i $any_spawn_id eof { exp_continue } + # A default timeout action is to do nothing, change it to fail + -i $any_spawn_id timeout { exit 1 } +} + +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" +expect ":) " + +# Send a command +send -- "SELECT 1 SETTINGS output_format_pretty_row_numbers = 1\r" +expect "1. 
│ 1 │" +expect ":) " + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/02793_implicit_pretty_format_settings.reference b/tests/queries/0_stateless/02793_implicit_pretty_format_settings.reference new file mode 100644 index 00000000000..e69de29bb2d From 2d11ce43f1d0ddd927c9ea5fa8a2bc2c39414f83 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 23 Jun 2023 05:17:01 +0000 Subject: [PATCH 101/522] Build fix --- src/Functions/formatDateTime.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index c849b0dd933..bdd694c7b94 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -947,8 +947,8 @@ public: if (vec[i].value < 0 && c.fractional) { - c.fractional = DecimalUtils::scaleMultiplier(scale) - + (c.whole ? DataType::FieldType(-1) : DataType::FieldType(1)) * c.fractional; + using F = typename DataType::FieldType; + c.fractional = DecimalUtils::scaleMultiplier(scale) + (c.whole ? F(-1) : F(1)) * c.fractional; --c.whole; } From 118f02b522420a786b093b3e55fcd404045df8a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 07:42:01 +0200 Subject: [PATCH 102/522] Add a test for calculate_text_stack_trace setting --- .../02796_calculate_text_stack_trace.reference | 2 ++ .../0_stateless/02796_calculate_text_stack_trace.sql | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02796_calculate_text_stack_trace.reference create mode 100644 tests/queries/0_stateless/02796_calculate_text_stack_trace.sql diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference new file mode 100644 index 00000000000..b261da18d51 --- /dev/null +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference @@ -0,0 +1,2 @@ +1 +0 diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql new file mode 100644 index 00000000000..3c2806ac010 --- /dev/null +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -0,0 +1,8 @@ +SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SYSTEM FLUSH LOGS; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; + +SET calculate_text_stack_trace = 0; +SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SYSTEM FLUSH LOGS; +SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; From 7fc8942ea9d7b15800d9d6ec8355b162013bf32e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 07:50:44 +0200 Subject: [PATCH 103/522] Update test --- .../02796_calculate_text_stack_trace.reference | 4 ++++ .../02796_calculate_text_stack_trace.sql | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference index b261da18d51..c800bbce32b 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.reference @@ 
-1,2 +1,6 @@ 1 +1 +1 +0 +0 0 diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql index 3c2806ac010..601bd16fb39 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -1,8 +1,16 @@ -SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +-- Tags: no-parallel + +TRUNCATE TABLE system.text_log; + +SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; + +TRUNCATE TABLE system.text_log; SET calculate_text_stack_trace = 0; -SELECT throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SELECT 'World', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; From 9680596d36c3aa6a98a3ad899cdb709d87a75b6b Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 23 Jun 2023 13:03:41 +0200 Subject: [PATCH 104/522] decrease log level, make logs shorter --- src/IO/WriteBufferFromS3.cpp | 76 ++++++++++++++----------- src/IO/WriteBufferFromS3.h | 3 +- src/IO/WriteBufferFromS3TaskTracker.cpp | 5 +- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index ebab9b323b8..ff6da9bf444 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -98,15 +98,13 @@ WriteBufferFromS3::WriteBufferFromS3( std::move(schedule_), upload_settings.max_inflight_parts_for_one_file)) { - LOG_TRACE(log, "Create WriteBufferFromS3, {}", getLogDetails()); + LOG_TRACE(log, "Create WriteBufferFromS3, {}", getShortLogDetails()); allocateBuffer(); } void WriteBufferFromS3::nextImpl() { - LOG_TRACE(log, "nextImpl with incoming data size {}, memory buffer size {}. {}", offset(), memory.size(), getLogDetails()); - if (is_prefinalized) throw Exception( ErrorCodes::LOGICAL_ERROR, @@ -138,7 +136,7 @@ void WriteBufferFromS3::preFinalize() if (is_prefinalized) return; - LOG_TRACE(log, "preFinalize WriteBufferFromS3. {}", getLogDetails()); + LOG_TEST(log, "preFinalize WriteBufferFromS3. 
{}", getShortLogDetails()); /// This function should not be run again if an exception has occurred is_prefinalized = true; @@ -177,7 +175,7 @@ void WriteBufferFromS3::preFinalize() void WriteBufferFromS3::finalizeImpl() { - LOG_TRACE(log, "finalizeImpl WriteBufferFromS3. {}.", getLogDetails()); + LOG_TRACE(log, "finalizeImpl WriteBufferFromS3. {}.", getShortLogDetails()); if (!is_prefinalized) preFinalize(); @@ -206,7 +204,7 @@ void WriteBufferFromS3::finalizeImpl() } } -String WriteBufferFromS3::getLogDetails() const +String WriteBufferFromS3::getVerboseLogDetails() const { String multipart_upload_details; if (!multipart_upload_id.empty()) @@ -217,6 +215,17 @@ String WriteBufferFromS3::getLogDetails() const bucket, key, total_size, count(), hidden_size, offset(), task_tracker->isAsync(), is_prefinalized, finalized, multipart_upload_details); } +String WriteBufferFromS3::getShortLogDetails() const +{ + String multipart_upload_details; + if (!multipart_upload_id.empty()) + multipart_upload_details = fmt::format(", upload id {}" + , multipart_upload_id); + + return fmt::format("Details: bucket {}, key {}, total size {}{}", + bucket, key, total_size, multipart_upload_details); +} + void WriteBufferFromS3::tryToAbortMultipartUpload() { try @@ -226,14 +235,14 @@ void WriteBufferFromS3::tryToAbortMultipartUpload() } catch (...) { - LOG_ERROR(log, "Multipart upload hasn't aborted. {}", getLogDetails()); + LOG_ERROR(log, "Multipart upload hasn't aborted. {}", getVerboseLogDetails()); tryLogCurrentException(__PRETTY_FUNCTION__); } } WriteBufferFromS3::~WriteBufferFromS3() { - LOG_TRACE(log, "Close WriteBufferFromS3. {}.", getLogDetails()); + LOG_TRACE(log, "Close WriteBufferFromS3. {}.", getShortLogDetails()); /// That destructor could be call with finalized=false in case of exceptions if (!finalized) @@ -243,14 +252,14 @@ WriteBufferFromS3::~WriteBufferFromS3() "WriteBufferFromS3 is not finalized in destructor. " "The file might not be written to S3. " "{}.", - getLogDetails()); + getVerboseLogDetails()); } task_tracker->safeWaitAll(); if (!multipart_upload_id.empty() && !multipart_upload_finished) { - LOG_WARNING(log, "WriteBufferFromS3 was neither finished nor aborted, try to abort upload in destructor. {}.", getLogDetails()); + LOG_WARNING(log, "WriteBufferFromS3 was neither finished nor aborted, try to abort upload in destructor. {}.", getVerboseLogDetails()); tryToAbortMultipartUpload(); } } @@ -321,8 +330,6 @@ void WriteBufferFromS3::allocateBuffer() memory = Memory(buffer_allocation_policy->getBufferSize()); WriteBuffer::set(memory.data(), memory.size()); - - LOG_TRACE(log, "Allocated buffer with size {}. {}", buffer_allocation_policy->getBufferSize(), getLogDetails()); } void WriteBufferFromS3::setFakeBufferWhenPreFinalized() @@ -346,7 +353,7 @@ void WriteBufferFromS3::writeMultipartUpload() void WriteBufferFromS3::createMultipartUpload() { - LOG_TRACE(log, "Create multipart upload. Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id); + LOG_TEST(log, "Create multipart upload. {}", getShortLogDetails()); S3::CreateMultipartUploadRequest req; @@ -378,18 +385,18 @@ void WriteBufferFromS3::createMultipartUpload() } multipart_upload_id = outcome.GetResult().GetUploadId(); - LOG_TRACE(log, "Multipart upload has created. {}", getLogDetails()); + LOG_TRACE(log, "Multipart upload has created. {}", getShortLogDetails()); } void WriteBufferFromS3::abortMultipartUpload() { if (multipart_upload_id.empty()) { - LOG_WARNING(log, "Nothing to abort. 
{}", getLogDetails()); + LOG_WARNING(log, "Nothing to abort. {}", getVerboseLogDetails()); return; } - LOG_WARNING(log, "Abort multipart upload. {}", getLogDetails()); + LOG_WARNING(log, "Abort multipart upload. {}", getVerboseLogDetails()); S3::AbortMultipartUploadRequest req; req.SetBucket(bucket); @@ -412,13 +419,12 @@ void WriteBufferFromS3::abortMultipartUpload() throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } - LOG_WARNING(log, "Multipart upload has aborted successfully. {}", getLogDetails()); + LOG_WARNING(log, "Multipart upload has aborted successfully. {}", getVerboseLogDetails()); } S3::UploadPartRequest WriteBufferFromS3::getUploadRequest(size_t part_number, PartData & data) { ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, data.data_size); - LOG_TRACE(log, "getUploadRequest, size {}, key: {}", data.data_size, key); S3::UploadPartRequest req; @@ -439,13 +445,13 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) { if (data.data_size == 0) { - LOG_TRACE(log, "Skipping writing part as empty."); + LOG_TEST(log, "Skipping writing part as empty {}", getShortLogDetails()); return; } multipart_tags.push_back({}); size_t part_number = multipart_tags.size(); - LOG_TRACE(log, "writePart {}, part size: {}, part number: {}", getLogDetails(), data.data_size, part_number); + LOG_TEST(log, "writePart {}, part size {}, part number {}", getShortLogDetails(), data.data_size, part_number); if (multipart_upload_id.empty()) throw Exception( @@ -468,11 +474,12 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) { throw Exception( ErrorCodes::LOGICAL_ERROR, - "Part size exceeded max_upload_part_size, part number: {}, part size {}, max_upload_part_size {}, {}", + "Part size exceeded max_upload_part_size. {}, part number {}, part size {}, max_upload_part_size {}", + getShortLogDetails(), part_number, data.data_size, - upload_settings.max_upload_part_size, - getLogDetails()); + upload_settings.max_upload_part_size + ); } auto req = getUploadRequest(part_number, data); @@ -480,7 +487,10 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) auto upload_worker = [&, worker_data, part_number] () { - LOG_TEST(log, "Writing part started. bucket {}, key {}, part id {}", bucket, key, part_number); + auto & data_size = std::get<1>(*worker_data).data_size; + + LOG_TEST(log, "Write part started {}, part size {}, part number {}", + getShortLogDetails(), data_size, part_number); ProfileEvents::increment(ProfileEvents::S3UploadPart); if (write_settings.for_object_storage) @@ -506,7 +516,8 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) multipart_tags[part_number-1] = outcome.GetResult().GetETag(); - LOG_TEST(log, "Writing part finished. bucket {}, key{}, part id {}, etag {}", bucket, key, part_number, multipart_tags[part_number-1]); + LOG_TEST(log, "Write part succeeded {}, part size {}, part number {}, etag {}", + getShortLogDetails(), data_size, part_number, multipart_tags[part_number-1]); }; task_tracker->add(std::move(upload_worker)); @@ -514,7 +525,7 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) void WriteBufferFromS3::completeMultipartUpload() { - LOG_TRACE(log, "Completing multipart upload. {}, Parts: {}", getLogDetails(), multipart_tags.size()); + LOG_TEST(log, "Completing multipart upload. 
{}, Parts: {}", getShortLogDetails(), multipart_tags.size()); if (multipart_tags.empty()) throw Exception( @@ -559,7 +570,7 @@ void WriteBufferFromS3::completeMultipartUpload() if (outcome.IsSuccess()) { - LOG_TRACE(log, "Multipart upload has completed. {}, Parts: {}", getLogDetails(), multipart_tags.size()); + LOG_TRACE(log, "Multipart upload has completed. {}, Parts: {}", getShortLogDetails(), multipart_tags.size()); return; } @@ -569,7 +580,7 @@ void WriteBufferFromS3::completeMultipartUpload() { /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests /// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it - LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error, will retry. {}, Parts: {}", getLogDetails(), multipart_tags.size()); + LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error, will retry. {}, Parts: {}", getVerboseLogDetails(), multipart_tags.size()); } else { @@ -589,7 +600,6 @@ void WriteBufferFromS3::completeMultipartUpload() S3::PutObjectRequest WriteBufferFromS3::getPutRequest(PartData & data) { ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, data.data_size); - LOG_TRACE(log, "getPutRequest, size {}, key {}", data.data_size, key); S3::PutObjectRequest req; @@ -612,14 +622,14 @@ S3::PutObjectRequest WriteBufferFromS3::getPutRequest(PartData & data) void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data) { - LOG_TRACE(log, "Making single part upload. {}.", getLogDetails()); + LOG_TEST(log, "Making single part upload. {}, size {}", getShortLogDetails(), data.data_size); auto req = getPutRequest(data); auto worker_data = std::make_shared>(std::move(req), std::move(data)); auto upload_worker = [&, worker_data] () { - LOG_TEST(log, "writing single part upload started. bucket {}, key {}", bucket, key); + LOG_TEST(log, "writing single part upload started. {}", getShortLogDetails()); auto & request = std::get<0>(*worker_data); size_t content_length = request.GetContentLength(); @@ -642,7 +652,7 @@ void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data if (outcome.IsSuccess()) { - LOG_TRACE(log, "Single part upload has completed. bucket {}, key {}, object size {}", bucket, key, content_length); + LOG_TRACE(log, "Single part upload has completed. {}, size {}", getShortLogDetails(), content_length); return; } @@ -653,7 +663,7 @@ void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data { /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests - LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for bucket {}, key {}, object size {}, will retry", bucket, key, content_length); + LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error. {}, size {}, will retry", getShortLogDetails(), content_length); } else { diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index f4200b0a646..48698df2eb5 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -59,7 +59,8 @@ private: /// Receives response from the server after sending all data. 
void finalizeImpl() override; - String getLogDetails() const; + String getVerboseLogDetails() const; + String getShortLogDetails() const; struct PartData; void hidePartialData(); diff --git a/src/IO/WriteBufferFromS3TaskTracker.cpp b/src/IO/WriteBufferFromS3TaskTracker.cpp index 2790d71db3d..f97afe82164 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.cpp +++ b/src/IO/WriteBufferFromS3TaskTracker.cpp @@ -130,8 +130,6 @@ void WriteBufferFromS3::TaskTracker::add(Callback && func) /// this move is nothrow *future_placeholder = scheduler(std::move(func_with_notification), Priority{}); - LOG_TEST(log, "add ended, in queue {}, limit {}", futures.size(), max_tasks_inflight); - waitTilInflightShrink(); } @@ -140,6 +138,9 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink() if (!max_tasks_inflight) return; + if (futures.size() >= max_tasks_inflight) + LOG_TEST(log, "have to wait some tasks finish, in queue {}, limit {}", futures.size(), max_tasks_inflight); + Stopwatch watch; /// Alternative approach is to wait until at least futures.size() - max_tasks_inflight element are finished From 5b6dabdc34e1b6f198a76b489ebf0ff728c4b166 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 23 Jun 2023 23:02:30 +0200 Subject: [PATCH 105/522] introduce LogSeriesLimiterPtr --- src/Common/LoggingFormatStringHelpers.cpp | 101 ++++++++++++++++++++++ src/Common/LoggingFormatStringHelpers.h | 28 ++++++ src/Common/logger_useful.h | 3 + src/IO/WriteBufferFromS3.cpp | 33 +++---- src/IO/WriteBufferFromS3.h | 2 + src/IO/WriteBufferFromS3TaskTracker.cpp | 5 +- src/IO/WriteBufferFromS3TaskTracker.h | 6 +- 7 files changed, 158 insertions(+), 20 deletions(-) diff --git a/src/Common/LoggingFormatStringHelpers.cpp b/src/Common/LoggingFormatStringHelpers.cpp index 85659e45791..648ec034adf 100644 --- a/src/Common/LoggingFormatStringHelpers.cpp +++ b/src/Common/LoggingFormatStringHelpers.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -74,3 +75,103 @@ void LogFrequencyLimiterIml::cleanup(time_t too_old_threshold_s) std::erase_if(logged_messages, [old](const auto & elem) { return elem.second.first < old; }); last_cleanup = now; } + + + +std::unordered_map> LogSeriesLimiter::series_settings; +std::unordered_map> LogSeriesLimiter::series_loggers; +std::mutex LogSeriesLimiter::mutex; + +LogSeriesLimiter::LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_, time_t interval_s_) + : logger(logger_) +{ + if (allowed_count_ == 0) + { + accepted = false; + return; + } + + if (interval_s_ == 0) + { + accepted = true; + return; + } + + time_t now = time(nullptr); + UInt128 name_hash = sipHash128(logger->name().c_str(), logger->name().size()); + + std::lock_guard lock(mutex); + + if (series_settings.contains(name_hash)) + { + auto & settings = series_settings[name_hash]; + auto & [allowed_count, interval_s] = settings; + chassert(allowed_count_ == allowed_count); + chassert(interval_s_ == interval_s); + } + else + { + series_settings[name_hash] = std::make_tuple(allowed_count_, interval_s_); + } + + auto register_as_first = [&] () TSA_REQUIRES(mutex) + { + assert(allowed_count_ > 0); + accepted = true; + series_loggers[name_hash] = std::make_tuple(now, 1, 1); + }; + + + if (!series_loggers.contains(name_hash)) + { + register_as_first(); + return; + } + + auto & [last_time, accepted_count, total_count] = series_loggers[name_hash]; + if (last_time + interval_s_ <= now) + { + debug_message = fmt::format( + " (LogSeriesLimiter: on interval from {} to {} accepted series {} / {} for the logger {} : {})", +
DateLUT::instance().timeToString(last_time), + DateLUT::instance().timeToString(now), + accepted_count, + total_count, + logger->name(), + double(name_hash)); + + register_as_first(); + return; + } + + if (accepted_count < allowed_count_) + { + accepted = true; + ++accepted_count; + } + ++total_count; +} + +void LogSeriesLimiter::log(Poco::Message & message) +{ + std::string_view pattern = message.getFormatString(); + if (pattern.empty()) + { + /// Do not filter messages without a format string + if (auto * channel = logger->getChannel()) + channel->log(message); + return; + } + + if (!accepted) + return; + + if (!debug_message.empty()) + { + message.appendText(debug_message); + debug_message.clear(); + } + + if (auto * channel = logger->getChannel()) + channel->log(message); +} diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index b29510a2c93..5dece8cd6ea 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -191,6 +191,34 @@ public: Poco::Logger * getLogger() { return logger; } }; +/// This wrapper helps to avoid too noisy log messages from similar objects. +/// For each logger name it tracks how many messages were accepted within the current time interval. +class LogSeriesLimiter +{ + static std::mutex mutex; + + /// Hash(logger_name) -> (allowed_count, interval_s) + static std::unordered_map> series_settings TSA_GUARDED_BY(mutex); + + /// Hash(logger_name) -> (last_logged_time_s, accepted_count, total_count) + static std::unordered_map> series_loggers TSA_GUARDED_BY(mutex); + + Poco::Logger * logger = nullptr; + bool accepted = false; + String debug_message; +public: + LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_, time_t interval_s_); + + LogSeriesLimiter & operator -> () { return *this; } + bool is(Poco::Message::Priority priority) { return logger->is(priority); } + LogSeriesLimiter * getChannel() {return this; } + const String & name() const { return logger->name(); } + + void log(Poco::Message & message); + + Poco::Logger * getLogger() { return logger; } +}; + /// This wrapper is useful to save formatted message into a String before sending it to a logger class LogToStrImpl { diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h index 3ac950cbdfb..3ebb1d25075 100644 --- a/src/Common/logger_useful.h +++ b/src/Common/logger_useful.h @@ -15,12 +15,15 @@ namespace Poco { class Logger; } #define LogToStr(x, y) std::make_unique(x, y) #define LogFrequencyLimiter(x, y) std::make_unique(x, y) +using LogSeriesLimiterPtr = std::shared_ptr; + namespace { [[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; } [[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); } [[maybe_unused]] std::unique_ptr getLogger(std::unique_ptr && logger) { return logger; } [[maybe_unused]] std::unique_ptr getLogger(std::unique_ptr && logger) { return logger; } + [[maybe_unused]] LogSeriesLimiterPtr getLogger(LogSeriesLimiterPtr & logger) { return logger; } } #define LOG_IMPL_FIRST_ARG(X, ...)
X diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index ff6da9bf444..8714282f7a8 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -96,9 +96,10 @@ WriteBufferFromS3::WriteBufferFromS3( , task_tracker( std::make_unique( std::move(schedule_), - upload_settings.max_inflight_parts_for_one_file)) + upload_settings.max_inflight_parts_for_one_file, + limitedLog)) { - LOG_TRACE(log, "Create WriteBufferFromS3, {}", getShortLogDetails()); + LOG_TRACE(limitedLog, "Create WriteBufferFromS3, {}", getShortLogDetails()); allocateBuffer(); } @@ -136,7 +137,7 @@ void WriteBufferFromS3::preFinalize() if (is_prefinalized) return; - LOG_TEST(log, "preFinalize WriteBufferFromS3. {}", getShortLogDetails()); + LOG_TEST(limitedLog, "preFinalize WriteBufferFromS3. {}", getShortLogDetails()); /// This function should not be run again if an exception has occurred is_prefinalized = true; @@ -175,7 +176,7 @@ void WriteBufferFromS3::preFinalize() void WriteBufferFromS3::finalizeImpl() { - LOG_TRACE(log, "finalizeImpl WriteBufferFromS3. {}.", getShortLogDetails()); + LOG_TRACE(limitedLog, "finalizeImpl WriteBufferFromS3. {}.", getShortLogDetails()); if (!is_prefinalized) preFinalize(); @@ -242,7 +243,7 @@ void WriteBufferFromS3::tryToAbortMultipartUpload() WriteBufferFromS3::~WriteBufferFromS3() { - LOG_TRACE(log, "Close WriteBufferFromS3. {}.", getShortLogDetails()); + LOG_TRACE(limitedLog, "Close WriteBufferFromS3. {}.", getShortLogDetails()); /// That destructor could be call with finalized=false in case of exceptions if (!finalized) @@ -353,7 +354,7 @@ void WriteBufferFromS3::writeMultipartUpload() void WriteBufferFromS3::createMultipartUpload() { - LOG_TEST(log, "Create multipart upload. {}", getShortLogDetails()); + LOG_TEST(limitedLog, "Create multipart upload. {}", getShortLogDetails()); S3::CreateMultipartUploadRequest req; @@ -385,7 +386,7 @@ void WriteBufferFromS3::createMultipartUpload() } multipart_upload_id = outcome.GetResult().GetUploadId(); - LOG_TRACE(log, "Multipart upload has created. {}", getShortLogDetails()); + LOG_TRACE(limitedLog, "Multipart upload has created. 
{}", getShortLogDetails()); } void WriteBufferFromS3::abortMultipartUpload() @@ -445,13 +446,13 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) { if (data.data_size == 0) { - LOG_TEST(log, "Skipping writing part as empty {}", getShortLogDetails()); + LOG_TEST(limitedLog, "Skipping writing part as empty {}", getShortLogDetails()); return; } multipart_tags.push_back({}); size_t part_number = multipart_tags.size(); - LOG_TEST(log, "writePart {}, part size {}, part number {}", getShortLogDetails(), data.data_size, part_number); + LOG_TEST(limitedLog, "writePart {}, part size {}, part number {}", getShortLogDetails(), data.data_size, part_number); if (multipart_upload_id.empty()) throw Exception( @@ -489,7 +490,7 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) { auto & data_size = std::get<1>(*worker_data).data_size; - LOG_TEST(log, "Write part started {}, part size {}, part number {}", + LOG_TEST(limitedLog, "Write part started {}, part size {}, part number {}", getShortLogDetails(), data_size, part_number); ProfileEvents::increment(ProfileEvents::S3UploadPart); @@ -516,7 +517,7 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) multipart_tags[part_number-1] = outcome.GetResult().GetETag(); - LOG_TEST(log, "Write part succeeded {}, part size {}, part number {}, etag {}", + LOG_TEST(limitedLog, "Write part succeeded {}, part size {}, part number {}, etag {}", getShortLogDetails(), data_size, part_number, multipart_tags[part_number-1]); }; @@ -525,7 +526,7 @@ void WriteBufferFromS3::writePart(WriteBufferFromS3::PartData && data) void WriteBufferFromS3::completeMultipartUpload() { - LOG_TEST(log, "Completing multipart upload. {}, Parts: {}", getShortLogDetails(), multipart_tags.size()); + LOG_TEST(limitedLog, "Completing multipart upload. {}, Parts: {}", getShortLogDetails(), multipart_tags.size()); if (multipart_tags.empty()) throw Exception( @@ -570,7 +571,7 @@ void WriteBufferFromS3::completeMultipartUpload() if (outcome.IsSuccess()) { - LOG_TRACE(log, "Multipart upload has completed. {}, Parts: {}", getShortLogDetails(), multipart_tags.size()); + LOG_TRACE(limitedLog, "Multipart upload has completed. {}, Parts: {}", getShortLogDetails(), multipart_tags.size()); return; } @@ -622,14 +623,14 @@ S3::PutObjectRequest WriteBufferFromS3::getPutRequest(PartData & data) void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data) { - LOG_TEST(log, "Making single part upload. {}, size {}", getShortLogDetails(), data.data_size); + LOG_TEST(limitedLog, "Making single part upload. {}, size {}", getShortLogDetails(), data.data_size); auto req = getPutRequest(data); auto worker_data = std::make_shared>(std::move(req), std::move(data)); auto upload_worker = [&, worker_data] () { - LOG_TEST(log, "writing single part upload started. {}", getShortLogDetails()); + LOG_TEST(limitedLog, "writing single part upload started. {}", getShortLogDetails()); auto & request = std::get<0>(*worker_data); size_t content_length = request.GetContentLength(); @@ -652,7 +653,7 @@ void WriteBufferFromS3::makeSinglepartUpload(WriteBufferFromS3::PartData && data if (outcome.IsSuccess()) { - LOG_TRACE(log, "Single part upload has completed. {}, size {}", getShortLogDetails(), content_length); + LOG_TRACE(limitedLog, "Single part upload has completed. 
{}, size {}", getShortLogDetails(), content_length); return; } diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 48698df2eb5..590342cc997 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -5,6 +5,7 @@ #if USE_AWS_S3 #include +#include #include #include #include @@ -89,6 +90,7 @@ private: const std::shared_ptr client_ptr; const std::optional> object_metadata; Poco::Logger * log = &Poco::Logger::get("WriteBufferFromS3"); + LogSeriesLimiterPtr limitedLog = std::make_shared(log, 1, 5); IBufferAllocationPolicyPtr buffer_allocation_policy; diff --git a/src/IO/WriteBufferFromS3TaskTracker.cpp b/src/IO/WriteBufferFromS3TaskTracker.cpp index f97afe82164..bce122dd6c8 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.cpp +++ b/src/IO/WriteBufferFromS3TaskTracker.cpp @@ -12,10 +12,11 @@ namespace ProfileEvents namespace DB { -WriteBufferFromS3::TaskTracker::TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_) +WriteBufferFromS3::TaskTracker::TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_) : is_async(bool(scheduler_)) , scheduler(scheduler_ ? std::move(scheduler_) : syncRunner()) , max_tasks_inflight(max_tasks_inflight_) + , limitedLog(limitedLog_) {} WriteBufferFromS3::TaskTracker::~TaskTracker() @@ -139,7 +140,7 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink() return; if (futures.size() >= max_tasks_inflight) - LOG_TEST(log, "have to wait some tasks finish, in queue {}, limit {}", futures.size(), max_tasks_inflight); + LOG_TEST(limitedLog, "have to wait some tasks finish, in queue {}, limit {}", futures.size(), max_tasks_inflight); Stopwatch watch; diff --git a/src/IO/WriteBufferFromS3TaskTracker.h b/src/IO/WriteBufferFromS3TaskTracker.h index c3f4628b946..815e041ae52 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.h +++ b/src/IO/WriteBufferFromS3TaskTracker.h @@ -6,6 +6,8 @@ #include "WriteBufferFromS3.h" +#include + #include namespace DB @@ -25,7 +27,7 @@ class WriteBufferFromS3::TaskTracker public: using Callback = std::function; - TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_); + TaskTracker(ThreadPoolCallbackRunner scheduler_, size_t max_tasks_inflight_, LogSeriesLimiterPtr limitedLog_); ~TaskTracker(); static ThreadPoolCallbackRunner syncRunner(); @@ -57,7 +59,7 @@ private: using FutureList = std::list>; FutureList futures; - Poco::Logger * log = &Poco::Logger::get("TaskTracker"); + LogSeriesLimiterPtr limitedLog; std::mutex mutex; std::condition_variable has_finished TSA_GUARDED_BY(mutex); From 80aa8863e5330bf5774e569d0f92307bf98bad52 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Sat, 24 Jun 2023 00:12:30 +0200 Subject: [PATCH 106/522] Update LoggingFormatStringHelpers.cpp --- src/Common/LoggingFormatStringHelpers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/LoggingFormatStringHelpers.cpp b/src/Common/LoggingFormatStringHelpers.cpp index 648ec034adf..ed578018d5f 100644 --- a/src/Common/LoggingFormatStringHelpers.cpp +++ b/src/Common/LoggingFormatStringHelpers.cpp @@ -77,7 +77,6 @@ void LogFrequencyLimiterIml::cleanup(time_t too_old_threshold_s) } - std::unordered_map> LogSeriesLimiter::series_settings; std::unordered_map> LogSeriesLimiter::series_loggers; std::mutex LogSeriesLimiter::mutex; From b8ede5262a5df9b4db2c25ecebc7818f763f9e9c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 07:51:49 +0200 Subject: [PATCH 107/522] 
Add a test for #42691 --- .../0_stateless/02802_with_cube_with_totals.reference | 8 ++++++++ tests/queries/0_stateless/02802_with_cube_with_totals.sql | 2 ++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02802_with_cube_with_totals.reference create mode 100644 tests/queries/0_stateless/02802_with_cube_with_totals.sql diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.reference b/tests/queries/0_stateless/02802_with_cube_with_totals.reference new file mode 100644 index 00000000000..c7b7b570456 --- /dev/null +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.reference @@ -0,0 +1,8 @@ +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 + +((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 +\N +\N + +\N diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.sql b/tests/queries/0_stateless/02802_with_cube_with_totals.sql new file mode 100644 index 00000000000..77adb68eb4b --- /dev/null +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.sql @@ -0,0 +1,2 @@ +SELECT tuple((2147483648, (-0., 1.1754943508222875e-38, 2147483646, '-9223372036854775808', NULL))), toInt128(0.0001) GROUP BY ((256, toInt64(1.1754943508222875e-38), NULL), NULL, -0., ((65535, '-92233720368547758.07'), 0.9999), tuple(((1., 3.4028234663852886e38, '1', 0.5), NULL, tuple('0.1')))) WITH CUBE WITH TOTALS; +SELECT NULL GROUP BY toUUID(NULL, '0', NULL, '0.0000065535'), 1 WITH CUBE WITH TOTALS; From f0aee54dab32b1db35171c526525354a7701e21a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 07:57:40 +0200 Subject: [PATCH 108/522] Add a test for #32474 --- tests/queries/0_stateless/02804_intersect_bad_cast.reference | 0 tests/queries/0_stateless/02804_intersect_bad_cast.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/02804_intersect_bad_cast.reference create mode 100644 tests/queries/0_stateless/02804_intersect_bad_cast.sql diff --git a/tests/queries/0_stateless/02804_intersect_bad_cast.reference b/tests/queries/0_stateless/02804_intersect_bad_cast.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02804_intersect_bad_cast.sql b/tests/queries/0_stateless/02804_intersect_bad_cast.sql new file mode 100644 index 00000000000..c7eb8fdd3bc --- /dev/null +++ b/tests/queries/0_stateless/02804_intersect_bad_cast.sql @@ -0,0 +1 @@ +SELECT 2., * FROM (SELECT 1024, 256 INTERSECT SELECT 100 AND inf, 256); From fa6df80aa204ec4e9f2d872eba0a2c7baee2cce4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 08:05:45 +0200 Subject: [PATCH 109/522] Add a test for #35801 --- .../02807_default_date_time_nullable.reference | 2 ++ .../02807_default_date_time_nullable.sql | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/02807_default_date_time_nullable.reference create mode 100644 tests/queries/0_stateless/02807_default_date_time_nullable.sql diff --git a/tests/queries/0_stateless/02807_default_date_time_nullable.reference b/tests/queries/0_stateless/02807_default_date_time_nullable.reference new file mode 100644 index 00000000000..d103460bff7 --- /dev/null +++ b/tests/queries/0_stateless/02807_default_date_time_nullable.reference @@ -0,0 +1,2 @@ +1 1977-01-01 00:00:00 +1 1977-01-01 00:00:00 diff --git 
a/tests/queries/0_stateless/02807_default_date_time_nullable.sql b/tests/queries/0_stateless/02807_default_date_time_nullable.sql new file mode 100644 index 00000000000..9152f198787 --- /dev/null +++ b/tests/queries/0_stateless/02807_default_date_time_nullable.sql @@ -0,0 +1,18 @@ +create temporary table test ( + data int, + default Nullable(DateTime) DEFAULT '1977-01-01 00:00:00' +) engine = Memory(); + +insert into test (data) select 1; + +select * from test; + +drop temporary table test; + +create temporary table test ( + data int, + default DateTime DEFAULT '1977-01-01 00:00:00' +) engine = Memory(); +insert into test (data) select 1; + +select * from test; From 6fce596411b7f4f4fadefade4996e4d6f0776e96 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Jun 2023 08:08:16 +0200 Subject: [PATCH 110/522] Add a test for #34626 --- .../0_stateless/02808_aliases_inside_case.reference | 2 ++ .../queries/0_stateless/02808_aliases_inside_case.sql | 10 ++++++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/queries/0_stateless/02808_aliases_inside_case.reference create mode 100644 tests/queries/0_stateless/02808_aliases_inside_case.sql diff --git a/tests/queries/0_stateless/02808_aliases_inside_case.reference b/tests/queries/0_stateless/02808_aliases_inside_case.reference new file mode 100644 index 00000000000..140930649d3 --- /dev/null +++ b/tests/queries/0_stateless/02808_aliases_inside_case.reference @@ -0,0 +1,2 @@ +1 1 +2 0.5 diff --git a/tests/queries/0_stateless/02808_aliases_inside_case.sql b/tests/queries/0_stateless/02808_aliases_inside_case.sql new file mode 100644 index 00000000000..0da45416ed3 --- /dev/null +++ b/tests/queries/0_stateless/02808_aliases_inside_case.sql @@ -0,0 +1,10 @@ +# We support specifying aliases in any place in the query, including CASE expression: + +with arrayJoin([1,2]) as arg +select arg, + (case + when arg = 1 + then 1 as one + when arg = 2 + then one / 2 + end) as impossible; From 0f02d473d99789c0937b6dbfc4d7be49bb7a2b63 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 12:56:00 +0200 Subject: [PATCH 111/522] Improve parsing of path in clickhouse-keeper-client Before this patch: / :) get /tables/default/data/block_numbers/0-7 Syntax error: failed at position 40 ('0'): get /tables/default/data/block_numbers/0-7 Expected end of query Signed-off-by: Azat Khuzhin --- programs/keeper-client/Parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp index 0f3fc39704e..3420ccb2219 100644 --- a/programs/keeper-client/Parser.cpp +++ b/programs/keeper-client/Parser.cpp @@ -30,7 +30,7 @@ bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path) return parseIdentifierOrStringLiteral(pos, expected, path); String result; - while (pos->type == TokenType::BareWord || pos->type == TokenType::Slash || pos->type == TokenType::Dot) + while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream) { result.append(pos->begin, pos->end); ++pos; From 5d43a64112711b339b82b1c0e8df7882546a1a3c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 13:13:49 +0200 Subject: [PATCH 112/522] Initialize text_log earlier to capture table startup messages While I was investigating some issues, I noticed that messages from table startup do not appear in system.text_log because it is initialized too late.
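As an illustration only (not part of the patch): after this change, startup-time messages should be queryable from the log table. The filter below is a hypothetical example; the exact logger names and messages depend on the tables being attached:

SYSTEM FLUSH LOGS;
SELECT event_time, level, logger_name, message
FROM system.text_log
WHERE message LIKE '%Loading data parts%' -- hypothetical startup-time message
ORDER BY event_time
LIMIT 10;

Before this change such rows were missing, because the text_log channel was wired up only after tables had already started.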
Signed-off-by: Azat Khuzhin --- programs/server/Server.cpp | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..755b7f17d98 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1581,6 +1581,15 @@ try /// After attaching system databases we can initialize system log. global_context->initializeSystemLogs(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); + /// Build loggers before tables startup to make log messages from tables + /// attach available in system.text_log + { + String level_str = config().getString("text_log.level", ""); + int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str); + setTextLog(global_context->getTextLog(), level); + + buildLoggers(config(), logger()); + } /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper); attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA)); @@ -1707,14 +1716,6 @@ try /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. async_metrics.start(); - { - String level_str = config().getString("text_log.level", ""); - int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str); - setTextLog(global_context->getTextLog(), level); - } - - buildLoggers(config(), logger()); - main_config_reloader->start(); access_control.startPeriodicReloading(); From 0c7a4142e40b186da12c3ac3f0664cb3a94e979f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 20:57:39 +0200 Subject: [PATCH 113/522] Use separate default settings for clickhouse-local There are already two of them: - storage_file_read_method can use mmap method for clickhouse-local - there is no sense in disabling allow_introspection_functions for clickhouse-local since it can hurt only itself And likely there will be more, once the infrastructure is there. Signed-off-by: Azat Khuzhin --- src/Core/Settings.h | 2 +- src/Core/SettingsOverridesLocal.cpp | 13 +++++++++++++ src/Core/SettingsOverridesLocal.h | 11 +++++++++++ src/Interpreters/Context.cpp | 3 +++ ...2800_clickhouse_local_default_settings.reference | 2 ++ .../02800_clickhouse_local_default_settings.sh | 8 ++++++++ 6 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 src/Core/SettingsOverridesLocal.cpp create mode 100644 src/Core/SettingsOverridesLocal.h create mode 100644 tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference create mode 100755 tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3d42bd582ed..c51076f3237 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -657,7 +657,7 @@ class IColumn; M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration.
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ + M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsOverridesLocal.cpp b/src/Core/SettingsOverridesLocal.cpp new file mode 100644 index 00000000000..2beb560ece2 --- /dev/null +++ b/src/Core/SettingsOverridesLocal.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void applySettingsOverridesForLocal(Settings & settings) +{ + settings.allow_introspection_functions = true; + settings.storage_file_read_method = LocalFSReadMethod::mmap; +} + +} diff --git a/src/Core/SettingsOverridesLocal.h b/src/Core/SettingsOverridesLocal.h new file mode 100644 index 00000000000..89b79f4ad55 --- /dev/null +++ b/src/Core/SettingsOverridesLocal.h @@ -0,0 +1,11 @@ +#pragma once + +namespace DB +{ + +struct Settings; + +/// Update some settings defaults for clickhouse-local +void applySettingsOverridesForLocal(Settings & settings); + +} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 106264320b2..dccdf4efca0 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -3646,6 +3647,8 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi setCurrentProfile(shared->system_profile_name); applySettingsQuirks(settings, &Poco::Logger::get("SettingsQuirks")); + if (shared->application_type == ApplicationType::LOCAL) + applySettingsOverridesForLocal(settings); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); diff --git a/tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference new file mode 100644 index 00000000000..0f18d1a3897 --- /dev/null +++ b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.reference @@ -0,0 +1,2 @@ +allow_introspection_functions 1 +storage_file_read_method mmap diff --git a/tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh new file mode 100755 index 00000000000..792e187fc51 --- /dev/null +++ 
b/tests/queries/0_stateless/02800_clickhouse_local_default_settings.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-random-merge-tree-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select name, value from system.settings where changed" From a7b14f87e0b43f02fac2cd216e906b045dbbfa42 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 21:14:28 +0200 Subject: [PATCH 114/522] Throw an error instead of silently ignoring storage_file_read_method=mmap in server Signed-off-by: Azat Khuzhin --- src/Storages/StorageFile.cpp | 8 ++++---- .../0_stateless/02497_storage_file_reader_selection.sh | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index ff67272e542..5301b159f96 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -205,7 +205,7 @@ std::unique_ptr selectReadBuffer( { auto read_method = context->getSettingsRef().storage_file_read_method; - /** But using mmap on server-side is unsafe for the following reasons: + /** Using mmap on server-side is unsafe for the following reasons: * - concurrent modifications of a file will result in SIGBUS; * - IO error from the device will result in SIGBUS; * - recovery from this signal is not feasible even with the usage of siglongjmp, * * But we keep this mode for clickhouse-local as it is not so bad for a command line tool. */ + if (context->getApplicationType() == Context::ApplicationType::SERVER && read_method == LocalFSReadMethod::mmap) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Using storage_file_read_method=mmap is not safe in server mode. 
Consider using pread."); - if (S_ISREG(file_stat.st_mode) - && context->getApplicationType() != Context::ApplicationType::SERVER - && read_method == LocalFSReadMethod::mmap) + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) { try { diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 20bde68718d..25387e61db6 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -13,4 +13,6 @@ $CLICKHOUSE_LOCAL --storage_file_read_method=mmap --print-profile-events -q "SEL $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferMMap" $CLICKHOUSE_LOCAL --storage_file_read_method=pread --print-profile-events -q "SELECT * FROM file($DATA_FILE) FORMAT Null" 2>&1 | grep -F -c "CreatedReadBufferOrdinary" +$CLICKHOUSE_CLIENT --storage_file_read_method=mmap -nq "SELECT * FROM file('/dev/null', 'LineAsString') FORMAT Null -- { serverError BAD_ARGUMENTS }" + rm $DATA_FILE From 59f11863d7776134c383b168a1ec7ff2acc8bc16 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 24 Jun 2023 21:41:33 +0200 Subject: [PATCH 115/522] Simplify settings overrides or clickhouse-local Signed-off-by: Azat Khuzhin --- programs/local/LocalServer.cpp | 16 +++++++++++++++- src/Core/SettingsOverridesLocal.cpp | 13 ------------- src/Core/SettingsOverridesLocal.h | 11 ----------- src/Interpreters/Context.cpp | 3 --- 4 files changed, 15 insertions(+), 28 deletions(-) delete mode 100644 src/Core/SettingsOverridesLocal.cpp delete mode 100644 src/Core/SettingsOverridesLocal.h diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index caca7cfb50d..033d2b91ec6 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -71,6 +71,15 @@ namespace ErrorCodes extern const int FILE_ALREADY_EXISTS; } +void applySettingsOverridesForLocal(ContextMutablePtr context) +{ + Settings settings = context->getSettings(); + + settings.allow_introspection_functions = true; + settings.storage_file_read_method = LocalFSReadMethod::mmap; + + context->setSettings(settings); +} void LocalServer::processError(const String &) const { @@ -657,6 +666,12 @@ void LocalServer::processConfig() CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size); #endif + /// NOTE: it is important to apply any overrides before + /// setDefaultProfiles() calls since it will copy current context (i.e. + /// there is separate context for Buffer tables). + applySettingsOverridesForLocal(global_context); + applyCmdOptions(global_context); + /// Load global settings from default_profile and system_profile. 
global_context->setDefaultProfiles(config()); @@ -671,7 +686,6 @@ void LocalServer::processConfig() std::string default_database = config().getString("default_database", "_local"); DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, global_context)); global_context->setCurrentDatabase(default_database); - applyCmdOptions(global_context); if (config().has("path")) { diff --git a/src/Core/SettingsOverridesLocal.cpp b/src/Core/SettingsOverridesLocal.cpp deleted file mode 100644 index 2beb560ece2..00000000000 --- a/src/Core/SettingsOverridesLocal.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include -#include - -namespace DB -{ - -void applySettingsOverridesForLocal(Settings & settings) -{ - settings.allow_introspection_functions = true; - settings.storage_file_read_method = LocalFSReadMethod::mmap; -} - -} diff --git a/src/Core/SettingsOverridesLocal.h b/src/Core/SettingsOverridesLocal.h deleted file mode 100644 index 89b79f4ad55..00000000000 --- a/src/Core/SettingsOverridesLocal.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -namespace DB -{ - -struct Settings; - -/// Update some settings defaults for clickhouse-local -void applySettingsOverridesForLocal(Settings & settings); - -} diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index dccdf4efca0..106264320b2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -3647,8 +3646,6 @@ void Context::setDefaultProfiles(const Poco::Util::AbstractConfiguration & confi setCurrentProfile(shared->system_profile_name); applySettingsQuirks(settings, &Poco::Logger::get("SettingsQuirks")); - if (shared->application_type == ApplicationType::LOCAL) - applySettingsOverridesForLocal(settings); shared->buffer_profile_name = config.getString("buffer_profile", shared->system_profile_name); buffer_context = Context::createCopy(shared_from_this()); From 940cf69ce436107415c3990088738b83dfb201c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Jun 2023 07:30:32 +0200 Subject: [PATCH 116/522] Add a test for #43358 --- tests/queries/0_stateless/02809_has_token.reference | 1 + tests/queries/0_stateless/02809_has_token.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/02809_has_token.reference create mode 100644 tests/queries/0_stateless/02809_has_token.sql diff --git a/tests/queries/0_stateless/02809_has_token.reference b/tests/queries/0_stateless/02809_has_token.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02809_has_token.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02809_has_token.sql b/tests/queries/0_stateless/02809_has_token.sql new file mode 100644 index 00000000000..08edf3756d1 --- /dev/null +++ b/tests/queries/0_stateless/02809_has_token.sql @@ -0,0 +1,3 @@ +-- in old versions of ClickHouse, the following query returned a wrong result: + +SELECT 
hasToken('quotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquota
quotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquota
quotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquota
quotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquota
quotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquota
quotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotaquotquota', 'quota') AS r; From 79a03432bf688c9f6f29554f7b9548e2b36b2178 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sun, 25 Jun 2023 13:27:07 +0200 Subject: [PATCH 117/522] add test, add comment --- 
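A note on the hasToken test added above: hasToken splits the haystack into tokens at non-alphanumeric characters, and the needle must match a whole token. The haystack in that query is one long unbroken alphanumeric run, i.e. a single token, so it can never equal 'quota' and the correct result is 0 (per the comment in the test, older versions returned a different value here). A rough standalone sketch of that rule, under the assumption that tokens are maximal alphanumeric runs (not the actual, optimized ClickHouse implementation):

#include <cctype>
#include <string_view>

/// Returns true if `needle` equals one complete token of `haystack`,
/// where a token is a maximal run of ASCII alphanumeric characters.
static bool hasTokenSketch(std::string_view haystack, std::string_view needle)
{
    size_t i = 0;
    while (i < haystack.size())
    {
        while (i < haystack.size() && !std::isalnum(static_cast<unsigned char>(haystack[i])))
            ++i; /// skip separators
        const size_t start = i;
        while (i < haystack.size() && std::isalnum(static_cast<unsigned char>(haystack[i])))
            ++i; /// consume one token
        if (i > start && haystack.substr(start, i - start) == needle)
            return true;
    }
    return false;
}

The StrictUploadPartSize test added below pins down the splitting rule for s3_strict_upload_part_size: every uploaded part has exactly that size, and only the final part may be smaller. A minimal sketch of the arithmetic the test asserts (splitIntoStrictParts is a hypothetical helper for illustration, not the WriteBufferFromS3 code):

#include <cstdint>
#include <vector>

static std::vector<uint64_t> splitIntoStrictParts(uint64_t total_size, uint64_t part_size)
{
    std::vector<uint64_t> parts(total_size / part_size, part_size); /// full-size parts
    if (const uint64_t tail = total_size % part_size)
        parts.push_back(tail); /// the only part allowed to be smaller
    return parts;
}

/// splitIntoStrictParts(66, 11) -> {11, 11, 11, 11, 11, 11}    (6 parts, as asserted)
/// splitIntoStrictParts(67, 11) -> {11, 11, 11, 11, 11, 11, 1} (7 parts, as asserted)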
src/Common/LoggingFormatStringHelpers.h | 5 ++++- src/Daemon/BaseDaemon.cpp | 2 ++ src/IO/tests/gtest_writebuffer_s3.cpp | 28 +++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 5dece8cd6ea..82c260e52a6 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -192,7 +192,10 @@ public: }; /// This wrapper helps to avoid too noisy log messages from similar objects. -/// For the value logger_name it remembers when such a message was logged the last time. +/// Once an instance of LogSeriesLimiter is created, the decision is made once: +/// all subsequent messages that go through this instance are either printed or muted together. +/// LogSeriesLimiter differs from LogFrequencyLimiterIml in that LogSeriesLimiter accepts +/// or mutes a series of logs as a whole, while LogFrequencyLimiterIml treats each line independently. class LogSeriesLimiter { static std::mutex mutex; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bfd5568b71d..6a6175b802f 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -417,6 +417,8 @@ private: { SentryWriter::onFault(sig, error_message, stack_trace); +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" /// Advice the user to send it manually. if constexpr (std::string_view(VERSION_OFFICIAL).contains("official build")) { diff --git a/src/IO/tests/gtest_writebuffer_s3.cpp b/src/IO/tests/gtest_writebuffer_s3.cpp index cd38291fb31..a4433fee60e 100644 --- a/src/IO/tests/gtest_writebuffer_s3.cpp +++ b/src/IO/tests/gtest_writebuffer_s3.cpp @@ -1119,4 +1119,32 @@ TEST_P(SyncAsync, IncreaseLimited) { } } +TEST_P(SyncAsync, StrictUploadPartSize) { + getSettings().s3_check_objects_after_upload = false; + + { + getSettings().s3_max_single_part_upload_size = 10; + getSettings().s3_strict_upload_part_size = 11; + + { + auto counters = MockS3::EventCounts{.multiUploadCreate = 1, .multiUploadComplete = 1, .uploadParts = 6}; + runSimpleScenario(counters, 66); + + auto actual_parts_sizes = MockS3::BucketMemStore::GetPartSizes(getCompletedPartUploads().back().second); + ASSERT_THAT(actual_parts_sizes, testing::ElementsAre(11, 11, 11, 11, 11, 11)); + + // parts: 11 22 33 44 55 66 + // size: 11 11 11 11 11 11 + } + + { + auto counters = MockS3::EventCounts{.multiUploadCreate = 1, .multiUploadComplete = 1, .uploadParts = 7}; + runSimpleScenario(counters, 67); + + auto actual_parts_sizes = MockS3::BucketMemStore::GetPartSizes(getCompletedPartUploads().back().second); + ASSERT_THAT(actual_parts_sizes, testing::ElementsAre(11, 11, 11, 11, 11, 11, 1)); + } + } +} + #endif From f13752a2805baf77a00d1ad0f50094e553a27f17 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Sun, 25 Jun 2023 13:29:41 +0200 Subject: [PATCH 118/522] delete 02720_s3_strict_upload_part_size --- ...02720_s3_strict_upload_part_size.reference | 4 --- .../02720_s3_strict_upload_part_size.sh | 25 ------------------- 2 files changed, 29 deletions(-) delete mode 100644 tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference delete mode 100755 tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference deleted file mode 100644 index f7c4ece5f1f..00000000000 --- 
a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.reference +++ /dev/null @@ -1,4 +0,0 @@ -part size: 6000001, part number: 1 -part size: 6000001, part number: 2 -part size: 6000001, part number: 3 -part size: 2971517, part number: 4 diff --git a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh b/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh deleted file mode 100755 index 9799ef0478a..00000000000 --- a/tests/queries/0_stateless/02720_s3_strict_upload_part_size.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, long -# Tag no-fasttest: requires S3 - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -in="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.in" -out="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.out" -log="$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME.log" - -set -e -trap 'rm -f "${out:?}" "${in:?}" "${log:?}"' EXIT - -# Generate a file of 20MiB in size, with our part size it will have 4 parts -# NOTE: 1 byte is for new line, so 1023 not 1024 -$CLICKHOUSE_LOCAL -q "SELECT randomPrintableASCII(1023) FROM numbers(20*1024) FORMAT LineAsString" > "$in" - -$CLICKHOUSE_CLIENT --send_logs_level=trace --server_logs_file="$log" -q "INSERT INTO FUNCTION s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" --s3_strict_upload_part_size=6000001 < "$in" -grep -F '' "$log" || : -grep -o 'WriteBufferFromS3: writePart.*, part size: .*' "$log" | grep -o 'part size: .*' -$CLICKHOUSE_CLIENT -q "SELECT * FROM s3(s3_conn, filename='$CLICKHOUSE_TEST_UNIQUE_NAME', format='LineAsString', structure='line String') FORMAT LineAsString" > "$out" - -diff -q "$in" "$out" From c85ade9c27ae56584c924b4b18541bc8615d816e Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 26 Jun 2023 07:44:19 +0000 Subject: [PATCH 119/522] Add const multiplier --- .../functions/date-time-functions.md | 1 - src/Core/DecimalFunctions.h | 2 +- src/Functions/DateTimeTransforms.h | 16 ++++++++-------- src/Functions/dateDiff.cpp | 9 ++++----- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 270fa44a421..e4b70322477 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -625,7 +625,6 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01')); │ 2021-01-01 │ └───────────────────────────────────────────────┘ ``` -## age {#age} ## date_diff {#date_diff} diff --git a/src/Core/DecimalFunctions.h b/src/Core/DecimalFunctions.h index defc21a5f43..17d95650730 100644 --- a/src/Core/DecimalFunctions.h +++ b/src/Core/DecimalFunctions.h @@ -33,7 +33,7 @@ template <> inline constexpr size_t max_precision = 38; template <> inline constexpr size_t max_precision = 76; template -constexpr inline auto scaleMultiplier(UInt32 scale) +inline auto scaleMultiplier(UInt32 scale) { if constexpr (std::is_same_v || std::is_same_v) return common::exp10_i32(scale); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index afff8d6523d..c967d74da0c 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -19,8 +19,8 @@ namespace DB { -static constexpr auto microsecond_scale = 6; -static constexpr auto millisecond_scale = 3; +static constexpr auto microsecond_multiplier = 1000000; +static constexpr auto 
millisecond_multiplier = 1000; namespace ErrorCodes { @@ -1387,6 +1387,7 @@ struct ToRelativeSubsecondNumImpl static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &) { + static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000); if (scale == scale_multiplier) return t.value; if (scale > scale_multiplier) @@ -1521,7 +1522,6 @@ struct ToDateTimeComponentsImpl static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone) { auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier); - constexpr auto multiplier = DecimalUtils::scaleMultiplier(microsecond_scale); if (t.value < 0 && components.fractional) { @@ -1529,12 +1529,12 @@ struct ToDateTimeComponentsImpl --components.whole; } Int64 fractional = components.fractional; - if (scale_multiplier > multiplier) - fractional = fractional / (scale_multiplier / multiplier); - else if (scale_multiplier < multiplier) - fractional = fractional * (multiplier / scale_multiplier); + if (scale_multiplier > microsecond_multiplier) + fractional = fractional / (scale_multiplier / microsecond_multiplier); + else if (scale_multiplier < microsecond_multiplier) + fractional = fractional * (microsecond_multiplier / scale_multiplier); - constexpr auto divider = DecimalUtils::scaleMultiplier(microsecond_scale - millisecond_scale); + constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier; UInt16 millisecond = static_cast(fractional / divider); UInt16 microsecond = static_cast(fractional % divider); return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond}; diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 79be3059b2a..253ed703bb9 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -177,9 +177,8 @@ public: DateTimeComponentsWithFractionalPart a_comp; DateTimeComponentsWithFractionalPart b_comp; Int64 adjust_value; - constexpr auto multiplier = DecimalUtils::scaleMultiplier(microsecond_scale); - auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); + auto x_microseconds = TransformDateTime64>(transform_x.getScaleMultiplier()).execute(x, timezone_x); + auto y_microseconds = TransformDateTime64>(transform_y.getScaleMultiplier()).execute(y, timezone_y); if (x_microseconds <= y_microseconds) { @@ -399,9 +398,9 @@ public: else if (unit == "second" || unit == "ss" || unit == "s") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "millisecond" || unit == "ms") - impl.template dispatchForColumns(millisecond_scale)>>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "microsecond" || unit == "us" || unit == "u") - impl.template dispatchForColumns(microsecond_scale)>>(x, y, timezone_x, timezone_y, res->getData()); + impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} does not support '{}' unit", getName(), unit); From 7d4e7e320d44cc067c9142596209c9139454d1af Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 26 Jun 2023 12:03:25 +0000 Subject: [PATCH 120/522] Implement support for function range of 
Nullable argument --- .../gtest_DataType_deserializeAsText.cpp | 1 - src/Functions/array/arrayDotProduct.cpp | 1 - src/Functions/array/arrayNorm.cpp | 1 - src/Functions/array/range.cpp | 37 +++++++++++++++++-- src/Functions/concat.cpp | 1 - src/Functions/ifNotFinite.cpp | 1 - .../02790_range_nullable.reference | 3 ++ .../0_stateless/02790_range_nullable.sql | 7 ++++ 8 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02790_range_nullable.reference create mode 100644 tests/queries/0_stateless/02790_range_nullable.sql diff --git a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp index 2c0feab6d86..b755bd109d0 100644 --- a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp +++ b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index d17c223cc2f..47e865785d4 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index e14133f931f..027a33d094c 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index f1f0fef8fd9..b638bc3c5b7 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -21,6 +23,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; } @@ -43,6 +46,7 @@ private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } @@ -58,10 +62,12 @@ private: DataTypes arg_types; for (size_t i = 0, size = arguments.size(); i < size; ++i) { - if (i < 2 && WhichDataType(arguments[i]).isIPv4()) + DataTypePtr type_no_nullable = removeNullable(arguments[i]); + + if (i < 2 && WhichDataType(type_no_nullable).isIPv4()) arg_types.emplace_back(std::make_shared()); - else if (isInteger(arguments[i])) - arg_types.push_back(arguments[i]); + else if (isInteger(type_no_nullable)) + arg_types.push_back(type_no_nullable); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[i]->getName(), getName()); @@ -386,10 +392,32 @@ private: "for unsigned/signed integers up to 64 bit", getName()); } + auto throwIfNullValue = [&](const ColumnWithTypeAndName & col) + { + if (!col.type->isNullable()) + { + return; + } + const auto & nullable_col = assert_cast(*col.column); + const auto & null_map = nullable_col.getNullMapData(); + + if (!memoryIsZero(null_map.data(), 0, null_map.size())) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), 
getName()); + } + }; + ColumnPtr res; if (arguments.size() == 1) { - const auto * col = arguments[0].column.get(); + throwIfNullValue(arguments[0]); + auto * col = arguments[0].column.get(); + if (arguments[0].type->isNullable()) + { + const auto * nullable = checkAndGetColumn(*arguments[0].column); + col= nullable->getNestedColumnPtr().get(); + } + if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)))) @@ -404,6 +432,7 @@ private: for (size_t i = 0; i < arguments.size(); ++i) { + throwIfNullValue(arguments[i]); if (i == 1) columns_holder[i] = castColumn(arguments[i], elem_type)->convertToFullColumnIfConst(); else diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 8fefc2d5b8a..8288d872f18 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Functions/ifNotFinite.cpp b/src/Functions/ifNotFinite.cpp index 5ce5d0ede70..d7af10eec44 100644 --- a/src/Functions/ifNotFinite.cpp +++ b/src/Functions/ifNotFinite.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include diff --git a/tests/queries/0_stateless/02790_range_nullable.reference b/tests/queries/0_stateless/02790_range_nullable.reference new file mode 100644 index 00000000000..7a98702e98c --- /dev/null +++ b/tests/queries/0_stateless/02790_range_nullable.reference @@ -0,0 +1,3 @@ +[0] +[0,2,4,6,8] +[0,2,4,6,8] diff --git a/tests/queries/0_stateless/02790_range_nullable.sql b/tests/queries/0_stateless/02790_range_nullable.sql new file mode 100644 index 00000000000..16e16512fc5 --- /dev/null +++ b/tests/queries/0_stateless/02790_range_nullable.sql @@ -0,0 +1,7 @@ +SELECT range(toNullable(1)); +SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); +SELECT range(0::Nullable(Int64), 10::Nullable(Int64), 2::Nullable(Int64)); +SELECT range(null); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT range(Null::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(0::Nullable(UInt64), Null::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), Null::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } From ed6bfd66fee9aa4c2e06eaf25ca81d02e09f075e Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Mon, 26 Jun 2023 17:28:59 +0200 Subject: [PATCH 121/522] Publish changes --- docker/packager/binary/build.sh | 4 ++++ docker/packager/packager | 1 + 2 files changed, 5 insertions(+) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c74147..08a9b07f3ce 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -15,6 +15,10 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 + + if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then + tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > 
/output/source_sub.tar.gz + fi fi # Uncomment to debug ccache. Don't put ccache log in /output right away, or it diff --git a/docker/packager/packager b/docker/packager/packager index 1b3df858cd2..42dc52aa37f 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -168,6 +168,7 @@ def parse_env_variables( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) result.append("EXTRACT_TOOLCHAIN_DARWIN=1") + result.append("EXPORT_SOURCES_WITH_SUBMODULES=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") From 5521bf3f3570d910d3123f8839f78f99f9292051 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 26 Jun 2023 20:19:34 +0000 Subject: [PATCH 122/522] Fix null constant case --- src/Functions/array/range.cpp | 16 +++++++++++++++- ....reference => 02797_range_nullable.reference} | 4 ++++ ...nge_nullable.sql => 02797_range_nullable.sql} | 5 ++++- 3 files changed, 23 insertions(+), 2 deletions(-) rename tests/queries/0_stateless/{02790_range_nullable.reference => 02797_range_nullable.reference} (70%) rename tests/queries/0_stateless/{02790_range_nullable.sql => 02797_range_nullable.sql} (83%) diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index b638bc3c5b7..8c524566110 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,14 @@ private: getName(), arguments.size()); } + for (size_t i = 0, size = arguments.size(); i < size; ++i) + { + if (arguments[i]->onlyNull()) + { + return makeNullable(std::make_shared()); + } + } + DataTypes arg_types; for (size_t i = 0, size = arguments.size(); i < size; ++i) { @@ -382,6 +391,12 @@ private: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + NullPresence null_presence = getNullPresense(arguments); + if (null_presence.has_null_constant) + { + return result_type->createColumnConstWithDefaultValue(input_rows_count); + } + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); WhichDataType which(elem_type); @@ -400,7 +415,6 @@ private: } const auto & nullable_col = assert_cast(*col.column); const auto & null_map = nullable_col.getNullMapData(); - if (!memoryIsZero(null_map.data(), 0, null_map.size())) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName()); diff --git a/tests/queries/0_stateless/02790_range_nullable.reference b/tests/queries/0_stateless/02797_range_nullable.reference similarity index 70% rename from tests/queries/0_stateless/02790_range_nullable.reference rename to tests/queries/0_stateless/02797_range_nullable.reference index 7a98702e98c..a082a71f4ec 100644 --- a/tests/queries/0_stateless/02790_range_nullable.reference +++ b/tests/queries/0_stateless/02797_range_nullable.reference @@ -1,3 +1,7 @@ +\N +\N +\N +\N [0] [0,2,4,6,8] [0,2,4,6,8] diff --git a/tests/queries/0_stateless/02790_range_nullable.sql b/tests/queries/0_stateless/02797_range_nullable.sql similarity index 83% rename from tests/queries/0_stateless/02790_range_nullable.sql rename to tests/queries/0_stateless/02797_range_nullable.sql index 16e16512fc5..2b0fe69b123 100644 --- a/tests/queries/0_stateless/02790_range_nullable.sql +++ b/tests/queries/0_stateless/02797_range_nullable.sql @@ -1,7 +1,10 @@ +SELECT 
range(null); +SELECT range(10, null); +SELECT range(10, 2, null); +select range('string', Null); SELECT range(toNullable(1)); SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); SELECT range(0::Nullable(Int64), 10::Nullable(Int64), 2::Nullable(Int64)); -SELECT range(null); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT range(Null::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } SELECT range(0::Nullable(UInt64), Null::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), Null::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } From dd3d2c9aeaa5798467521eaf2fc85f2332a07a6a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 27 Jun 2023 08:01:15 +0200 Subject: [PATCH 123/522] Fix syntax error --- tests/integration/test_attach_table_normalizer/test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index ba0068e9c59..49acefdcd17 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - 'node', main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True + "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True ) @@ -18,13 +18,13 @@ def started_cluster(): def replace_substring_to_substr(node): - node.exec_in_container(( + node.exec_in_container( [ "bash", "-c", "sed -i 's/substring/substr/g' /var/lib/clickhouse/metadata/default/file.sql", ], - user="root" + user="root", ) From 5a4a774db7e961133dd124c5d337a402cb2ee9ee Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 27 Jun 2023 06:06:56 +0000 Subject: [PATCH 124/522] Style fix --- src/Functions/array/range.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 8c524566110..47e90de2e2b 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -60,13 +60,8 @@ private: getName(), arguments.size()); } - for (size_t i = 0, size = arguments.size(); i < size; ++i) - { - if (arguments[i]->onlyNull()) - { - return makeNullable(std::make_shared()); - } - } + if (std::find_if (arguments.cbegin(), arguments.cend(), [](const auto & arg) { return arg->onlyNull(); }) != arguments.cend()) + return makeNullable(std::make_shared()); DataTypes arg_types; for (size_t i = 0, size = arguments.size(); i < size; ++i) @@ -393,9 +388,7 @@ private: { NullPresence null_presence = getNullPresense(arguments); if (null_presence.has_null_constant) - { return result_type->createColumnConstWithDefaultValue(input_rows_count); - } DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); WhichDataType which(elem_type); @@ -410,15 +403,11 @@ private: auto throwIfNullValue = [&](const ColumnWithTypeAndName & col) { if (!col.type->isNullable()) - { return; - } const auto & nullable_col = assert_cast(*col.column); const auto & null_map = nullable_col.getNullMapData(); if (!memoryIsZero(null_map.data(), 0, null_map.size())) - { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName()); - } }; ColumnPtr res; @@ -429,7 +418,7 
@@ private: if (arguments[0].type->isNullable()) { const auto * nullable = checkAndGetColumn(*arguments[0].column); - col= nullable->getNestedColumnPtr().get(); + col = nullable->getNestedColumnPtr().get(); } if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) From 8ad1d4b94f2d53c28f974be7ef8f0f4002eb4245 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 27 Jun 2023 06:31:30 +0000 Subject: [PATCH 125/522] Add comment --- src/Functions/formatDateTime.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index bdd694c7b94..50772866648 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -945,6 +945,7 @@ public: { auto c = DecimalUtils::split(vec[i], scale); + // -1.123 splits to -1 / 0.123 if (vec[i].value < 0 && c.fractional) { using F = typename DataType::FieldType; From c475e706d34761d7b5ff94b5f186e6f0e5479436 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 27 Jun 2023 13:20:00 +0000 Subject: [PATCH 126/522] Fix optimization to move functions before sorting. --- .../Optimizations/liftUpFunctions.cpp | 20 +++ ..._and_columns_with_same_names_bug.reference | 0 ...orting_and_columns_with_same_names_bug.sql | 133 ++++++++++++++++++ 3 files changed, 153 insertions(+) create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp index c3b03a5385f..b2c3f3b4a6d 100644 --- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp +++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -28,6 +29,22 @@ const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node) namespace DB::QueryPlanOptimizations { +/// This is a check that output columns with the same name have the same types. +/// It is ok to have such a situation in a DAG, but not in a Block. +/// TODO: we should have a different data structure for headers. 
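+/// (For example, with join_use_nulls a JOIN can expose two outputs that are both
+/// named `pt`, one String and one Nullable(String); such a DAG is valid, but it
+/// cannot be converted into a Block header.)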
+static bool areOutputsAreConvertableToBlock(const ActionsDAG::NodeRawConstPtrs & outputs) +{ + std::unordered_map name_to_type; + for (const auto & output : outputs) + { + auto [it, inserted] = name_to_type.emplace(output->result_name, output->result_type.get()); + if (!inserted && !it->second->equals(*output->result_type)) + return false; + } + + return true; +} + size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -57,6 +74,9 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan: if (unneeded_for_sorting->trivial()) return 0; + if (!areOutputsAreConvertableToBlock(needed_for_sorting->getOutputs())) + return 0; + // Sorting (parent_node) -> Expression (child_node) auto & node_with_needed = nodes.emplace_back(); std::swap(node_with_needed.children, child_node->children); diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql new file mode 100644 index 00000000000..4a9ede36335 --- /dev/null +++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql @@ -0,0 +1,133 @@ +drop table if exists test; +drop table if exists test1; + +CREATE TABLE test +( + `pt` String, + `count_distinct_exposure_uv` AggregateFunction(uniqHLL12, Int64) +) +ENGINE = AggregatingMergeTree +ORDER BY pt; + +SELECT * +FROM +( + SELECT m0.pt AS pt + ,m0.`exposure_uv` AS exposure_uv + ,round(m2.exposure_uv,4) AS exposure_uv_hb_last_value + ,if(m2.exposure_uv IS NULL OR m2.exposure_uv = 0,NULL,round((m0.exposure_uv - m2.exposure_uv) * 1.0 / m2.exposure_uv,4)) AS exposure_uv_hb_diff_percent + ,round(m1.exposure_uv,4) AS exposure_uv_tb_last_value + ,if(m1.exposure_uv IS NULL OR m1.exposure_uv = 0,NULL,round((m0.exposure_uv - m1.exposure_uv) * 1.0 / m1.exposure_uv,4)) AS exposure_uv_tb_diff_percent + FROM + ( + SELECT m0.pt AS pt + ,`exposure_uv` AS `exposure_uv` + FROM + ( + SELECT pt AS pt + ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`) ELSE 0 END AS `exposure_uv` + FROM + ( + SELECT pt AS pt + ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv` + FROM test + GROUP BY pt + ) m + GROUP BY pt + ) m0 + ) m0 + LEFT JOIN + ( + SELECT m0.pt AS pt + ,`exposure_uv` AS `exposure_uv` + FROM + ( + SELECT formatDateTime(addYears(parseDateTimeBestEffort(pt),1),'%Y%m%d') AS pt + ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`) ELSE 0 END AS `exposure_uv` + FROM + ( + SELECT pt AS pt + ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv` + FROM test + GROUP BY pt + ) m + GROUP BY pt + ) m0 + ) m1 + ON m0.pt = m1.pt + LEFT JOIN + ( + SELECT m0.pt AS pt + ,`exposure_uv` AS `exposure_uv` + FROM + ( + SELECT formatDateTime(addDays(toDate(parseDateTimeBestEffort(pt)),1),'%Y%m%d') AS pt + ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`) ELSE 0 END AS `exposure_uv` + FROM + ( + SELECT pt AS pt + ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv` + FROM test + GROUP BY pt + ) m + GROUP BY pt + ) m0 + ) m2 + ON m0.pt = m2.pt +) c0 +ORDER BY pt ASC, exposure_uv DESC +settings join_use_nulls = 1; + +CREATE TABLE test1 +( + `pt` String, + 
`exposure_uv` Float64 +) +ENGINE = Memory; + +SELECT * +FROM +( + SELECT m0.pt + ,m0.exposure_uv AS exposure_uv + ,round(m2.exposure_uv,4) + FROM + ( + SELECT pt + ,exposure_uv + FROM test1 + ) m0 + LEFT JOIN + ( + SELECT pt + ,exposure_uv + FROM test1 + ) m1 + ON m0.pt = m1.pt + LEFT JOIN + ( + SELECT pt + ,exposure_uv + FROM test1 + ) m2 + ON m0.pt = m2.pt +) c0 +ORDER BY exposure_uv +settings join_use_nulls = 1; + +SELECT + pt AS pt, + exposure_uv AS exposure_uv +FROM +( + SELECT + pt + FROM test1 +) AS m0 +FULL OUTER JOIN +( + SELECT + pt, + exposure_uv + FROM test1 +) AS m1 ON m0.pt = m1.pt; From e34597e43d09c3c164fb516a544f82d347be6afa Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 27 Jun 2023 18:36:32 +0300 Subject: [PATCH 127/522] Fix tabulation --- src/Functions/dateDiff.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 253ed703bb9..6bfbbb7c735 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -399,7 +399,7 @@ public: impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else if (unit == "millisecond" || unit == "ms") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); - else if (unit == "microsecond" || unit == "us" || unit == "u") + else if (unit == "microsecond" || unit == "us" || unit == "u") impl.template dispatchForColumns>(x, y, timezone_x, timezone_y, res->getData()); else throw Exception(ErrorCodes::BAD_ARGUMENTS, From f3f604ace491e35b251d8be928c7110d83978d9f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 27 Jun 2023 16:45:34 +0000 Subject: [PATCH 128/522] added table with pk size --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 +++++++++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/System/StorageSystemParts.cpp | 3 +++ 3 files changed, 21 insertions(+) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c9930e61e98..55db22d6105 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,5 +1,6 @@ #include "IMergeTreeDataPart.h" #include "Storages/MergeTree/IDataPartStorage.h" +#include "base/types.h" #include #include @@ -1800,6 +1801,22 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); } +UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const +{ + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); + if (parent_part) + metadata_snapshot = metadata_snapshot->projections.get(name).metadata; + const auto & pk = metadata_snapshot->getPrimaryKey(); + if (!pk.column_names.empty()) + { + String file = "primary" + getIndexExtension(false); + if (checksums.files.contains("primary" + getIndexExtension(true))) + file = "primary" + getIndexExtension(true); + return getFileSizeOrZero(file); + } + return 0; +} + void IMergeTreeDataPart::checkConsistencyBase() const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index a36634d2cf9..b3c70c99d2e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -323,6 +323,7 @@ public: UInt64 getIndexSizeInBytes() const; UInt64 getIndexSizeInAllocatedBytes() const; UInt64 getMarksCount() const; + UInt64 
getIndexSizeFromFile() const;

     UInt64 getBytesOnDisk() const { return bytes_on_disk; }
     void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; }

diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp
index 86ecb336b51..e1e8ba1aa00 100644
--- a/src/Storages/System/StorageSystemParts.cpp
+++ b/src/Storages/System/StorageSystemParts.cpp
@@ -57,6 +57,7 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_)
         {"bytes_on_disk",                               std::make_shared<DataTypeUInt64>()},
         {"data_compressed_bytes",                       std::make_shared<DataTypeUInt64>()},
         {"data_uncompressed_bytes",                     std::make_shared<DataTypeUInt64>()},
+        {"primary_key_size",                            std::make_shared<DataTypeUInt64>()},
         {"marks_bytes",                                 std::make_shared<DataTypeUInt64>()},
         {"secondary_indices_compressed_bytes",          std::make_shared<DataTypeUInt64>()},
         {"secondary_indices_uncompressed_bytes",        std::make_shared<DataTypeUInt64>()},
@@ -168,6 +169,8 @@ void StorageSystemParts::processNextStorage(
             columns[res_index++]->insert(columns_size.data_compressed);
         if (columns_mask[src_index++])
             columns[res_index++]->insert(columns_size.data_uncompressed);
+        if (columns_mask[src_index++])
+            columns[res_index++]->insert(part->getIndexSizeFromFile());
         if (columns_mask[src_index++])
             columns[res_index++]->insert(columns_size.marks);
         if (columns_mask[src_index++])

From 40f721ae4f290c76d492260d740c1eb37df20e4c Mon Sep 17 00:00:00 2001
From: serxa
Date: Tue, 27 Jun 2023 17:14:33 +0000
Subject: [PATCH 129/522] fix possible race on shutdown wait

---
 programs/server/Server.cpp         |  4 ++--
 src/Server/waitServersToFinish.cpp | 11 +++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index d2d8a0d07fb..41df7a119d1 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1523,7 +1523,7 @@ try
         LOG_INFO(log, "Closed all listening sockets.");

         if (current_connections > 0)
-            current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5));
+            current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, config().getInt("shutdown_wait_unfinished", 5));

         if (current_connections)
             LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
@@ -1827,7 +1827,7 @@ try
         global_context->getProcessList().killAllQueries();

         if (current_connections)
-            current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5));
+            current_connections = waitServersToFinish(servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5));

         if (current_connections)
             LOG_WARNING(log, "Closed connections. But {} remain."
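The companion change below makes waitServersToFinish() take the mutex that guards the server list, so that stopping servers and counting their connections can no longer race with concurrent modification of the list. A minimal sketch of the resulting locking pattern, with a simplified stand-in type (the real code iterates over ProtocolServerAdapter objects):

    #include <cstddef>
    #include <mutex>
    #include <vector>

    // Stand-in for ProtocolServerAdapter: just enough interface for the sketch.
    struct Server
    {
        void stop() {}
        size_t currentConnections() const { return 0; }
    };

    // One polling iteration of the shutdown wait. The caller's mutex protects
    // `servers` against concurrent insertion/removal and is held only while
    // the vector is traversed, not across the sleep between iterations.
    size_t stopServersAndCountConnections(std::vector<Server> & servers, std::mutex & mutex)
    {
        size_t current_connections = 0;
        {
            std::scoped_lock lock{mutex};
            for (auto & server : servers)
            {
                server.stop();
                current_connections += server.currentConnections();
            }
        }
        return current_connections;
    }

Keeping the sleep of the wait loop outside the critical section is what lets other threads touch the server list between polling iterations.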
diff --git a/src/Server/waitServersToFinish.cpp b/src/Server/waitServersToFinish.cpp index f2e36fae86c..3b07c082067 100644 --- a/src/Server/waitServersToFinish.cpp +++ b/src/Server/waitServersToFinish.cpp @@ -5,7 +5,7 @@ namespace DB { -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait) +size_t waitServersToFinish(std::vector & servers, std::mutex & mutex, size_t seconds_to_wait) { const size_t sleep_max_ms = 1000 * seconds_to_wait; const size_t sleep_one_ms = 100; @@ -15,10 +15,13 @@ size_t waitServersToFinish(std::vector & servers, siz { current_connections = 0; - for (auto & server : servers) { - server.stop(); - current_connections += server.currentConnections(); + std::scoped_lock lock{mutex}; + for (auto & server : servers) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (!current_connections) From c59ddf0c668c0a345c88df98b249b79cd58a8fcb Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 27 Jun 2023 17:27:28 +0000 Subject: [PATCH 130/522] Resolved style check --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 55db22d6105..eb35fe178c4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1801,7 +1801,7 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); } -UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const +UInt64 IMergeTreeDataPart::getIndexSizeFromFile() const { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (parent_part) From a013ec1abaccea5599b17e69d7a923addff76e4c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 27 Jun 2023 17:42:19 +0000 Subject: [PATCH 131/522] added field to tests --- .../queries/0_stateless/02117_show_create_table_system.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..f2c85a4d0ba 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -466,6 +466,7 @@ CREATE TABLE system.parts `bytes_on_disk` UInt64, `data_compressed_bytes` UInt64, `data_uncompressed_bytes` UInt64, + `primary_key_size` UInt64, `marks_bytes` UInt64, `secondary_indices_compressed_bytes` UInt64, `secondary_indices_uncompressed_bytes` UInt64, From 7583da9b3806850a3ed99e7b93f253c17ddb5aa8 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 18:48:54 +0000 Subject: [PATCH 132/522] fix --- src/Server/waitServersToFinish.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Server/waitServersToFinish.h b/src/Server/waitServersToFinish.h index 5e90790cefb..b6daa025964 100644 --- a/src/Server/waitServersToFinish.h +++ b/src/Server/waitServersToFinish.h @@ -5,6 +5,6 @@ namespace DB { class ProtocolServerAdapter; -size_t waitServersToFinish(std::vector & servers, size_t seconds_to_wait); +size_t waitServersToFinish(std::vector & servers, std::mutex & mutex, size_t seconds_to_wait); } From 6515d52f6018570560eeb56d93d05ca1b530a892 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 27 Jun 2023 18:50:40 +0000 Subject: [PATCH 133/522] fix2 --- programs/keeper/Keeper.cpp | 2 +- 1 
file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a1825665188..43c3489bbda 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -465,7 +465,7 @@ try LOG_INFO(log, "Closed all listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(*servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to Keeper. But {} remain. Probably some users cannot finish their connections after context shutdown.", current_connections); From 13854e5259ee446c7b76be2db619bd22fd6491bb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 27 Jun 2023 17:23:51 +0200 Subject: [PATCH 134/522] impl --- src/Processors/QueryPlan/PartsSplitter.cpp | 6 +- ...nal_block_structure_mismatch_bug.reference | 9 +++ ...791_final_block_structure_mismatch_bug.sql | 66 +++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference create mode 100644 tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 9796e696f6c..e1fc3facf04 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -271,6 +271,9 @@ Pipes buildPipesForReadingByPKRanges( for (size_t i = 0; i < result_layers.size(); ++i) { pipes[i] = reading_step_getter(std::move(result_layers[i])); + auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); + pipes[i].addSimpleTransform([pk_expression](const Block & header) + { return std::make_shared(header, pk_expression); }); auto & filter_function = filters[i]; if (!filter_function) continue; @@ -279,9 +282,6 @@ Pipes buildPipesForReadingByPKRanges( ExpressionActionsPtr expression_actions = std::make_shared(std::move(actions)); auto description = fmt::format( "filter values in [{}, {})", i ? ::toString(borders[i - 1]) : "-inf", i < borders.size() ? 
::toString(borders[i]) : "+inf"); - auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); - pipes[i].addSimpleTransform([pk_expression](const Block & header) - { return std::make_shared(header, pk_expression); }); pipes[i].addSimpleTransform( [&](const Block & header) { diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference new file mode 100644 index 00000000000..a8401b1cae8 --- /dev/null +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -0,0 +1,9 @@ +1 +2 +3 +1 +2 +3 +1 +2 +3 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql new file mode 100644 index 00000000000..4c7ac50b8d0 --- /dev/null +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -0,0 +1,66 @@ +SET do_not_merge_across_partitions_select_final=1; + +CREATE TABLE test_block_mismatch +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch FINAL; + +CREATE TABLE test_block_mismatch_sk1 +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +PRIMARY KEY (toDate(b)) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk1 FINAL; + +CREATE TABLE test_block_mismatch_sk2 +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +PRIMARY KEY (a) +ORDER BY (a, toDate(b)); + +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); +SELECT count(*) FROM test_block_mismatch_sk2 FINAL; From e2f20ea0e2b012796e05f1e734152609b34167e7 Mon Sep 
17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 00:30:51 +0200 Subject: [PATCH 135/522] fix --- src/Processors/QueryPlan/PartsSplitter.cpp | 6 ++--- src/Processors/QueryPlan/PartsSplitter.h | 1 + .../QueryPlan/ReadFromMergeTree.cpp | 22 +++++++++--------- ...nal_block_structure_mismatch_bug.reference | 1 + ...791_final_block_structure_mismatch_bug.sql | 23 +++++++++++++++++++ 5 files changed, 39 insertions(+), 14 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index e1fc3facf04..533fbde1e13 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -256,6 +256,7 @@ namespace ErrorCodes Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, + ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, @@ -271,9 +272,8 @@ Pipes buildPipesForReadingByPKRanges( for (size_t i = 0; i < result_layers.size(); ++i) { pipes[i] = reading_step_getter(std::move(result_layers[i])); - auto pk_expression = std::make_shared(primary_key.expression->getActionsDAG().clone()); - pipes[i].addSimpleTransform([pk_expression](const Block & header) - { return std::make_shared(header, pk_expression); }); + pipes[i].addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); auto & filter_function = filters[i]; if (!filter_function) continue; diff --git a/src/Processors/QueryPlan/PartsSplitter.h b/src/Processors/QueryPlan/PartsSplitter.h index 56bca688c2d..4ba655a6f6d 100644 --- a/src/Processors/QueryPlan/PartsSplitter.h +++ b/src/Processors/QueryPlan/PartsSplitter.h @@ -18,6 +18,7 @@ using ReadingInOrderStepGetter = std::function; /// Will try to produce exactly max_layer pipes but may return less if data is distributed in not a very parallelizable way. 
Pipes buildPipesForReadingByPKRanges( const KeyDescription & primary_key, + ExpressionActionsPtr sorting_expr, RangesInDataParts parts, size_t max_layers, ContextPtr context, diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3c38ecbbd3f..fac8ebd6e1f 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -979,6 +979,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( RangesInDataParts lonely_parts; size_t sum_marks_in_lonely_parts = 0; + auto sorting_expr = std::make_shared(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); + for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index) { Pipes pipes; @@ -1022,12 +1024,20 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( info.use_uncompressed_cache); }; pipes = buildPipesForReadingByPKRanges( - metadata_for_reading->getPrimaryKey(), std::move(new_parts), num_streams, context, std::move(reading_step_getter)); + metadata_for_reading->getPrimaryKey(), + sorting_expr, + std::move(new_parts), + num_streams, + context, + std::move(reading_step_getter)); } else { pipes.emplace_back(read( std::move(new_parts), column_names, ReadFromMergeTree::ReadType::InOrder, num_streams, 0, info.use_uncompressed_cache)); + + pipes.back().addSimpleTransform([sorting_expr](const Block & header) + { return std::make_shared(header, sorting_expr); }); } /// Drop temporary columns, added by 'sorting_key_expr' @@ -1035,13 +1045,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( out_projection = createProjection(pipes.front().getHeader()); } - auto sorting_expr = std::make_shared( - metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); - - for (auto & pipe : pipes) - pipe.addSimpleTransform([sorting_expr](const Block & header) - { return std::make_shared(header, sorting_expr); }); - /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition /// with level > 0 then we won't postprocess this part if (settings.do_not_merge_across_partitions_select_final && @@ -1098,9 +1101,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe.getHeader()); - auto sorting_expr = std::make_shared( - metadata_for_reading->getSortingKey().expression->getActionsDAG().clone()); - pipe.addSimpleTransform([sorting_expr](const Block & header) { return std::make_shared(header, sorting_expr); diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference index a8401b1cae8..ca810c46a2d 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference @@ -7,3 +7,4 @@ 1 2 3 +2 diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql index 4c7ac50b8d0..a82e43d81f4 100644 --- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql +++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql @@ -64,3 +64,26 @@ SELECT count(*) FROM test_block_mismatch_sk2 FINAL; INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12')); 
SELECT count(*) FROM test_block_mismatch_sk2 FINAL; + +CREATE TABLE test_block_mismatch_magic_row_dist +( + a UInt32, + b DateTime +) +ENGINE = ReplacingMergeTree +PARTITION BY toYYYYMM(b) +ORDER BY (toDate(b), a); + +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12')); + +optimize table test_block_mismatch_magic_row_dist final; + +system stop merges test_block_mismatch_magic_row_dist; + +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); +INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12')); + +SELECT count(*) FROM test_block_mismatch_magic_row_dist FINAL; From 521137c55d18f956c86cf71b1ca7bca2601f7d70 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 06:28:21 +0300 Subject: [PATCH 136/522] Update test.py --- tests/integration/test_attach_table_normalizer/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 49acefdcd17..10b400494ab 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance( - "node", main_configs=["configs/config.xml"], with_zookeeper=True, stay_alive=True + "node", stay_alive=True ) From 5df6f3d6e28483a029f3a8859c8bd09fdab008a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Jun 2023 03:40:09 +0000 Subject: [PATCH 137/522] Automatic style fix --- tests/integration/test_attach_table_normalizer/test.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/test_attach_table_normalizer/test.py b/tests/integration/test_attach_table_normalizer/test.py index 10b400494ab..79093bf4014 100644 --- a/tests/integration/test_attach_table_normalizer/test.py +++ b/tests/integration/test_attach_table_normalizer/test.py @@ -3,9 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", stay_alive=True -) +node = cluster.add_instance("node", stay_alive=True) @pytest.fixture(scope="module") From 68ac4d8cc934d4e9483b5257e9ffbdb84b92c709 Mon Sep 17 00:00:00 2001 From: xuelei Date: Wed, 28 Jun 2023 16:31:57 +0800 Subject: [PATCH 138/522] fix storage policy prompt display error --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..02789132e55 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(new_storage_policy->getName())); std::unordered_set 
new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(new_storage_policy->getName())); } } From 7e6d606b1c6b5277b1420a509cf841d1c1120ffc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 10:41:39 +0200 Subject: [PATCH 139/522] Remove bad code and fix a bug --- src/Common/QueryProfiler.cpp | 9 +++ src/Common/StringSearcher.h | 89 +---------------------- src/Common/Volnitsky.h | 3 - src/Functions/HasTokenImpl.h | 45 ++++++++---- src/Functions/hasToken.cpp | 5 +- src/Functions/hasTokenCaseInsensitive.cpp | 5 +- 6 files changed, 48 insertions(+), 108 deletions(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 085c8fb8af4..313d4b77739 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -120,6 +120,15 @@ void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Failed to create thread timer. The function " "'timer_create' returned non-zero but didn't set errno. This is bug in your OS."); + /// For example, it cannot be created if the server is run under QEMU: + /// "Failed to create thread timer, errno: 11, strerror: Resource temporarily unavailable." + + /// You could accidentally run the server under QEMU without being aware, + /// if you use Docker image for a different architecture, + /// and you have the "binfmt-misc" kernel module, and "qemu-user" tools. + + /// Also, it cannot be created if the server has too many threads. + throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER); } timer_id.emplace(local_timer_id); diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 3ed192d05f3..b3065354f65 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -793,88 +793,6 @@ public: } }; - -// Searches for needle surrounded by token-separators. -// Separators are anything inside ASCII (0-128) and not alphanum. -// Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings -// should work just fine. But any Unicode whitespace is not considered a token separtor. -template -class TokenSearcher : public StringSearcherBase -{ - StringSearcher searcher; - size_t needle_size; - -public: - - template - requires (sizeof(CharT) == 1) - static bool isValidNeedle(const CharT * needle_, size_t needle_size_) - { - return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator); - } - - template - requires (sizeof(CharT) == 1) - TokenSearcher(const CharT * needle_, size_t needle_size_) - : searcher(needle_, needle_size_) - , needle_size(needle_size_) - { - /// The caller is responsible for calling isValidNeedle() - chassert(isValidNeedle(needle_, needle_size_)); - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool compare(const CharT * haystack, const CharT * haystack_end, const CharT * pos) const - { - // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token. 
- if (isToken(haystack, haystack_end, pos)) - return searcher.compare(haystack, haystack_end, pos); - - return false; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - // use searcher.search(), then verify that returned value is a token - // if it is not, skip it and re-run - - const auto * pos = haystack; - while (pos < haystack_end) - { - pos = searcher.search(pos, haystack_end); - if (pos == haystack_end || isToken(haystack, haystack_end, pos)) - return pos; - - // assuming that heendle does not contain any token separators. - pos += needle_size; - } - return haystack_end; - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } - - template - requires (sizeof(CharT) == 1) - ALWAYS_INLINE bool isToken(const CharT * haystack, const CharT * const haystack_end, const CharT* p) const - { - return (p == haystack || isTokenSeparator(*(p - 1))) - && (p + needle_size >= haystack_end || isTokenSeparator(*(p + needle_size))); - } - - ALWAYS_INLINE static bool isTokenSeparator(const uint8_t c) - { - return !(isAlphaNumericASCII(c) || !isASCII(c)); - } -}; - } using ASCIICaseSensitiveStringSearcher = impl::StringSearcher; @@ -882,9 +800,6 @@ using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher; using UTF8CaseSensitiveStringSearcher = impl::StringSearcher; using UTF8CaseInsensitiveStringSearcher = impl::StringSearcher; -using ASCIICaseSensitiveTokenSearcher = impl::TokenSearcher; -using ASCIICaseInsensitiveTokenSearcher = impl::TokenSearcher; - /// Use only with short haystacks where cheap initialization is required. template struct StdLibASCIIStringSearcher @@ -906,11 +821,11 @@ struct StdLibASCIIStringSearcher if constexpr (CaseInsensitive) return std::search( haystack_start, haystack_end, needle_start, needle_end, - [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); + [](char c1, char c2) { return std::toupper(c1) == std::toupper(c2); }); else return std::search( haystack_start, haystack_end, needle_start, needle_end, - [](char c1, char c2) {return c1 == c2;}); + [](char c1, char c2) { return c1 == c2; }); } template diff --git a/src/Common/Volnitsky.h b/src/Common/Volnitsky.h index 8f9aa23a38a..3360c197984 100644 --- a/src/Common/Volnitsky.h +++ b/src/Common/Volnitsky.h @@ -730,9 +730,6 @@ using VolnitskyUTF8 = VolnitskyBase; /// ignores non-ASCII bytes using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase; -using VolnitskyCaseSensitiveToken = VolnitskyBase; -using VolnitskyCaseInsensitiveToken = VolnitskyBase; - using MultiVolnitsky = MultiVolnitskyBase; using MultiVolnitskyUTF8 = MultiVolnitskyBase; using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase; diff --git a/src/Functions/HasTokenImpl.h b/src/Functions/HasTokenImpl.h index 8cacdfff99d..fdec5fcb0b7 100644 --- a/src/Functions/HasTokenImpl.h +++ b/src/Functions/HasTokenImpl.h @@ -17,7 +17,7 @@ namespace ErrorCodes /** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation. 
*/ -template +template struct HasTokenImpl { using ResultType = UInt8; @@ -46,7 +46,7 @@ struct HasTokenImpl const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * pos = begin; - if (!ASCIICaseSensitiveTokenSearcher::isValidNeedle(pattern.data(), pattern.size())) + if (!std::none_of(pattern.begin(), pattern.end(), isTokenSeparator)) { if (res_null) { @@ -58,7 +58,8 @@ struct HasTokenImpl throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle must not contain whitespace or separator characters"); } - TokenSearcher searcher(pattern.data(), pattern.size(), end - pos); + size_t pattern_size = pattern.size(); + Searcher searcher(pattern.data(), pattern_size, end - pos); if (res_null) std::ranges::fill(res_null->getData(), false); @@ -67,21 +68,31 @@ struct HasTokenImpl /// We will search for the next occurrence in all rows at once. while (pos < end && end != (pos = searcher.search(pos, end - pos))) { - /// Let's determine which index it refers to. - while (begin + haystack_offsets[i] <= pos) + /// The found substring is a token + if ((pos == begin || isTokenSeparator(pos[-1])) + && (pos + pattern_size == end || isTokenSeparator(pos[pattern_size]))) { - res[i] = negate; + /// Let's determine which index it refers to. + while (begin + haystack_offsets[i] <= pos) + { + res[i] = negate; + ++i; + } + + /// We check that the entry does not pass through the boundaries of strings. + if (pos + pattern.size() < begin + haystack_offsets[i]) + res[i] = !negate; + else + res[i] = negate; + + pos = begin + haystack_offsets[i]; ++i; } - - /// We check that the entry does not pass through the boundaries of strings. - if (pos + pattern.size() < begin + haystack_offsets[i]) - res[i] = !negate; else - res[i] = negate; - - pos = begin + haystack_offsets[i]; - ++i; + { + /// Not a token. Jump over it. + pos += pattern_size; + } } /// Tail, in which there can be no substring. 
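The rewritten loop above accepts a Volnitsky match only when it is flanked by token separators or by the haystack boundaries. A self-contained sketch of the two predicates involved, under hypothetical names (the real code in the next hunk uses ClickHouse's isASCII()/isAlphaNumericASCII() helpers):

    #include <cstddef>
    #include <cstdint>

    // Sketch of the separator test added below as HasTokenImpl::isTokenSeparator:
    // any ASCII byte that is not alphanumeric separates tokens; bytes >= 0x80
    // (e.g. bytes of multi-byte UTF-8 sequences) never do.
    static bool isTokenSeparatorSketch(uint8_t c)
    {
        const bool is_ascii = c < 0x80;
        const bool is_alnum = ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9');
        return is_ascii && !is_alnum;
    }

    // A candidate match [pos, pos + needle_size) is a whole token only if it
    // touches a haystack boundary or a separator on both sides.
    static bool isWholeTokenSketch(
        const uint8_t * begin, const uint8_t * end, const uint8_t * pos, size_t needle_size)
    {
        return (pos == begin || isTokenSeparatorSketch(pos[-1]))
            && (pos + needle_size == end || isTokenSeparatorSketch(pos[needle_size]));
    }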
@@ -113,6 +124,12 @@ struct HasTokenImpl { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function '{}' doesn't support FixedString haystack argument", name); } + +private: + static bool isTokenSeparator(UInt8 c) + { + return isASCII(c) && !isAlphaNumericASCII(c); + } }; } diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index b90750ea233..fa41abf2641 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -6,6 +6,7 @@ namespace DB { + struct NameHasToken { static constexpr auto name = "hasToken"; @@ -17,9 +18,9 @@ struct NameHasTokenOrNull }; using FunctionHasToken - = FunctionsStringSearch>; + = FunctionsStringSearch>; using FunctionHasTokenOrNull - = FunctionsStringSearch, ExecutionErrorPolicy::Null>; + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasToken) { diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index d7381e336b5..32675b9384d 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -6,6 +6,7 @@ namespace DB { + struct NameHasTokenCaseInsensitive { static constexpr auto name = "hasTokenCaseInsensitive"; @@ -17,9 +18,9 @@ struct NameHasTokenCaseInsensitiveOrNull }; using FunctionHasTokenCaseInsensitive - = FunctionsStringSearch>; + = FunctionsStringSearch>; using FunctionHasTokenCaseInsensitiveOrNull - = FunctionsStringSearch, ExecutionErrorPolicy::Null>; + = FunctionsStringSearch, ExecutionErrorPolicy::Null>; REGISTER_FUNCTION(HasTokenCaseInsensitive) { From e3189e29f7d087cba591c2dc3dc9ae0cb6ce9df0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 11:56:11 +0200 Subject: [PATCH 140/522] Allow running in a stripped chroot --- src/Client/ClientBase.cpp | 4 +++- src/Client/ConnectionParameters.cpp | 10 +++++++++- src/Common/checkStackSize.cpp | 16 ++++++++++++++-- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..36714ac762d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2297,7 +2297,9 @@ void ClientBase::runInteractive() catch (const ErrnoException & e) { if (e.getErrno() != EEXIST) - throw; + { + std::cerr << getCurrentExceptionMessage(false) << '\n'; + } } } diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index c47d217d432..f6630a06939 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -60,7 +60,15 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati quota_key = config.getString("quota_key", ""); /// By default compression is disabled if address looks like localhost. - compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host))) + + /// Avoid DNS request if the host is "localhost". + /// If ClickHouse is run under QEMU-user with a binary for a different architecture, + /// and there are all listed startup dependency shared libraries available, but not the runtime dependencies of glibc, + /// the glibc cannot open "plugins" for DNS resolving, and the DNS resolution does not work. + /// At the same time, I want clickhouse-local to always work, regardless. + /// TODO: get rid of glibc, or replace getaddrinfo to c-ares. + + compression = config.getBool("compression", host != "localhost" && !isLocalAddress(DNSResolver::instance().resolveHost(host))) ? 
Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts( diff --git a/src/Common/checkStackSize.cpp b/src/Common/checkStackSize.cpp index 67d163938b4..8847d37df3a 100644 --- a/src/Common/checkStackSize.cpp +++ b/src/Common/checkStackSize.cpp @@ -27,7 +27,7 @@ static thread_local size_t max_stack_size = 0; * @param out_address - if not nullptr, here the address of the stack will be written. * @return stack size */ -size_t getStackSize(void ** out_address) +static size_t getStackSize(void ** out_address) { using namespace DB; @@ -54,7 +54,15 @@ size_t getStackSize(void ** out_address) throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR); # else if (0 != pthread_getattr_np(pthread_self(), &attr)) - throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + { + if (errno == ENOENT) + { + /// Most likely procfs is not mounted. + return 0; + } + else + throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR); + } # endif SCOPE_EXIT({ pthread_attr_destroy(&attr); }); @@ -83,6 +91,10 @@ __attribute__((__weak__)) void checkStackSize() if (!stack_address) max_stack_size = getStackSize(&stack_address); + /// The check is impossible. + if (!max_stack_size) + return; + const void * frame_address = __builtin_frame_address(0); uintptr_t int_frame_address = reinterpret_cast(frame_address); uintptr_t int_stack_address = reinterpret_cast(stack_address); From 7024527542dd341e32dfe313cc54f8f537b69c98 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:43:10 +0200 Subject: [PATCH 141/522] Fix flaky test 00417_kill_query --- tests/queries/0_stateless/00417_kill_query.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index dc690caca39..6eb0505f6bb 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -9,13 +9,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_FIELND_NUM=4 -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 4" &>/dev/null & +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 30" &>/dev/null & sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) 
SYNC" | cut -f $QUERY_FIELND_NUM -$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 5" &>/dev/null & +$CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & sleep 1 -$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 5' ASYNC" | cut -f $QUERY_FIELND_NUM +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 ASYNC" $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE 0 FORMAT TabSeparated" From d98776b70850f140494bc5e799219877f50124ca Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 12:44:03 +0200 Subject: [PATCH 142/522] Fix flaky test 00417_kill_query --- tests/queries/0_stateless/00417_kill_query.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00417_kill_query.reference b/tests/queries/0_stateless/00417_kill_query.reference index 7e89d9674db..1a3b47964c0 100644 --- a/tests/queries/0_stateless/00417_kill_query.reference +++ b/tests/queries/0_stateless/00417_kill_query.reference @@ -1,2 +1,2 @@ -SELECT sleep(1) FROM system.numbers LIMIT 4 -SELECT sleep(1) FROM system.numbers LIMIT 5 +SELECT sleep(1) FROM system.numbers LIMIT 30 +SELECT sleep(1) FROM system.numbers LIMIT 31 From 112310e98fce282516b633c1b0a193e45b278aec Mon Sep 17 00:00:00 2001 From: xuelei Date: Wed, 28 Jun 2023 19:17:53 +0800 Subject: [PATCH 143/522] fix storage policy prompt display error --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 02789132e55..6faa7c13c49 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(new_storage_policy->getName())); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(new_storage_policy->getName())); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); } } From a3994319776c77576bff2a256aed77265423e279 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 28 Jun 2023 16:40:57 +0300 Subject: [PATCH 144/522] Update StoragePolicy.cpp --- src/Disks/StoragePolicy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 6faa7c13c49..a02568f9489 100644 --- 
a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of the old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +310,7 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of Old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of the old storage policy {}", backQuote(new_storage_policy->getName()),backQuote(name)); } } From 23d0a9e3a83f263f563c0d2b0983bff6aa9a2d90 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 16:20:45 +0200 Subject: [PATCH 145/522] fix --- .../01861_explain_pipeline.reference | 18 +++++----- ...inal_streams_data_skipping_index.reference | 36 +++++++++---------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/01861_explain_pipeline.reference b/tests/queries/0_stateless/01861_explain_pipeline.reference index aec3ae06dce..427b3eaefc0 100644 --- a/tests/queries/0_stateless/01861_explain_pipeline.reference +++ b/tests/queries/0_stateless/01861_explain_pipeline.reference @@ -17,14 +17,12 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 ReplacingSorted - ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [(5), +inf) - ExpressionTransform - MergeTreeInOrder 0 → 1 - ReplacingSorted 2 → 1 + FilterSortedStreamByRange + Description: filter values in [(5), +inf) + ExpressionTransform + MergeTreeInOrder 0 → 1 + ReplacingSorted 2 → 1 + FilterSortedStreamByRange × 2 + Description: filter values in [-inf, (5)) ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [-inf, (5)) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 + MergeTreeInOrder × 2 0 → 1 diff --git a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference index d7a540ae479..5242c625325 100644 --- a/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference +++ b/tests/queries/0_stateless/02780_final_streams_data_skipping_index.reference @@ -9,17 +9,15 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 AggregatingSortedTransform 2 → 1 - ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [(999424), +inf) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 - AggregatingSortedTransform + FilterSortedStreamByRange × 2 + Description: filter values in [(999424), +inf) + ExpressionTransform × 2 + MergeTreeInOrder × 2 0 → 1 + AggregatingSortedTransform + FilterSortedStreamByRange + Description: filter values in [-inf, (999424)) ExpressionTransform - 
FilterSortedStreamByRange - Description: filter values in [-inf, (999424)) - ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeInOrder 0 → 1 EXPLAIN PIPELINE SELECT * FROM data FINAL WHERE v1 >= now() - INTERVAL 180 DAY SETTINGS max_threads=2, max_final_threads=2, force_data_skipping_indices='v1_index', use_skip_indexes_if_final=0 FORMAT LineAsString; @@ -30,14 +28,12 @@ ExpressionTransform × 2 (ReadFromMergeTree) ExpressionTransform × 2 AggregatingSortedTransform 2 → 1 - ExpressionTransform × 2 - FilterSortedStreamByRange × 2 - Description: filter values in [(999424), +inf) - ExpressionTransform × 2 - MergeTreeInOrder × 2 0 → 1 - AggregatingSortedTransform + FilterSortedStreamByRange × 2 + Description: filter values in [(999424), +inf) + ExpressionTransform × 2 + MergeTreeInOrder × 2 0 → 1 + AggregatingSortedTransform + FilterSortedStreamByRange + Description: filter values in [-inf, (999424)) ExpressionTransform - FilterSortedStreamByRange - Description: filter values in [-inf, (999424)) - ExpressionTransform - MergeTreeInOrder 0 → 1 + MergeTreeInOrder 0 → 1 From c9fad7b1410740d7ada64b65dfda5fefbe4a45ff Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 18:40:48 +0200 Subject: [PATCH 146/522] Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests --- .../0_stateless/02782_uniq_exact_parallel_merging_bug.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh b/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh index d84ffd21b87..a7f71eacf0f 100755 --- a/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh +++ b/tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh @@ -1,10 +1,8 @@ #!/usr/bin/env bash -# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan +# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan, no-parallel # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh From 7f8ad3d5cbab240a5ef4d75b55f55478ceed22e0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:48:54 +0200 Subject: [PATCH 147/522] Convert assert to LOGICAL_ERROR in createBlockSelector() for zero weight Signed-off-by: Azat Khuzhin --- src/Interpreters/createBlockSelector.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index 659fc483373..a8eb39e6c9d 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include @@ -12,13 +13,19 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + template IColumn::Selector createBlockSelector( const IColumn & column, const std::vector & slots) { const auto total_weight = slots.size(); - assert(total_weight != 0); + if (total_weight == 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "weight is zero"); size_t num_rows = column.size(); IColumn::Selector selector(num_rows); From c9adfe1efd9aa0210185eecfbc9d446f4060077f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:53:14 +0200 Subject: [PATCH 148/522] Prohibit cluster with zero weight across all shards Before it leads to SIGSEGV, due to either divizion by zero or an a check in libdivide. 
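For example, in a simplified model of createBlockSelector() (not the actual code), the selector maps each sharding key to a slot by taking it modulo the total weight, so a weight sum of zero means division by zero on the plain path or a failed internal check on the libdivide path:

    #include <cstddef>
    #include <vector>

    // Simplified model: `slots` has one entry per unit of shard weight, so a
    // cluster whose shards all have weight 0 produces an empty vector here.
    size_t pickShardSlot(const std::vector<size_t> & slots, size_t sharding_key)
    {
        const size_t total_weight = slots.size();
        // Before this patch nothing stopped total_weight from being 0, which
        // makes the modulo below undefined behaviour (or trips libdivide's
        // internal divider check when that code path is used).
        return slots[sharding_key % total_weight];
    }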
Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index edbef77ef02..89bfb70f7c5 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; extern const int INVALID_SHARD_ID; extern const int NO_SUCH_REPLICA; + extern const int BAD_ARGUMENTS; } namespace @@ -614,6 +615,12 @@ Poco::Timespan Cluster::saturate(Poco::Timespan v, Poco::Timespan limit) void Cluster::initMisc() { + /// NOTE: It is possible to have cluster w/o shards for + /// optimize_skip_unused_shards (i.e. WHERE 0 expression), so check the + /// slots only if shards is not empty. + if (!shards_info.empty() && slot_to_shard.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cluster with zero weight on all shards is prohibited"); + for (const auto & shard_info : shards_info) { if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) From 2a12fb42461f0916455a9efd8fd9b5ada4edca69 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:57:53 +0200 Subject: [PATCH 149/522] Initialize weight/slot_to_shards for cluster not from xml correcty This is: - clusterAllReplicas - copier - some distributed cases Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 89bfb70f7c5..891586d88b6 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -525,7 +525,7 @@ Cluster::Cluster( addresses_with_failover.emplace_back(current); - addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num); + addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num, /* insert_paths= */ {}, /* weight= */ 1); ++current_shard_num; } @@ -553,7 +553,7 @@ Cluster::Cluster( addresses_with_failover.emplace_back(current); - addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num); + addShard(settings, std::move(current), params.treat_local_as_remote, current_shard_num, /* insert_paths= */ {}, /* weight= */ 1); ++current_shard_num; } @@ -715,6 +715,7 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti ShardInfo info; info.shard_num = ++shard_num; + info.weight = 1; if (address.is_local) info.local_addresses.push_back(address); @@ -740,6 +741,8 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti info.per_replica_pools = {std::move(pool)}; addresses_with_failover.emplace_back(Addresses{address}); + + slot_to_shard.insert(std::end(slot_to_shard), info.weight, shards_info.size()); shards_info.emplace_back(std::move(info)); } }; @@ -769,7 +772,11 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector { for (size_t index : indices) { - shards_info.emplace_back(from.shards_info.at(index)); + const auto & from_shard = from.shards_info.at(index); + + if (from_shard.weight) + slot_to_shard.insert(std::end(slot_to_shard), from_shard.weight, shards_info.size()); + shards_info.emplace_back(from_shard); if (!from.addresses_with_failover.empty()) addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); From 006d05c6a7aacc6f1c321822725389778b8c299c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 18:03:04 +0200 Subject: [PATCH 150/522] Add test for INSERT INTO 
clusterAllReplicas() (leads to SIGSEGV before) Signed-off-by: Azat Khuzhin --- .../0_stateless/02804_clusterAllReplicas_insert.reference | 1 + .../queries/0_stateless/02804_clusterAllReplicas_insert.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference create mode 100644 tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql diff --git a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql new file mode 100644 index 00000000000..05bda19eb9e --- /dev/null +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql @@ -0,0 +1,5 @@ +drop table if exists data; +create table data (key Int) engine=Memory(); +-- NOTE: internal_replication is false, so INSERT will be done only into one shard +insert into function clusterAllReplicas(test_cluster_two_shards, currentDatabase(), data, rand()) values (2); +select * from data order by key; From 71c144530081549c776e6432a48bebbca9f9f135 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 28 Jun 2023 21:45:56 +0200 Subject: [PATCH 151/522] Update 00417_kill_query.sh --- tests/queries/0_stateless/00417_kill_query.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/00417_kill_query.sh b/tests/queries/0_stateless/00417_kill_query.sh index 6eb0505f6bb..cd5b788a147 100755 --- a/tests/queries/0_stateless/00417_kill_query.sh +++ b/tests/queries/0_stateless/00417_kill_query.sh @@ -13,6 +13,7 @@ $CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LI sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query LIKE 'SELECT sleep(%' AND (elapsed >= 0.) SYNC" | cut -f $QUERY_FIELND_NUM +# 31 is for the query to be different from the previous one $CLICKHOUSE_CLIENT --max_block_size=1 -q "SELECT sleep(1) FROM system.numbers LIMIT 31" &>/dev/null & sleep 1 $CLICKHOUSE_CLIENT -q "KILL QUERY WHERE current_database = '${CLICKHOUSE_DATABASE}' and query = 'SELECT sleep(1) FROM system.numbers LIMIT 31' ASYNC" | cut -f $QUERY_FIELND_NUM From fdd8a0a3966028a5c72e7ce5e07410f68ce50da5 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:35:07 +0200 Subject: [PATCH 152/522] Fix flaky test 00416_pocopatch_progress_in_http_headers --- ...0416_pocopatch_progress_in_http_headers.sh | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index b2189ab0cc2..7e954db2c86 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -4,9 +4,28 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' +RETRIES=5 + +result="" +lines_expected=4 +counter=0 +while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + let counter=counter+1 + # echo "$result" | wc -l +done +echo "$result" + +result="" +lines_expected=12 +counter=0 +while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') + let counter=counter+1 + # echo "$result" | wc -l +done +echo "$result" -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]' ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d # 'send_progress_in_http_headers' is false by default @@ -26,7 +45,13 @@ ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query (record UInt32) Engine = Memory' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'CREATE TABLE insert_number_query_2 (record UInt32) Engine = Memory' > /dev/null 2>&1 -${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]' +result="" +counter=0 +while [ $counter -lt $RETRIES ] && [ -z "$result" ]; do + result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&http_headers_progress_interval_ms=0&send_progress_in_http_headers=1" -d 'INSERT INTO insert_number_query (record) SELECT number FROM system.numbers LIMIT 10' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Summary|^[0-9]') + let counter=counter+1 +done +echo "$result" ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query' > /dev/null 2>&1 ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}" -H 'Accept-Encoding: gzip' -d 'DROP TABLE insert_number_query_2' > /dev/null 2>&1 From 58581ce5f6bdfe0df9135a95c0df14404af91e2a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Thu, 29 Jun 2023 02:37:09 +0200 Subject: [PATCH 153/522] Update 00416_pocopatch_progress_in_http_headers.sh --- .../0_stateless/00416_pocopatch_progress_in_http_headers.sh | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index 7e954db2c86..ad7e89a7357 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -12,7 +12,6 @@ counter=0 while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 - # echo "$result" | wc -l done echo "$result" @@ -22,7 +21,6 @@ counter=0 while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 - # echo "$result" | wc -l done echo "$result" From 919bf5429478261cd9d0329129191323e77263f2 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 29 Jun 2023 12:38:46 +0200 Subject: [PATCH 154/522] fix race condition --- src/IO/WriteBufferFromS3.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 8714282f7a8..a72fac138b3 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -223,8 +223,8 @@ String WriteBufferFromS3::getShortLogDetails() const multipart_upload_details = fmt::format(", upload id {}" , multipart_upload_id); - return fmt::format("Details: bucket {}, key {}, total size {}{}", - bucket, key, total_size, multipart_upload_details); + return fmt::format("Details: bucket {}, key {}{}", + bucket, key, multipart_upload_details); } void WriteBufferFromS3::tryToAbortMultipartUpload() From 60ce9773e0310992fd900e2cbc7a0f0f2f858c10 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 29 Jun 2023 10:42:33 +0000 Subject: [PATCH 155/522] Added docs for primary_key_size --- docs/en/operations/system-tables/parts.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index e61c6ed2ba4..861104ff236 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -39,6 +39,8 @@ Columns: - `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size (in bytes) of the primary key values stored in the primary.idx/cidx file on disk. + - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. - `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
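A brief usage sketch for the documented column: the query below lists `primary_key_size` next to a few of the pre-existing size columns of `system.parts`. The table name `data` and the choice of companion columns are illustrative assumptions, not part of this patch.

-- Sketch: show per-part sizes for a hypothetical MergeTree table named `data`.
-- `primary_key_size` is the column documented above; the other columns already exist.
SELECT name, primary_key_size, marks_bytes, data_compressed_bytes
FROM system.parts
WHERE table = 'data' AND active
ORDER BY name;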
From f1d695463717703d9c9f076b0e18972425b6bf46 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:09:55 +0000 Subject: [PATCH 156/522] Fix logical error in ANTI join with NULL --- src/Interpreters/HashJoin.cpp | 3 +++ .../02771_semi_join_use_nulls.reference | 16 ++++++++++++++++ .../0_stateless/02771_semi_join_use_nulls.sql.j2 | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6fe2b8464f5..7fee2ab7a6f 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1383,6 +1383,9 @@ NO_INLINE IColumn::Filter joinRightColumns( { if (!right_row_found && null_element_found) { + if constexpr (join_features.is_anti_join && join_features.left) + setUsed(filter, i); + addNotFoundRow(added_columns, current_offset); if constexpr (join_features.need_replication) diff --git a/tests/queries/0_stateless/02771_semi_join_use_nulls.reference b/tests/queries/0_stateless/02771_semi_join_use_nulls.reference index 8d4b1a3a75e..91c0d964968 100644 --- a/tests/queries/0_stateless/02771_semi_join_use_nulls.reference +++ b/tests/queries/0_stateless/02771_semi_join_use_nulls.reference @@ -11,7 +11,9 @@ 0 0 0 0 0 1 +\N 0 0 1 +\N 0 0 0 0 0 0 \N @@ -25,7 +27,9 @@ 0 0 0 0 0 2 +\N 1 0 2 +\N 1 0 \N 0 0 0 \N @@ -39,7 +43,9 @@ 0 \N 0 \N 0 1 +\N \N 0 1 +\N \N 0 0 0 0 0 0 @@ -53,7 +59,9 @@ 0 0 0 0 \N 2 +\N 1 \N 2 +\N 1 0 0 0 0 0 0 @@ -67,7 +75,9 @@ 0 0 0 0 0 1 +\N 0 0 1 +\N 0 0 0 0 0 0 0 @@ -81,7 +91,9 @@ 0 0 0 0 1 2 +\N 1 1 2 +\N 1 0 \N 0 0 0 0 @@ -95,7 +107,9 @@ 0 \N 0 \N 0 1 +\N \N 0 1 +\N \N 0 0 0 0 0 0 @@ -109,4 +123,6 @@ 0 0 0 0 1 2 +\N 1 1 2 +\N 1 diff --git a/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 b/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 index 37b2e63761b..248461a98bb 100644 --- a/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 +++ b/tests/queries/0_stateless/02771_semi_join_use_nulls.sql.j2 @@ -44,6 +44,12 @@ SELECT id > 1, d.idd FROM (SELECT {{ maybe_materialize }}(toLowCardinality(0)) A ON a.id = d.idd ; +SELECT * +FROM (SELECT {{ maybe_materialize }}(NULL :: Nullable(UInt64)) AS id) AS a +{{ strictness }} {{ kind }} JOIN (SELECT {{ maybe_materialize }}(1 :: UInt32) AS id) AS d +ON a.id = d.id +; + {% endfor -%} {% endfor -%} {% endfor -%} From c43acc6f909d22dab3c3282fabb46c3c6d877080 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:19:04 +0000 Subject: [PATCH 157/522] better fix --- src/Interpreters/HashJoin.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 7fee2ab7a6f..3e4f2902359 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1284,7 +1284,6 @@ NO_INLINE IColumn::Filter joinRightColumns( for (size_t i = 0; i < rows; ++i) { bool right_row_found = false; - bool null_element_found = false; KnownRowsHolder known_rows; for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) @@ -1293,10 +1292,7 @@ NO_INLINE IColumn::Filter joinRightColumns( if constexpr (has_null_map) { if (join_keys.null_map && (*join_keys.null_map)[i]) - { - null_element_found = true; continue; - } } bool row_acceptable = !join_keys.isRowFiltered(i); @@ -1379,23 +1375,6 @@ NO_INLINE IColumn::Filter joinRightColumns( } } - if constexpr (has_null_map) - { - if (!right_row_found && null_element_found) - { - if constexpr (join_features.is_anti_join && join_features.left) - setUsed(filter, i); - - 
addNotFoundRow(added_columns, current_offset); - - if constexpr (join_features.need_replication) - { - (*added_columns.offsets_to_replicate)[i] = current_offset; - } - continue; - } - } - if (!right_row_found) { if constexpr (join_features.is_anti_join && join_features.left) From 4581526af76848ee7370d685e96f9cc3c464df6c Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 29 Jun 2023 13:24:55 +0000 Subject: [PATCH 158/522] Remove has_null_map template parameter from hash join --- src/Interpreters/HashJoin.cpp | 50 +++++++++-------------------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 3e4f2902359..967e58f6d40 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -581,7 +581,7 @@ namespace }; - template + template size_t NO_INLINE insertFromBlockImplTypeCase( HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) @@ -600,7 +600,7 @@ namespace for (size_t i = 0; i < rows; ++i) { - if (has_null_map && (*null_map)[i]) + if (null_map && (*null_map)[i]) { /// nulls are not inserted into hash table, /// keep them for RIGHT and FULL joins @@ -622,21 +622,6 @@ namespace return map.getBufferSizeInCells(); } - - template - size_t insertFromBlockImplType( - HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) - { - if (null_map) - return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); - else - return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); - } - - template size_t insertFromBlockImpl( HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, @@ -653,7 +638,7 @@ namespace #define M(TYPE) \ case HashJoin::Type::TYPE: \ - return insertFromBlockImplType>::Type>(\ + return insertFromBlockImplTypeCase>::Type>(\ join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ break; @@ -1260,7 +1245,7 @@ void setUsed(IColumn::Filter & filter [[maybe_unused]], size_t pos [[maybe_unuse /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
-template +template NO_INLINE IColumn::Filter joinRightColumns( std::vector && key_getter_vector, const std::vector & mapv, @@ -1289,11 +1274,8 @@ NO_INLINE IColumn::Filter joinRightColumns( for (size_t onexpr_idx = 0; onexpr_idx < added_columns.join_on_keys.size(); ++onexpr_idx) { const auto & join_keys = added_columns.join_on_keys[onexpr_idx]; - if constexpr (has_null_map) - { - if (join_keys.null_map && (*join_keys.null_map)[i]) - continue; - } + if (join_keys.null_map && (*join_keys.null_map)[i]) + continue; bool row_acceptable = !join_keys.isRowFiltered(i); using FindResult = typename KeyGetter::FindResult; @@ -1392,7 +1374,7 @@ NO_INLINE IColumn::Filter joinRightColumns( return filter; } -template +template IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( std::vector && key_getter_vector, const std::vector & mapv, @@ -1400,8 +1382,8 @@ IColumn::Filter joinRightColumnsSwitchMultipleDisjuncts( JoinStuff::JoinUsedFlags & used_flags [[maybe_unused]]) { return mapv.size() > 1 - ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) - : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + ? joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags) + : joinRightColumns(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } template @@ -1411,21 +1393,13 @@ IColumn::Filter joinRightColumnsSwitchNullability( AddedColumns & added_columns, JoinStuff::JoinUsedFlags & used_flags) { - bool has_null_map = std::any_of(added_columns.join_on_keys.begin(), added_columns.join_on_keys.end(), - [](const auto & k) { return k.null_map; }); if (added_columns.need_filter) { - if (has_null_map) - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - else - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } else { - if (has_null_map) - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); - else - return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); + return joinRightColumnsSwitchMultipleDisjuncts(std::forward>(key_getter_vector), mapv, added_columns, used_flags); } } @@ -1850,7 +1824,7 @@ struct AdderNonJoined /// Based on: /// - map offsetInternal saved in used_flags for single disjuncts /// - flags in BlockWithFlags for multiple disjuncts -template +template class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller { public: From ebeef65920fe671d64d8632ec6ca4d535bc2247e Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Jun 2023 13:35:27 +0000 Subject: [PATCH 159/522] Add placeholder for full filename in rename_files_after_processing setting --- docs/en/operations/settings/settings.md | 1 + docs/ru/operations/settings/settings.md | 1 + src/Common/FileRenamer.cpp | 6 ++++-- src/Common/FileRenamer.h | 1 + src/Core/Settings.h | 2 +- .../02732_rename_after_processing.reference | 3 +++ .../0_stateless/02732_rename_after_processing.sh | 10 ++++++++++ 7 files changed, 21 insertions(+), 3 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cff65e049f3..25baad11282 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ 
-4488,6 +4488,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta ### Placeholders +- `%a` — Full original filename (e.g., "sample.csv"). - `%f` — Original filename without extension (e.g., "sample"). - `%e` — Original file extension with dot (e.g., ".csv"). - `%t` — Timestamp (in microseconds). diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index f83d05ff710..70f2793f6bb 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -4195,6 +4195,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi ### Шаблон Шаблон поддерживает следующие виды плейсхолдеров: +- `%a` — Полное исходное имя файла (например "sample.csv"). - `%f` — Исходное имя файла без расширения (например "sample"). - `%e` — Оригинальное расширение файла с точкой (например ".csv"). - `%t` — Текущее время (в микросекундах). diff --git a/src/Common/FileRenamer.cpp b/src/Common/FileRenamer.cpp index 3473d543c00..33b55233a2e 100644 --- a/src/Common/FileRenamer.cpp +++ b/src/Common/FileRenamer.cpp @@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const // Define placeholders and their corresponding values std::map placeholders = { + {"%a", filename}, {"%f", file_base}, {"%e", file_ext}, {"%t", timestamp}, @@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error) { // Check if the rule contains invalid placeholders - re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$"); + re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$"); if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern)) { if (throw_on_error) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%"); return false; } // Replace valid placeholders with empty strings and count remaining percentage signs. String replaced_rule = rule; + boost::replace_all(replaced_rule, "%a", ""); boost::replace_all(replaced_rule, "%f", ""); boost::replace_all(replaced_rule, "%e", ""); boost::replace_all(replaced_rule, "%t", ""); diff --git a/src/Common/FileRenamer.h b/src/Common/FileRenamer.h index c062978d6f6..91f74f09032 100644 --- a/src/Common/FileRenamer.h +++ b/src/Common/FileRenamer.h @@ -9,6 +9,7 @@ namespace DB /** * The FileRenamer class provides functionality for renaming files based on given pattern with placeholders * The supported placeholders are: + * %a - Full original file name ("sample.csv") * %f - Original filename without extension ("sample") * %e - Original file extension with dot (".csv") * %t - Timestamp (in microseconds) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 658f3c8025b..099ad1ea649 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -733,7 +733,7 @@ class IColumn; M(String, workload, "default", "Name of workload to be used to access resources", 0) \ M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. 
This setting is used for testing purposes and not meant to be changed by users.", 0) \ \ - M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ + M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \ \ M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \ M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \ diff --git a/tests/queries/0_stateless/02732_rename_after_processing.reference b/tests/queries/0_stateless/02732_rename_after_processing.reference index 39cdb677e09..86f682d682c 100644 --- a/tests/queries/0_stateless/02732_rename_after_processing.reference +++ b/tests/queries/0_stateless/02732_rename_after_processing.reference @@ -19,3 +19,6 @@ OK tmp5.csv OK tmp5.csv +4 +tmp6.csv.processed +!tmp6.csv diff --git a/tests/queries/0_stateless/02732_rename_after_processing.sh b/tests/queries/0_stateless/02732_rename_after_processing.sh index c4f80d3462b..cdbc9892bc7 100755 --- a/tests/queries/0_stateless/02732_rename_after_processing.sh +++ b/tests/queries/0_stateless/02732_rename_after_processing.sh @@ -29,6 +29,7 @@ cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp3_1.csv cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp3_2.csv cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp4.csv cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp5.csv +cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp6.csv ### Checking that renaming works @@ -115,5 +116,14 @@ if [ -e "${tmp_dir}/tmp5.csv" ]; then echo "tmp5.csv" fi +# check full file name placeholder +${CLICKHOUSE_CLIENT} --rename-files-after-processing="%a.processed" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp6.csv')" +if [ -e "${tmp_dir}/tmp6.csv.processed" ]; then + echo "tmp6.csv.processed" +fi +if [ ! 
-e "${tmp_dir}/tmp6.csv" ]; then + echo "!tmp6.csv" +fi + # Clean rm -rd $tmp_dir From 843e910309c1ed6d488fce35230a0f658dee33f1 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 29 Jun 2023 18:24:22 +0200 Subject: [PATCH 160/522] impl --- tests/queries/0_stateless/00474_readonly_settings.sh | 2 -- tests/queries/0_stateless/01455_opentelemetry_distributed.sh | 2 -- tests/queries/0_stateless/01526_initial_query_id.sh | 2 -- .../0_stateless/01732_race_condition_storage_join_long.sh | 2 -- tests/queries/0_stateless/01872_initial_query_start_time.sh | 2 -- tests/queries/0_stateless/02030_rocksdb_race_long.sh | 2 -- tests/queries/0_stateless/02151_hash_table_sizes_stats.sh | 2 -- .../0_stateless/02151_hash_table_sizes_stats_distributed.sh | 2 -- .../02377_extend_protocol_with_query_parameters.sh | 2 -- ...empty_blocks_from_ConvertingAggregatedToChunksTransform.sh | 2 -- tests/queries/0_stateless/02473_functions_in_readonly_mode.sh | 4 +--- .../0_stateless/02499_monotonicity_toUnixTimestamp64.sh | 2 -- .../queries/0_stateless/02681_final_excessive_reading_bug.sh | 2 -- tests/queries/1_stateful/00177_memory_bound_merging.sh | 2 -- 14 files changed, 1 insertion(+), 29 deletions(-) diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 07b78c64a7e..9432579f9e6 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 0dfec6097db..806da902a3c 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -3,8 +3,6 @@ set -ue -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh index f9d739b57cd..e77764ee34e 100755 --- a/tests/queries/0_stateless/01526_initial_query_id.sh +++ b/tests/queries/0_stateless/01526_initial_query_id.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -ue -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh b/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh index 5bb10220f7f..48e726aca9d 100755 --- a/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh +++ b/tests/queries/0_stateless/01732_race_condition_storage_join_long.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash # Tags: race -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01872_initial_query_start_time.sh b/tests/queries/0_stateless/01872_initial_query_start_time.sh index dbfb89a05a1..6a935602ea4 100755 --- a/tests/queries/0_stateless/01872_initial_query_start_time.sh +++ b/tests/queries/0_stateless/01872_initial_query_start_time.sh @@ -3,8 +3,6 @@ set -ue # this test doesn't need 'current_database = currentDatabase()', -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02030_rocksdb_race_long.sh b/tests/queries/0_stateless/02030_rocksdb_race_long.sh index 88c30852c86..da31861991c 100755 --- a/tests/queries/0_stateless/02030_rocksdb_race_long.sh +++ b/tests/queries/0_stateless/02030_rocksdb_race_long.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash # Tags: race -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh index 4a1eea0a238..fd6e44577d9 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats.sh @@ -3,8 +3,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh index 237bbe9edd9..703b2c4357c 100755 --- a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh @@ -5,8 +5,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh index e61dc337d2a..71e3b6961f8 100755 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh @@ -2,8 +2,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh b/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh index 08c7e18e12c..32693adff24 100755 --- a/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh +++ b/tests/queries/0_stateless/02418_do_not_return_empty_blocks_from_ConvertingAggregatedToChunksTransform.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash set -ue -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh index 5e11704e6ce..da3429a1d3e 100755 --- a/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh +++ b/tests/queries/0_stateless/02473_functions_in_readonly_mode.sh @@ -1,7 +1,5 @@ #!/usr/bin/env bash -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -11,4 +9,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * from format('TSV', '123')" $CLICKHOUSE_CLIENT --readonly=1 --query="SELECT * from numbers(1)" $CLICKHOUSE_CLIENT --readonly=1 --query="SELECT * from format('TSV', '123')" 2>&1 | grep -Fq "Cannot execute query in readonly mode. (READONLY)" && echo 'ERROR' || echo 'OK' -$CLICKHOUSE_CLIENT --readonly=1 --query="INSERT INTO FUNCTION null('x String') (x) FORMAT TSV '123'" 2>&1 | grep -Fq "Cannot execute query in readonly mode. (READONLY)" && echo 'ERROR' || echo 'OK' \ No newline at end of file +$CLICKHOUSE_CLIENT --readonly=1 --query="INSERT INTO FUNCTION null('x String') (x) FORMAT TSV '123'" 2>&1 | grep -Fq "Cannot execute query in readonly mode. (READONLY)" && echo 'ERROR' || echo 'OK' diff --git a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh index 5d787aa0d8e..59b6e2abb06 100755 --- a/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh +++ b/tests/queries/0_stateless/02499_monotonicity_toUnixTimestamp64.sh @@ -3,8 +3,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh index a795b9ec5a0..120666d6156 100755 --- a/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh +++ b/tests/queries/0_stateless/02681_final_excessive_reading_bug.sh @@ -3,8 +3,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/1_stateful/00177_memory_bound_merging.sh b/tests/queries/1_stateful/00177_memory_bound_merging.sh index 774f005b8eb..2c531b064db 100755 --- a/tests/queries/1_stateful/00177_memory_bound_merging.sh +++ b/tests/queries/1_stateful/00177_memory_bound_merging.sh @@ -2,8 +2,6 @@ # shellcheck disable=SC2154 -unset CLICKHOUSE_LOG_COMMENT - CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh From 49ab480d40f268df1e597dfe14426eb5416a5fd6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Jun 2023 23:09:58 +0300 Subject: [PATCH 161/522] Update 00416_pocopatch_progress_in_http_headers.sh --- .../0_stateless/00416_pocopatch_progress_in_http_headers.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh index ad7e89a7357..2b0cae3c1d4 100755 --- a/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh +++ b/tests/queries/0_stateless/00416_pocopatch_progress_in_http_headers.sh @@ -9,7 +9,7 @@ RETRIES=5 result="" lines_expected=4 counter=0 -while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do +while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=5&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d 'SELECT max(number) FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 done @@ -18,7 +18,7 @@ echo "$result" result="" lines_expected=12 counter=0 -while [ $counter -lt $RETRIES ] && [ $(echo "$result" | wc -l) != "$lines_expected" ]; do +while [ $counter -lt $RETRIES ] && [ "$(echo "$result" | wc -l)" != "$lines_expected" ]; do result=$(${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" -d 'SELECT number FROM numbers(10)' 2>&1 | grep -E 'Content-Encoding|X-ClickHouse-Progress|^[0-9]') let counter=counter+1 done From 3c4491b706e0cbd89086db845eb582e1227f3a74 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Thu, 29 Jun 2023 14:31:40 -0700 Subject: [PATCH 162/522] Ignore APPEND and TRUNCATE modifiers if file does not exist. --- src/Client/ClientBase.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 34b3b1e228a..a8bdc5d0b08 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -575,9 +575,11 @@ try } auto flags = O_WRONLY | O_EXCL; - if (query_with_output->is_outfile_append) + + auto file_exists = fs::exists(out_file); + if (file_exists && query_with_output->is_outfile_append) flags |= O_APPEND; - else if (query_with_output->is_outfile_truncate) + else if (file_exists && query_with_output->is_outfile_truncate) flags |= O_TRUNC; else flags |= O_CREAT; From 42febefa966e89089065ecb6c7691731de4dde5c Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 29 Jun 2023 21:34:40 +0000 Subject: [PATCH 163/522] Try to fix flaky 02210_processors_profile_log --- tests/queries/0_stateless/02210_processors_profile_log.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 44e563ef57b..92f6ab94293 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -15,7 +15,7 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. 
- name = 'ExpressionTransform', elapsed_us>1e6, + name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, From a705b08bd81658e878d7b7d214b057c661bbed69 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 29 Jun 2023 22:30:15 +0000 Subject: [PATCH 164/522] Update reference --- .../0_stateless/02210_processors_profile_log.reference | 6 +++--- tests/queries/0_stateless/02210_processors_profile_log.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02210_processors_profile_log.reference b/tests/queries/0_stateless/02210_processors_profile_log.reference index 181022d2421..41543d0706a 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log.reference @@ -18,13 +18,13 @@ SELECT multiIf( -- ExpressionTransform executes sleep(), -- so IProcessor::work() will spend 1 sec. - name = 'ExpressionTransform', elapsed_us>1e6, + name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. - input_wait_elapsed_us>1e6) + input_wait_elapsed_us>=1e6) elapsed, input_rows, input_bytes, diff --git a/tests/queries/0_stateless/02210_processors_profile_log.sql b/tests/queries/0_stateless/02210_processors_profile_log.sql index 92f6ab94293..a15ed26fd67 100644 --- a/tests/queries/0_stateless/02210_processors_profile_log.sql +++ b/tests/queries/0_stateless/02210_processors_profile_log.sql @@ -18,10 +18,10 @@ SELECT name = 'ExpressionTransform', elapsed_us>=1e6, -- SourceFromSingleChunk, that feed data to ExpressionTransform, -- will feed first block and then wait in PortFull. - name = 'SourceFromSingleChunk', output_wait_elapsed_us>1e6, + name = 'SourceFromSingleChunk', output_wait_elapsed_us>=1e6, -- NullSource/LazyOutputFormatLazyOutputFormat are the outputs -- so they cannot starts to execute before sleep(1) will be executed. 
- input_wait_elapsed_us>1e6) + input_wait_elapsed_us>=1e6) elapsed, input_rows, input_bytes, From 8f240ffcce6230636de57fe8a8638df3a29ac5e3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 30 Jun 2023 10:50:44 +0200 Subject: [PATCH 165/522] tests: fix 02050_client_profile_events flakiness Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02050_client_profile_events.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index dce0c80525a..05e48de771d 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -25,7 +25,7 @@ profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events - test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'print each 100 ms' -profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" +profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events --profile-events-delay-ms=100 -q 'select sleep(0.2) from numbers(10) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'check that ProfileEvents is new for each query' From 8af96f7a177c02edc8425bc7bcd1aa9dad6f086c Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 09:49:29 +0000 Subject: [PATCH 166/522] Add UUID to UInt128 conversion --- src/Functions/FunctionsConversion.h | 15 +++++++++++++++ .../02810_convert_uuid_to_uint128.reference | 6 ++++++ .../0_stateless/02810_convert_uuid_to_uint128.sql | 8 ++++++++ 3 files changed, 29 insertions(+) create mode 100644 tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference create mode 100644 tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 00c4cfe7284..a79fd6c2e1b 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -203,6 +203,21 @@ struct ConvertImpl } } + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert(std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); + if constexpr (std::endian::native == std::endian::little) + { + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + } + else + { + vec_to[i] = vec_from[i].toUnderType(); + } + continue; + } + if constexpr (std::is_same_v != std::is_same_v) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference new file mode 100644 index 00000000000..3b44d4ba086 --- /dev/null +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference @@ -0,0 +1,6 @@ +0 +329871470813054077831677335124932328170 +340282366920938463463374607431768211455 +329871470813054077831677335124932328170 +329871470813054077831677335124932328170 +329871470813054077831677335124932328170 diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql new file mode 100644 index 00000000000..5350ef99ed3 --- /dev/null +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql @@ -0,0 +1,8 @@ 
+SELECT toUInt128(toUUID('00000000-0000-0000-0000-000000000000')); +SELECT toUInt128(toUUID('f82aef31-279e-431f-8b00-2899ad387aea')); +SELECT toUInt128(toUUID('ffffffff-ffff-ffff-ffff-ffffffffffff')); +SELECT toUInt64(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } +SELECT toInt128(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } +SELECT cast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); +select accurateCast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); +select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; \ No newline at end of file From 407a7e3cc50633f6ac73cbda6412db0ec28286f5 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 09:55:55 +0000 Subject: [PATCH 167/522] Edit assert message --- src/Functions/FunctionsConversion.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index a79fd6c2e1b..b272e88d17d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -205,7 +205,7 @@ struct ConvertImpl if constexpr (std::is_same_v && std::is_same_v) { - static_assert(std::is_same_v, "Can be used only to serialize to ColumnString or ColumnFixedString"); + static_assert(std::is_same_v, "UInt128 and UUID types must be same"); if constexpr (std::endian::native == std::endian::little) { vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; From 1cf021c0b11ef1f24312a98bb8e443067a4ad497 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 13:11:06 +0000 Subject: [PATCH 168/522] Add initcap prototype / tests --- .../functions/string-functions.md | 4 + .../functions/string-functions.md | 4 + src/Functions/initcap.cpp | 78 +++++++++++++++++++ .../0_stateless/02810_initcap.reference | 6 ++ tests/queries/0_stateless/02810_initcap.sql | 6 ++ 5 files changed, 98 insertions(+) create mode 100644 src/Functions/initcap.cpp create mode 100644 tests/queries/0_stateless/02810_initcap.reference create mode 100644 tests/queries/0_stateless/02810_initcap.sql diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 5175bbf0615..d2180c9f3ea 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1253,3 +1253,7 @@ Result: │ A240 │ └──────────────────┘ ``` + +## initcap + +Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. \ No newline at end of file diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 9638e25d488..bd104b27bed 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1113,3 +1113,7 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## initcap {#initcap} + +Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. 
\ No newline at end of file diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp new file mode 100644 index 00000000000..70c332d191d --- /dev/null +++ b/src/Functions/initcap.cpp @@ -0,0 +1,78 @@ +#include +#include + +namespace DB +{ +namespace +{ + +struct InitcapImpl +{ + static void vector(const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + res_data.resize(data.size()); + res_offsets.assign(offsets); + array(data.data(), data.data() + data.size(), res_data.data()); + } + + static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data) + { + res_data.resize(data.size()); + array(data.data(), data.data() + data.size(), res_data.data()); + } + +private: + static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) + { + const auto flip_case_mask = 'A' ^ 'a'; + + auto is_lower_alpha = [](UInt8 c) { return c >= 'a' && c <= 'z'; }; + auto is_upper_alpha = [](UInt8 c) { return c >= 'A' && c <= 'Z'; }; + //auto is_digit = [](UInt8 c) { return c >= '0' && c <= '9'; }; + + bool prev_is_alpha = false; + + for (; src < src_end; ++src, ++dst) + { + bool lower = is_lower_alpha(*src); + bool is_alpha = lower || is_upper_alpha(*src); + if (!is_alpha) + { + *dst = *src; + } + else if (!prev_is_alpha) + { + if (lower) + *dst = *src ^ flip_case_mask; + else + *dst = *src; + } + else + { + if (!lower) + *dst = *src ^ flip_case_mask; + else + *dst = *src; + } + prev_is_alpha = is_alpha; + } + } +}; + +struct NameInitcap +{ + static constexpr auto name = "initcap"; +}; +using FunctionInitcap = FunctionStringToString; + +} + +REGISTER_FUNCTION(Initcap) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference new file mode 100644 index 00000000000..9fda79e4afb --- /dev/null +++ b/tests/queries/0_stateless/02810_initcap.reference @@ -0,0 +1,6 @@ + +Hello +Hello +Hello World +Yeah, Well, I`M Gonna Go Build My Own Theme Park +Crc32ieee Is Best Function diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql new file mode 100644 index 00000000000..810ea52dd6a --- /dev/null +++ b/tests/queries/0_stateless/02810_initcap.sql @@ -0,0 +1,6 @@ +select initcap(''); +select initcap('Hello'); +select initcap('hello'); +select initcap('hello world'); +select initcap('yeah, well, i`m gonna go build my own theme park'); +select initcap('CRC32IEEE is best function'); From d6dacd3ccfe340410ead90ffcadd769716a61ec7 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 30 Jun 2023 13:53:20 +0000 Subject: [PATCH 169/522] Fix test with num --- src/Functions/initcap.cpp | 38 ++++++------------- .../0_stateless/02810_initcap.reference | 1 + tests/queries/0_stateless/02810_initcap.sql | 1 + 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 70c332d191d..7d0749ecb12 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -27,37 +28,22 @@ struct InitcapImpl private: static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) { - const auto flip_case_mask = 'A' ^ 'a'; - - auto is_lower_alpha = [](UInt8 c) { return c >= 'a' && c <= 'z'; }; - auto is_upper_alpha = [](UInt8 c) { return c >= 'A' && c <= 'Z'; }; - //auto is_digit = 
[](UInt8 c) { return c >= '0' && c <= '9'; }; - - bool prev_is_alpha = false; + bool prev_alphanum = false; for (; src < src_end; ++src, ++dst) { - bool lower = is_lower_alpha(*src); - bool is_alpha = lower || is_upper_alpha(*src); - if (!is_alpha) - { - *dst = *src; - } - else if (!prev_is_alpha) - { - if (lower) - *dst = *src ^ flip_case_mask; + char c = *src; + bool alphanum = isAlphaNumericASCII(c); + if (alphanum && !prev_alphanum) + if (isAlphaASCII(c)) + *dst = toUpperIfAlphaASCII(c); else - *dst = *src; - } + *dst = c; + else if (isAlphaASCII(c)) + *dst = toLowerIfAlphaASCII(c); else - { - if (!lower) - *dst = *src ^ flip_case_mask; - else - *dst = *src; - } - prev_is_alpha = is_alpha; + *dst = c; + prev_alphanum = alphanum; } } }; diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference index 9fda79e4afb..4caa57e5ac0 100644 --- a/tests/queries/0_stateless/02810_initcap.reference +++ b/tests/queries/0_stateless/02810_initcap.reference @@ -4,3 +4,4 @@ Hello Hello World Yeah, Well, I`M Gonna Go Build My Own Theme Park Crc32ieee Is Best Function +42ok diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql index 810ea52dd6a..f61fcddf4c1 100644 --- a/tests/queries/0_stateless/02810_initcap.sql +++ b/tests/queries/0_stateless/02810_initcap.sql @@ -4,3 +4,4 @@ select initcap('hello'); select initcap('hello world'); select initcap('yeah, well, i`m gonna go build my own theme park'); select initcap('CRC32IEEE is best function'); +select initcap('42oK'); \ No newline at end of file From 50449cc68d03f213c6b128ed51416d1de21ad1cd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 30 Jun 2023 19:07:32 +0200 Subject: [PATCH 170/522] fix write to finalized buffer --- src/Core/Settings.h | 2 +- src/Server/HTTPHandler.cpp | 7 +++---- .../0_stateless/00429_long_http_bufferization.sh | 13 ++++++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 658f3c8025b..288413857d4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -274,7 +274,7 @@ class IColumn; \ M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \ M(Bool, http_wait_end_of_query, false, "Enable HTTP response buffering on the server-side.", 0) \ - M(UInt64, http_response_buffer_size, false, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ + M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \ \ M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). 
Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ \ diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index fe98ae5f69e..a391e3bb2e4 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -902,10 +902,9 @@ try /// Destroy CascadeBuffer to actualize buffers' positions and reset extra references if (used_output.hasDelayed()) { - if (used_output.out_maybe_delayed_and_compressed) - { - used_output.out_maybe_delayed_and_compressed->finalize(); - } + /// do not call finalize here for CascadeWriteBuffer used_output.out_maybe_delayed_and_compressed, + /// exception is written into used_output.out_maybe_compressed later + /// HTTPHandler::trySendExceptionToClient is called with exception context, it is Ok to destroy buffers used_output.out_maybe_delayed_and_compressed.reset(); } diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 34d07cef7e3..55192422389 100755 --- a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -7,9 +7,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +format="RowBinary" + function query { # bash isn't able to store \0 bytes, so use [1; 255] random range - echo "SELECT greatest(toUInt8(1), toUInt8(intHash64(number))) FROM system.numbers LIMIT $1 FORMAT RowBinary" + echo "SELECT greatest(toUInt8(1), toUInt8(intHash64(number))) FROM system.numbers LIMIT $1 FORMAT $format" } function ch_url() { @@ -42,6 +44,14 @@ function check_last_line_exception() { } function check_exception_handling() { + # it is impossible to override max_block_size, details here https://github.com/ClickHouse/ClickHouse/issues/51694 + # rebuild CLICKHOUSE_URL for one call in order to avoid using random parameters from CLICKHOUSE_URL_PARAMS + CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?wait_end_of_query=0" \ + max_block_size=30000 \ + format=TSV \ + check_last_line_exception \ + "max_result_rows=400000&buffer_size=1048577&wait_end_of_query=0" 111222333444 + check_only_exception "max_result_bytes=1000" 1001 check_only_exception "max_result_bytes=1000&wait_end_of_query=1" 1001 @@ -60,6 +70,7 @@ check_exception_handling # Tune setting to speed up combinatorial test +# max_block_size has no effect here, that value has been set inside CLICKHOUSE_URL max_block_size=500000 corner_sizes="1048576 $(seq 500000 1000000 3500000)" From 9a35921d005be1e7b34493d34429fb9dbf306ef7 Mon Sep 17 00:00:00 2001 From: Manas Alekar Date: Fri, 30 Jun 2023 13:16:02 -0700 Subject: [PATCH 171/522] Add tests. 
--- tests/queries/0_stateless/00415_into_outfile.reference | 4 ++++ tests/queries/0_stateless/00415_into_outfile.sh | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/00415_into_outfile.reference b/tests/queries/0_stateless/00415_into_outfile.reference index a609e77a50a..4576a2d9d60 100644 --- a/tests/queries/0_stateless/00415_into_outfile.reference +++ b/tests/queries/0_stateless/00415_into_outfile.reference @@ -1,5 +1,9 @@ performing test: select 1 2 3 +performing test: select_with_append +1 2 3 +performing test: select_with_truncate +1 2 3 performing test: union_all 1 2 3 4 diff --git a/tests/queries/0_stateless/00415_into_outfile.sh b/tests/queries/0_stateless/00415_into_outfile.sh index 77dc96a48e6..d360a29fa5a 100755 --- a/tests/queries/0_stateless/00415_into_outfile.sh +++ b/tests/queries/0_stateless/00415_into_outfile.sh @@ -21,6 +21,10 @@ function perform() perform "select" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select.out'" +perform "select_with_append" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select_with_append.out' APPEND" + +perform "select_with_truncate" "SELECT 1, 2, 3 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_select_with_truncate.out' TRUNCATE" + perform "union_all" "SELECT 1, 2 UNION ALL SELECT 3, 4 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_union_all.out' FORMAT TSV" | sort --numeric-sort perform "bad_union_all" "SELECT 1, 2 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_bad_union_all.out' UNION ALL SELECT 3, 4" From 8c0463fdd4bb99f707bcbb7b61b86ab8984ec6b7 Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 16:14:08 +0800 Subject: [PATCH 172/522] fix --- src/Functions/array/arrayJaccardIndex.cpp | 80 ++++++++++------------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index c1ec8b53d25..211680092b3 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -34,85 +34,75 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - DataTypes types; - for (size_t i = 0; i < 2; ++i) - { - const auto * array_type = checkAndGetDataType(arguments[i].get()); - if (!array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array, but it has type{}.", i + 1, getName(), arguments[i]->getName()); - } + FunctionArgumentDescriptors args{ + {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, + {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, + }; + validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); } template - static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static inline void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) + { + if constexpr (is_const_left) + left_size = left_offsets[0]; + else + 
left_size = left_offsets[i] - left_offsets[i - 1]; + if constexpr (is_const_right) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i - 1]; + } + + template + static inline void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < res.size(); ++i) { - if constexpr (is_const_left) - left_size = left_offsets[0]; - else - left_size = left_offsets[i] - left_offsets[i - 1]; - if constexpr (is_const_right) - right_size = right_offsets[0]; - else - right_size = right_offsets[i] - right_offsets[i - 1]; - + getArraySize(left_offsets, right_offsets, left_size, right_size, i); size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; res[i] = static_cast(intersect_size) / (left_size + right_size - intersect_size); - if (unlikely(isnan(res[i]))) - res[i] = 1; } } - template - static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + template + static inline void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < res.size(); ++i) { - if constexpr (is_const_left) - left_size = left_offsets[0]; - else - left_size = left_offsets[i] - left_offsets[i - 1]; - if constexpr (is_const_right) - right_size = right_offsets[0]; - else - right_size = right_offsets[i] - right_offsets[i - 1]; - - res[i] = static_cast(left_size + right_size == 0); + getArraySize(left_offsets, right_offsets, left_size, right_size, i); + if (unlikely(!left_size && !right_size)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); + res[i] = 0; } } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - bool is_const_left; - bool is_const_right; - const ColumnArray * left_array; - const ColumnArray * right_array; - - auto cast_array = [&](const ColumnWithTypeAndName & col) + auto cast_array = [&](const ColumnWithTypeAndName & col) -> std::pair { const ColumnArray * res; bool is_const = false; - if (typeid_cast(col.column.get())) + if (const ColumnConst * col_const = typeid_cast(col.column.get())) { - res = checkAndGetColumn(checkAndGetColumnConst(col.column.get())->getDataColumnPtr().get()); + res = checkAndGetColumn(col_const->getDataColumnPtr().get()); is_const = true; } else if (!(res = checkAndGetColumn(col.column.get()))) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", col.column->getName(), getName()); - return std::make_pair(res, is_const); + return {res, is_const}; }; - std::tie(left_array, is_const_left) = cast_array(arguments[0]); - std::tie(right_array, is_const_right) = cast_array(arguments[1]); + const auto & [left_array, is_const_left] = cast_array(arguments[0]); + const auto & [right_array, is_const_right] = cast_array(arguments[1]); auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments); ColumnWithTypeAndName intersect_column; @@ -131,8 +121,8 @@ public: vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ else \ { \ - const ColumnArray * col_array = 
checkAndGetColumn(intersect_column.column.get()); \ - vector(col_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + const ColumnArray * intersect_column_array = checkAndGetColumn(intersect_column.column.get()); \ + vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ } if (!is_const_left && !is_const_right) From 309fbc45a23a5132be56e6b71c87955b05d0db7c Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 16:14:53 +0800 Subject: [PATCH 173/522] fix test --- .../02737_arrayJaccardIndex.reference | 52 +++++++------------ .../0_stateless/02737_arrayJaccardIndex.sql | 33 +++++++----- ...2737_arrayJaccardIndex_exception.reference | 2 + .../02737_arrayJaccardIndex_exception.sh | 14 +++++ 4 files changed, 55 insertions(+), 46 deletions(-) create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference create mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference index e6934bfe092..0b7969889c0 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference @@ -1,32 +1,20 @@ -0 -0.5 -1 -0.67 -1 -0 -0 -0 -1 -0 -0 -0 -0 -0.5 -1 -0.67 -0.5 -0.5 -0.5 -0.5 -1 -1 -1 -1 -1 -1 -1 -1 -0.33 -0.2 -1 -1 +[1] [1,2] 0.5 +[1,2] [1,2] 1 +[1,2,3] [1,2] 0.67 +[1] [] 0 +[1,2] [] 0 +[1,2,3] [] 0 +[] [1] 0 +[] [1,2] 0 +[] [1,2,3] 0 +[1,2] [1] 0.5 +[1,2] [1,2] 1 +[1,2] [1,2,3] 0.67 +[1] [1] 1 +[1,2] [1,2] 1 +[1,2,3] [1,2,3] 1 +['a'] ['a','aa','aaa'] 0.33 +[1,1.1,2.2] [2.2,3.3,444] 0.2 +[1] [1] 1 +[1,2] [1,2,3,4] 0.5 +[[1,2],[3,4]] [[1,2],[3,5]] 0.33 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index c3f04ba0b10..000106e93b7 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,26 +1,31 @@ drop table if exists array_jaccard_index; -create table array_jaccard_index (arr Array(UInt8)) engine=MergeTree partition by arr order by arr; +create table array_jaccard_index (arr Array(UInt8)) engine = MergeTree order by arr; insert into array_jaccard_index values ([1,2,3]); + insert into array_jaccard_index values ([1,2]); + insert into array_jaccard_index values ([1]); -insert into array_jaccard_index values ([]); -select round(arrayJaccardIndex(arr, [1,2]), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex(arr, []), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([], arr), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([1,2], arr), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([1,2], [1,2,3,4]), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([], []), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex(arr, arr), 2) from array_jaccard_index order by arr; +select arr as arr_1, [1,2] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -drop table if exists array_jaccard_index; +select arr as arr_1, [] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex(['a'], ['a', 'aa', 'aaa']), 2); +select [] as arr_1, arr as arr_2, 
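-- the empty array shares no elements with any stored row, so every result below is expected to be 0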
round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([1, 1.1, 2.2], [2.2, 3.3, 444]), 2); +select [1,2] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([], []), 2); +select arr as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; -select round(arrayJaccardIndex([toUInt16(1)], [toUInt32(1)]), 2); +drop table array_jaccard_index; + +select ['a'] as arr_1, ['a', 'aa', 'aaa'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [1, 1.1, 2.2] as arr_1, [2.2, 3.3, 444] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [toUInt16(1)] as arr_1, [toUInt32(1)] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [1,2] as arr_1, [1,2,3,4] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); + +select [[1,2], [3,4]] as arr_1, [[1,2], [3,5]] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference new file mode 100644 index 00000000000..307d9a195b0 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference @@ -0,0 +1,2 @@ +Code: 43 +Code: 386 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh new file mode 100644 index 00000000000..c36700c6e0f --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo ${CLICKHOUSE_CLIENT} + +# Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: array aggregate functions cannot be performed on two empty arrays: While processing arrayJaccardIndex([], []). (ILLEGAL_TYPE_OF_ARGUMENT) +$CLICKHOUSE_CLIENT -q "SELECT arrayJaccardIndex([], [])" |& grep -o "Code: 43" + + +# Code: 386. DB::Exception: Received from localhost:9000. DB::Exception: There is no subtype for types UInt8, String because some of them are String/FixedString and some of them are not: While processing [1, 2] AS arr_1, ['1', '2'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2). 
(NO_COMMON_TYPE) +$CLICKHOUSE_CLIENT -q "select [1,2] as arr_1, ['1','2'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2)" |& grep -o "Code: 386" From 85053ef008295ec48e66a42a4d1dafa41ff22e6c Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 16:39:55 +0800 Subject: [PATCH 174/522] fix permission --- tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh old mode 100644 new mode 100755 From 2c09ea04048d664fad9c70de49c0ceff10c9ec22 Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 17:09:52 +0800 Subject: [PATCH 175/522] add doc --- .../sql-reference/functions/array-functions.md | 18 ++++++++++++++++++ .../02737_arrayJaccardIndex_exception.sh | 3 --- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7f2b8f3c605..921e9765080 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -996,6 +996,24 @@ SELECT └──────────────┴───────────┘ ``` +## arrayJaccardIndex + +Returns the jaccard similarity between two arrays. + +**Example** + +Query: +``` sql +SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res +``` + +Result: +``` text +┌─res────────────────┐ +│ 0.3333333333333333 │ +└────────────────────┘ +``` + ## arrayReduce Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`. diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh index c36700c6e0f..49e80e06cba 100755 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh @@ -4,11 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo ${CLICKHOUSE_CLIENT} - # Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: array aggregate functions cannot be performed on two empty arrays: While processing arrayJaccardIndex([], []). (ILLEGAL_TYPE_OF_ARGUMENT) $CLICKHOUSE_CLIENT -q "SELECT arrayJaccardIndex([], [])" |& grep -o "Code: 43" - # Code: 386. DB::Exception: Received from localhost:9000. DB::Exception: There is no subtype for types UInt8, String because some of them are String/FixedString and some of them are not: While processing [1, 2] AS arr_1, ['1', '2'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2). 
(NO_COMMON_TYPE) $CLICKHOUSE_CLIENT -q "select [1,2] as arr_1, ['1','2'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2)" |& grep -o "Code: 386" From 91d091a80607a8c9c9e9edbc02392172825f5299 Mon Sep 17 00:00:00 2001 From: FFFFFFFHHHHHHH <916677625@qq.com> Date: Sun, 2 Jul 2023 18:58:58 +0800 Subject: [PATCH 176/522] fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2802e52c288..6231e8a07f3 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1034,6 +1034,7 @@ arrayFirst arrayFirstIndex arrayFlatten arrayIntersect +arrayJaccardIndex arrayJoin arrayLast arrayLastIndex @@ -1607,6 +1608,7 @@ isNull isValidJSON isValidUTF iteratively +jaccard javaHash javaHashUTF jbod From 1a40e30797fcfb65885beb0630e9605bd46d0b64 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 11:37:55 +0000 Subject: [PATCH 177/522] Add initcapUtf8: impl + tests --- .../functions/string-functions.md | 10 +- .../functions/string-functions.md | 9 +- src/Functions/LowerUpperUTF8Impl.h | 2 - src/Functions/initcapUTF8.cpp | 114 ++++++++++++++++++ .../0_stateless/02810_initcap.reference | 8 +- tests/queries/0_stateless/02810_initcap.sql | 11 +- .../aspell-ignore/en/aspell-dict.txt | 1 + 7 files changed, 148 insertions(+), 7 deletions(-) create mode 100644 src/Functions/initcapUTF8.cpp diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index d2180c9f3ea..cab6764c041 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1256,4 +1256,12 @@ Result: ## initcap -Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. \ No newline at end of file +Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters. + +## initcapUTF8 + +Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + +If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. \ No newline at end of file diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index bd104b27bed..ecb36bf4f65 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1116,4 +1116,11 @@ Do Nothing for 2 Minutes 2:00   ## initcap {#initcap} -Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. \ No newline at end of file +Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. 
+ +## initcapUTF8 {#initcapUTF8} + +Как [initcap](#initcap), предпологая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. +Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. +Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. \ No newline at end of file diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index f6b18439fd1..460f75f9bde 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl } else { - static const Poco::UTF8Encoding utf8; - size_t src_sequence_length = UTF8::seqLength(*src); /// In case partial buffer was passed (due to SSE optimization) /// we cannot convert it with current src_end, but we may have more diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp new file mode 100644 index 00000000000..333ebe266d3 --- /dev/null +++ b/src/Functions/initcapUTF8.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +struct InitcapUTF8Impl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (data.empty()) + return; + res_data.resize(data.size()); + res_offsets.assign(offsets); + array(data.data(), data.data() + data.size(), offsets, res_data.data()); + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument"); + } + + static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum) + { + size_t src_sequence_length = UTF8::seqLength(*src); + auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); + + if (src_code_point) + { + bool alpha = Poco::Unicode::isAlpha(*src_code_point); + bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point); + + int dst_code_point = *src_code_point; + if (alphanum && !prev_alphanum) + { + if (alpha) + dst_code_point = Poco::Unicode::toUpper(*src_code_point); + } + else if (alpha) + { + dst_code_point = Poco::Unicode::toLower(*src_code_point); + } + prev_alphanum = alphanum; + if (dst_code_point > 0) + { + size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); + assert(dst_sequence_length <= 4); + + if (dst_sequence_length == src_sequence_length) + { + src += dst_sequence_length; + dst += dst_sequence_length; + return; + } + } + } + + *dst = *src; + ++dst; + ++src; + prev_alphanum = false; + } + +private: + + static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) + { + auto offset_it = offsets.begin(); + const UInt8 * begin = src; + + /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) + while (src < src_end) + { + const UInt8 * row_end = begin + *offset_it; + chassert(row_end >= src); + bool prev_alphanum = false; + while (src < row_end) + processCodePoint(src, row_end, dst, prev_alphanum); + ++offset_it; + } + } +}; + +struct NameInitcapUTF8 +{ + 
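    /// Tag type that supplies the user-visible function name to the generic FunctionStringToString wrapper below.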
static constexpr auto name = "initcapUTF8"; +}; + +using FunctionInitcapUTF8 = FunctionStringToString; + +} + +REGISTER_FUNCTION(InitcapUTF8) +{ + factory.registerFunction(); +} + +} diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference index 4caa57e5ac0..0d24e14c445 100644 --- a/tests/queries/0_stateless/02810_initcap.reference +++ b/tests/queries/0_stateless/02810_initcap.reference @@ -3,5 +3,11 @@ Hello Hello Hello World Yeah, Well, I`M Gonna Go Build My Own Theme Park -Crc32ieee Is Best Function +Crc32ieee Is The Best Function 42ok + +Hello +Yeah, Well, I`M Gonna Go Build My Own Theme Park +Привет, Как Дела? +Ätsch, Bätsch +We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ. diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql index f61fcddf4c1..1a730003604 100644 --- a/tests/queries/0_stateless/02810_initcap.sql +++ b/tests/queries/0_stateless/02810_initcap.sql @@ -3,5 +3,12 @@ select initcap('Hello'); select initcap('hello'); select initcap('hello world'); select initcap('yeah, well, i`m gonna go build my own theme park'); -select initcap('CRC32IEEE is best function'); -select initcap('42oK'); \ No newline at end of file +select initcap('CRC32IEEE is the best function'); +select initcap('42oK'); + +select initcapUTF8(''); +select initcapUTF8('Hello'); +select initcapUTF8('yeah, well, i`m gonna go build my own theme park'); +select initcapUTF8('привет, как дела?'); +select initcapUTF8('ätsch, bätsch'); +select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.'); \ No newline at end of file diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index f25d082e5a6..835de91c0d8 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1580,6 +1580,7 @@ indexOf infi initialQueryID initializeAggregation +initcap injective innogames inodes From 16ab84d8040e92c0b1a258cf554195371469344b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 11:50:26 +0000 Subject: [PATCH 178/522] Style fix --- docs/en/sql-reference/functions/string-functions.md | 2 +- docs/ru/sql-reference/functions/string-functions.md | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index cab6764c041..cbbd32328bd 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1264,4 +1264,4 @@ Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). -If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. \ No newline at end of file +If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. 
diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index ecb36bf4f65..62697e5e197 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1123,4 +1123,4 @@ Do Nothing for 2 Minutes 2:00   Как [initcap](#initcap), предпологая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. -Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. \ No newline at end of file +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 835de91c0d8..79c34360584 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1581,6 +1581,7 @@ infi initialQueryID initializeAggregation initcap +initcapUTF injective innogames inodes From 20d7cf2bf6e87f65ede724a2b12a406a1d50c20a Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 3 Jul 2023 14:51:19 +0200 Subject: [PATCH 179/522] Fix tests --- tests/integration/test_storage_dict/test.py | 5 ++++- tests/integration/test_storage_s3/test.py | 8 ++++---- .../test_storage_s3/test_invalid_env_credentials.py | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_dict/test.py b/tests/integration/test_storage_dict/test.py index 1ed974f267d..dd4ab5c8d2c 100644 --- a/tests/integration/test_storage_dict/test.py +++ b/tests/integration/test_storage_dict/test.py @@ -10,7 +10,10 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "node1", main_configs=["configs/conf.xml"], with_nginx=True + "node1", + main_configs=["configs/conf.xml"], + user_configs=["configs/users.xml"], + with_nginx=True, ) cluster.start() diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index cecc201945c..45437fefa79 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -944,7 +944,7 @@ def test_predefined_connection_configuration(started_cluster): f"CREATE TABLE {name} (id UInt32) ENGINE = S3(s3_conf1, format='CSV')" ) assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" in error ) error = instance.query_and_get_error( @@ -952,7 +952,7 @@ def test_predefined_connection_configuration(started_cluster): user="user", ) assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON s3_conf1" + "To execute this query it's necessary to have grant NAMED COLLECTION ON s3_conf1" in error ) @@ -973,12 +973,12 @@ def test_predefined_connection_configuration(started_cluster): error = instance.query_and_get_error("SELECT * FROM s3(no_collection)") assert ( - "To execute this query it's necessary to have grant USE NAMED COLLECTION ON no_collection" + "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection" in error ) error = instance.query_and_get_error("SELECT * FROM s3(no_collection)", user="user") assert ( - "To execute this 
query it's necessary to have grant USE NAMED COLLECTION ON no_collection"
+        "To execute this query it's necessary to have grant NAMED COLLECTION ON no_collection"
         in error
     )
     instance = started_cluster.instances["dummy"]  # has named collection access
diff --git a/tests/integration/test_storage_s3/test_invalid_env_credentials.py b/tests/integration/test_storage_s3/test_invalid_env_credentials.py
index 0ee679014b1..d91cb7d68f9 100644
--- a/tests/integration/test_storage_s3/test_invalid_env_credentials.py
+++ b/tests/integration/test_storage_s3/test_invalid_env_credentials.py
@@ -92,6 +92,7 @@ def started_cluster():
                 "configs/use_environment_credentials.xml",
                 "configs/named_collections.xml",
             ],
+            user_configs=["configs/users.xml"],
         )

         logging.info("Starting cluster...")

From fd545deba071ffc9c6bde43683ecfbec533e4498 Mon Sep 17 00:00:00 2001
From: velavokr
Date: Sun, 2 Jul 2023 17:51:43 +0300
Subject: [PATCH 180/522] added a warning on autocalculated parallelism limits
 underutilizing CPU cores

---
 cmake/limit_jobs.cmake | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake
index a8f105b8987..100ce921b19 100644
--- a/cmake/limit_jobs.cmake
+++ b/cmake/limit_jobs.cmake
@@ -18,6 +18,9 @@ if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
     if (NOT PARALLEL_COMPILE_JOBS)
         set (PARALLEL_COMPILE_JOBS 1)
     endif ()
+    if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+        set (PARALLEL_COMPILE_JOBS_LESS TRUE)
+    endif()
 endif ()
@@ -33,6 +36,9 @@ if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
     if (NOT PARALLEL_LINK_JOBS)
         set (PARALLEL_LINK_JOBS 1)
     endif ()
+    if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+        set (PARALLEL_LINK_JOBS_LESS TRUE)
+    endif()
 endif ()

 # ThinLTO provides its own parallel linking
@@ -56,4 +62,10 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
     message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
+    if (PARALLEL_COMPILE_JOBS_LESS)
+        message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
+    endif()
+    if (PARALLEL_LINK_JOBS_LESS)
+        message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
+    endif()
 endif ()

From f316914aed71f2e90caf0a6af707cb5756bdbec8 Mon Sep 17 00:00:00 2001
From: velavokr
Date: Sun, 2 Jul 2023 17:59:48 +0300
Subject: [PATCH 181/522] better wording

---
 cmake/limit_jobs.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake
index 100ce921b19..3a33b3b9989 100644
--- a/cmake/limit_jobs.cmake
+++ b/cmake/limit_jobs.cmake
@@ -63,9 +63,9 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
         "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory.

Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") if (PARALLEL_COMPILE_JOBS_LESS) - message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") + message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") endif() if (PARALLEL_LINK_JOBS_LESS) - message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) will underutilize CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") + message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") endif() endif () From d9d98d6286f1bef423167ef35f0278c08426b3a6 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 13:18:54 +0000 Subject: [PATCH 182/522] Fix all_new_function... test --- .../02415_all_new_functions_must_be_documented.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index b5c133988e6..7eb0c57b362 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -363,6 +363,8 @@ in inIgnoreSet indexHint indexOf +initcap +initcapUTF8 initialQueryID initializeAggregation intDiv From fe1cf294fb9fd239d449b4ed464bf7e1c3e2c207 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 13:25:16 +0000 Subject: [PATCH 183/522] Fix ru docs --- docs/ru/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 62697e5e197..b872200f99b 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1120,7 +1120,7 @@ Do Nothing for 2 Minutes 2:00   ## initcapUTF8 {#initcapUTF8} -Как [initcap](#initcap), предпологая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. +Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным. Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. 
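For quick reference while reading the casing patches above: the queries below exercise both variants, with inputs and expected outputs copied from `02810_initcap.sql` and its reference file earlier in this series, so this block asserts nothing the tests do not already cover. The only assumption is that the plain variant processes the string byte by byte, which is exactly why the UTF-8-aware sibling was added.

``` sql
-- ASCII-oriented variant: uppercases the first letter of each word.
SELECT initcap('yeah, well, i`m gonna go build my own theme park');
-- Yeah, Well, I`M Gonna Go Build My Own Theme Park

-- UTF-8-aware variant: handles non-Latin scripts as well.
SELECT initcapUTF8('привет, как дела?');
-- Привет, Как Дела?
```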
From af603c2cc6455b31aba1a70c967c35b083fe6c0a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 3 Jul 2023 16:40:04 +0200 Subject: [PATCH 184/522] Fixed stack overflow on Field destruction --- ..._function_state_deserialization_fuzzer.cpp | 1 + .../fuzzers/delta_decompress_fuzzer.cpp | 2 +- .../double_delta_decompress_fuzzer.cpp | 2 +- .../fuzzers/encrypted_decompress_fuzzer.cpp | 4 +-- .../fuzzers/lz4_decompress_fuzzer.cpp | 4 +-- src/Core/Field.h | 33 ++++++++++++++++++- src/DataTypes/DataTypeFactory.cpp | 2 +- src/Functions/DateTimeTransforms.h | 4 ++- 8 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index 2ea01e1d5bc..3db1afb7a92 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -8,6 +8,7 @@ #include #include +#include #include diff --git a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp index b039777da15..451606843e2 100644 --- a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp @@ -34,7 +34,7 @@ try DB::Memory<> memory; memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); - codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + codec->doDecompressData(reinterpret_cast(data), static_cast(size), memory.data(), static_cast(output_buffer_size)); return 0; } diff --git a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp index f9822daa3bd..f7e685d68ad 100644 --- a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp @@ -34,7 +34,7 @@ try DB::Memory<> memory; memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer()); - codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + codec->doDecompressData(reinterpret_cast(data), static_cast(size), memory.data(), static_cast(output_buffer_size)); return 0; } diff --git a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp index 3e3d0e164fe..207cce21e3b 100644 --- a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp @@ -292,10 +292,10 @@ try DB::Memory<> memory; memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer()); - codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31); + codec_128->doDecompressData(input.data(), static_cast(input.size()), memory.data(), static_cast(input.size() - 31)); memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer()); - codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31); + codec_256->doDecompressData(input.data(), static_cast(input.size()), memory.data(), static_cast(input.size() - 31)); return 0; } catch (...) 
diff --git a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp index 85c4c9bd329..f6d4c51f18b 100644 --- a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp +++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp @@ -24,7 +24,7 @@ try return 0; const auto * p = reinterpret_cast(data); - auto codec = DB::getCompressionCodecLZ4(p->level); + auto codec = DB::getCompressionCodecLZ4(static_cast(p->level)); size_t output_buffer_size = p->decompressed_size % 65536; size -= sizeof(AuxiliaryRandomData); @@ -37,7 +37,7 @@ try DB::Memory<> memory; memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER); - codec->doDecompressData(reinterpret_cast(data), size, memory.data(), output_buffer_size); + codec->doDecompressData(reinterpret_cast(data), static_cast(size), memory.data(), static_cast(output_buffer_size)); return 0; } diff --git a/src/Core/Field.h b/src/Core/Field.h index ef1bd9a895d..8ee93d08411 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int TOO_DEEP_RECURSION; } constexpr Null NEGATIVE_INFINITY{Null::Value::NegativeInfinity}; @@ -291,6 +292,11 @@ decltype(auto) castToNearestFieldType(T && x) */ #define DBMS_MIN_FIELD_SIZE 32 +#if defined(SANITIZER) || !defined(NDEBUG) + #define DBMS_MAX_NESTED_FIELD_DEPTH 64 +#else + #define DBMS_MAX_NESTED_FIELD_DEPTH 256 +#endif /** Discriminated union of several types. * Made for replacement of `boost::variant` @@ -671,6 +677,27 @@ private: Types::Which which; + /// Field may contain a Field inside in case when Field stores Array, Tuple, Map or Object. + /// As the result stack overflow on destruction is possible + /// and to avoid it we need to count the depth and have a threshold. + size_t nested_field_depth = 0; + + /// Check whether T is already a Field with composite underlying type. + template + size_t calculateAndCheckFieldDepth(Original && x) + { + size_t result = 0; + + if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) + std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.nested_field_depth); }); + else if constexpr (std::is_same_v) + std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.second.nested_field_depth); }); + + if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field"); + + return result; + } /// Assuming there was no allocated state or it was deallocated (see destroy). template @@ -686,6 +713,8 @@ private: using StorageType = NearestFieldType; new (&storage) StorageType(std::forward(x)); which = TypeToEnum::value; + /// Incrementing the depth since we create a new Field. + nested_field_depth = calculateAndCheckFieldDepth(x) + 1; } /// Assuming same types. @@ -696,6 +725,8 @@ private: assert(which == TypeToEnum::value); JustT * MAY_ALIAS ptr = reinterpret_cast(&storage); *ptr = std::forward(x); + /// Do not increment the depth, because it is an assignment. 
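        /// (The depth is still recomputed for the incoming value, since the previous value's depth no longer applies.)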
+ nested_field_depth = calculateAndCheckFieldDepth(x); } template @@ -781,7 +812,7 @@ private: } template - void destroy() + ALWAYS_INLINE void destroy() { T * MAY_ALIAS ptr = reinterpret_cast(&storage); ptr->~T(); diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index 415f24d8151..89dacae59ff 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -62,7 +62,7 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const } else { - ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth); + ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth); } return getImpl(ast); diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 019e0c42cde..0aa495dace2 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -1449,8 +1449,10 @@ struct Transformer if constexpr (std::is_same_v || std::is_same_v) { +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion" bool is_valid_input = vec_from[i] >= 0 && vec_from[i] <= 0xFFFFFFFFL; - +# pragma clang diagnostic pop if (!is_valid_input) { if constexpr (std::is_same_v) From ccda3c3a6e25a8d9b2245631691e4fe892b21f5a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 3 Jul 2023 15:03:40 +0000 Subject: [PATCH 185/522] Try to fix logical error #51703 --- src/Interpreters/GraceHashJoin.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4218a8ea4e1..4bfe0315138 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -638,10 +638,9 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - auto current_buckets = getCurrentBuckets(); - if (!isPowerOf2(current_buckets.size())) [[unlikely]] + if (!isPowerOf2(buckets_snapshot.size())) [[unlikely]] { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", current_buckets.size()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", buckets_snapshot.size()); } if (!hash_join) hash_join = makeInMemoryJoin(); @@ -654,7 +653,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = {}; // Must use the latest buckets snapshot in case that it has been rehashed by other threads. - buckets_snapshot = rehashBuckets(current_buckets.size() * 2); + buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); hash_join = nullptr; From f316914aed71f2e90caf0a6af707cb5756bdbec8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 3 Jul 2023 15:29:57 +0000 Subject: [PATCH 186/522] Fix another one key. 
---
 .../Optimizations/liftUpFunctions.cpp         | 10 +-
 ...nd_columns_with_same_names_bug_2.reference |  3 +
 ...ting_and_columns_with_same_names_bug_2.sql | 107 ++++++++++++++++++
 3 files changed, 114 insertions(+), 6 deletions(-)
 create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
 create mode 100644 tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql

diff --git a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
index b2c3f3b4a6d..47b4e31ed32 100644
--- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
@@ -29,16 +29,14 @@ const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
 namespace DB::QueryPlanOptimizations
 {
-/// This is a check that output columns do not have the same name.
-/// This is ok to have such a situation in DAG, but not for Block.
-/// TODO: we should have a different data structure for headers.
+/// This is a check that output columns do not have the same name.
+/// This is ok for a DAG, but may introduce a bug in a SortingStep, because columns are selected by name.
 static bool areOutputsAreConvertableToBlock(const ActionsDAG::NodeRawConstPtrs & outputs)
 {
-    std::unordered_map name_to_type;
+    std::unordered_set names;
     for (const auto & output : outputs)
    {
-        auto [it, inserted] = name_to_type.emplace(output->result_name, output->result_type.get());
-        if (!inserted && !it->second->equals(*output->result_type))
+        if (!names.emplace(output->result_name).second)
            return false;
    }
diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
new file mode 100644
index 00000000000..bcc55e50958
--- /dev/null
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
@@ -0,0 +1,3 @@
+20230626 0.3156979034107179 \N \N
+20230626 0.2624629016490004 \N \N
+20230626 0.19390556368960468 \N \N
diff --git a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql
new file mode 100644
index 00000000000..b0221635fe9
--- /dev/null
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql
@@ -0,0 +1,107 @@
+create table test1 (
+    `pt` String,
+    `brand_name` String,
+    `total_indirect_order_cnt` Float64,
+    `total_indirect_gmv` Float64
+) ENGINE = Memory;
+
+create table test2 (
+    `pt` String,
+    `brand_name` String,
+    `exposure_uv` Float64,
+    `click_uv` Float64
+) ENGINE = Memory;
+
+INSERT INTO test1 (`pt`, `brand_name`, `total_indirect_order_cnt`, `total_indirect_gmv`) VALUES ('20230625', 'LINING', 2232, 1008710), ('20230625', 'adidas', 125, 58820), ('20230625', 'Nike', 1291, 1033020), ('20230626', 'Nike', 1145, 938926), ('20230626', 'LINING', 1904, 853336), ('20230626', 'adidas', 133, 62546), ('20220626', 'LINING', 3747, 1855203), ('20220626', 'Nike', 2295, 1742665), ('20220626', 'adidas', 302, 122388);
+
+INSERT INTO test2 (`pt`, `brand_name`, `exposure_uv`, `click_uv`) VALUES ('20230625', 'Nike', 2012913, 612831), ('20230625', 'adidas', 480277, 96176), ('20230625', 'LINING', 2474234, 627814), ('20230626', 'Nike', 1934666, 610770), ('20230626', 'adidas',
469904, 91117), ('20230626', 'LINING', 2285142, 599765), ('20220626', 'Nike', 2979656, 937166), ('20220626', 'adidas', 704751, 124250), ('20220626', 'LINING', 3163884, 1010221); + +SELECT * FROM ( + SELECT m0.pt AS pt + ,m0.`uvctr` AS uvctr + ,round(m1.uvctr,4) AS uvctr_hb_last_value + ,round(m2.uvctr,4) AS uvctr_tb_last_value + FROM + ( + SELECT m0.pt AS pt + ,COALESCE(m0.brand_name,m1.brand_name) AS brand_name + ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr` + FROM + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,exposure_uv AS `exposure_uv` + ,click_uv AS `click_uv` + FROM test2 + WHERE pt = '20230626' + ) m0 + FULL JOIN + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,total_indirect_order_cnt AS `total_indirect_order_cnt` + ,total_indirect_gmv AS `total_indirect_gmv` + FROM test1 + WHERE pt = '20230626' + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + ) m0 + LEFT JOIN + ( + SELECT m0.pt AS pt + ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr` + ,COALESCE(m0.brand_name,m1.brand_name) AS brand_name + ,`exposure_uv` AS `exposure_uv` + ,`click_uv` + FROM + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,exposure_uv AS `exposure_uv` + ,click_uv AS `click_uv` + FROM test2 + WHERE pt = '20230625' + ) m0 + FULL JOIN + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,total_indirect_order_cnt AS `total_indirect_order_cnt` + ,total_indirect_gmv AS `total_indirect_gmv` + FROM test1 + WHERE pt = '20230625' + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + LEFT JOIN + ( + SELECT m0.pt AS pt + ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr` + ,COALESCE(m0.brand_name,m1.brand_name) AS brand_name + ,`exposure_uv` AS `exposure_uv` + ,`click_uv` + FROM + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,exposure_uv AS `exposure_uv` + ,click_uv AS `click_uv` + FROM test2 + WHERE pt = '20220626' + ) m0 + FULL JOIN + ( + SELECT pt AS pt + ,brand_name AS `brand_name` + ,total_indirect_order_cnt AS `total_indirect_order_cnt` + ,total_indirect_gmv AS `total_indirect_gmv` + FROM test1 + WHERE pt = '20220626' + ) m1 + ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt + ) m2 + ON m0.brand_name = m2.brand_name AND m0.pt = m2.pt +) c0 +ORDER BY pt ASC, uvctr DESC; + From 66227ce8d3faacd7a60a1cde9c96f55cb6c1b134 Mon Sep 17 00:00:00 2001 From: velavokr Date: Sun, 2 Jul 2023 15:20:59 +0300 Subject: [PATCH 187/522] #51292 added default_temporary_table_engine setting --- docs/en/operations/settings/settings.md | 34 ++++++++++++++++++ src/Core/Settings.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 35 ++++++++----------- src/Interpreters/InterpreterCreateQuery.h | 2 +- .../02184_default_table_engine.reference | 1 + .../02184_default_table_engine.sql | 4 +++ 6 files changed, 56 insertions(+), 21 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cff13302cdc..0d5072d5474 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3201,6 +3201,40 @@ ENGINE = Log └──────────────────────────────────────────────────────────────────────────┘ ``` +## default_temporary_table_engine {#default_temporary_table_engine} + +Same as 
[default_temporary_table_engine](#default_temporary_table_engine) but for temporary tables. + +Default value: `Memory`. + +In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine: + +Query: + +```sql +SET default_temporary_table_engine = 'Log'; + +CREATE TEMPORARY TABLE my_table ( + x UInt32, + y UInt32 +); + +SHOW CREATE TEMPORARY TABLE my_table; +``` + +Result: + +```response +┌─statement────────────────────────────────────────────────────────────────┐ +│ CREATE TEMPORARY TABLE default.my_table +( + `x` UInt32, + `y` UInt32 +) +ENGINE = Log +└──────────────────────────────────────────────────────────────────────────┘ +``` + ## data_type_default_nullable {#data_type_default_nullable} Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b7d12a518c8..59373df3ece 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -517,6 +517,7 @@ class IColumn; M(Seconds, wait_for_window_view_fire_signal_timeout, 10, "Timeout for waiting for window view fire signal in event time processing", 0) \ M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ + M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index d0bb3dd389f..1419203b45b 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -911,14 +911,13 @@ String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_tab } } -void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context) +void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) { - if (local_context->getSettingsRef().default_table_engine.value == DefaultTableEngine::None) + if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); auto engine_ast = std::make_shared(); - auto default_table_engine = local_context->getSettingsRef().default_table_engine.value; - engine_ast->name = getTableEngineName(default_table_engine); + engine_ast->name = getTableEngineName(engine); engine_ast->no_empty_args = true; storage.set(storage.engine, engine_ast); } @@ -943,24 +942,20 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); - if (create.storage) + if (create.storage && create.storage->engine) { - if 
(create.storage->engine) - { - if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); - } - else - throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table"); + if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); + return; } else { - auto engine_ast = std::make_shared(); - engine_ast->name = "Memory"; - engine_ast->no_empty_args = true; - auto storage_ast = std::make_shared(); - storage_ast->set(storage_ast->engine, engine_ast); - create.set(create.storage, storage_ast); + if (!create.storage) + { + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); + } + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); } return; } @@ -969,7 +964,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const { /// Some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not: just set default one. if (!create.storage->engine) - setDefaultTableEngine(*create.storage, getContext()); + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); return; } @@ -1008,7 +1003,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const } create.set(create.storage, std::make_shared()); - setDefaultTableEngine(*create.storage, getContext()); + setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_table_engine.value); } static void generateUUIDForTable(ASTCreateQuery & create) diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index a5fa6576091..09a582d6686 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -91,7 +91,7 @@ private: TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; static String getTableEngineName(DefaultTableEngine default_table_engine); - static void setDefaultTableEngine(ASTStorage & storage, ContextPtr local_context); + static void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; diff --git a/tests/queries/0_stateless/02184_default_table_engine.reference b/tests/queries/0_stateless/02184_default_table_engine.reference index 870dff90efa..495b9627acb 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.reference +++ b/tests/queries/0_stateless/02184_default_table_engine.reference @@ -27,3 +27,4 @@ CREATE TABLE default.val2\n(\n `n` Int32\n) AS values(\'n int\', 1, 2) CREATE TABLE default.log\n(\n `n` Int32\n)\nENGINE = Log CREATE TABLE default.kek\n(\n `n` Int32\n)\nENGINE = Memory CREATE TABLE default.lol\n(\n `n` Int32\n)\nENGINE = MergeTree\nORDER BY n\nSETTINGS min_bytes_for_wide_part = 123, index_granularity = 8192 +CREATE TEMPORARY TABLE tmp_log\n(\n `n` Int32\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index 
109875d53a5..68422f273b0 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -128,3 +128,7 @@ SHOW CREATE TABLE kek; SHOW CREATE TABLE lol; DROP TABLE kek; DROP TABLE lol; + +SET default_temporary_table_engine = 'Log'; +CREATE TEMPORARY TABLE tmp_log (n int); +SHOW CREATE TEMPORARY TABLE tmp_log; From 1e10bf5bdf50aac027f0824bad812676988a1eb3 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Jul 2023 13:47:52 +0300 Subject: [PATCH 188/522] Update docs/en/operations/settings/settings.md --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 0d5072d5474..5f6cf98646b 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3203,7 +3203,7 @@ ENGINE = Log ## default_temporary_table_engine {#default_temporary_table_engine} -Same as [default_temporary_table_engine](#default_temporary_table_engine) but for temporary tables. +Same as [default_table_engine](#default_table_engine) but for temporary tables. Default value: `Memory`. From 2f85d048ae42f0b06658b2acd38271d041be057e Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 16:14:19 +0300 Subject: [PATCH 189/522] bugfix --- src/Interpreters/InterpreterCreateQuery.cpp | 65 ++++++--------------- src/Interpreters/InterpreterCreateQuery.h | 2 - 2 files changed, 19 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 1419203b45b..72312a33b3d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -881,45 +881,21 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } -String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_table_engine) -{ - switch (default_table_engine) - { - case DefaultTableEngine::Log: - return "Log"; - - case DefaultTableEngine::StripeLog: - return "StripeLog"; - - case DefaultTableEngine::MergeTree: - return "MergeTree"; - - case DefaultTableEngine::ReplacingMergeTree: - return "ReplacingMergeTree"; - - case DefaultTableEngine::ReplicatedMergeTree: - return "ReplicatedMergeTree"; - - case DefaultTableEngine::ReplicatedReplacingMergeTree: - return "ReplicatedReplacingMergeTree"; - - case DefaultTableEngine::Memory: - return "Memory"; - - default: - throw Exception(ErrorCodes::LOGICAL_ERROR, "default_table_engine is set to unknown value"); +namespace { + void checkTemporaryTableEngineName(const String& name) { + if (name.starts_with("Replicated") || name == "KeeperMap") + throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); } -} -void InterpreterCreateQuery::setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine) -{ - if (engine == DefaultTableEngine::None) - throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); + void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) { + if (engine == DefaultTableEngine::None) + throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); - auto engine_ast = std::make_shared(); - engine_ast->name = getTableEngineName(engine); - engine_ast->no_empty_args = true; - storage.set(storage.engine, engine_ast); + auto engine_ast = 
std::make_shared(); + engine_ast->name = SettingFieldDefaultTableEngine(engine).toString(); + engine_ast->no_empty_args = true; + storage.set(storage.engine, engine_ast); + } } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const @@ -942,21 +918,18 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); - if (create.storage && create.storage->engine) + if (!create.storage) { - if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap") - throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); - return; + auto storage_ast = std::make_shared(); + create.set(create.storage, storage_ast); } - else + + if (!create.storage->engine) { - if (!create.storage) - { - auto storage_ast = std::make_shared(); - create.set(create.storage, storage_ast); - } setDefaultTableEngine(*create.storage, getContext()->getSettingsRef().default_temporary_table_engine.value); } + + checkTemporaryTableEngineName(create.storage->engine->name); return; } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 09a582d6686..67339dea928 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -90,8 +90,6 @@ private: /// Calculate list of columns, constraints, indices, etc... of table. Rewrite query in canonical way. TableProperties getTablePropertiesAndNormalizeCreateQuery(ASTCreateQuery & create) const; void validateTableStructure(const ASTCreateQuery & create, const TableProperties & properties) const; - static String getTableEngineName(DefaultTableEngine default_table_engine); - static void setDefaultTableEngine(ASTStorage & storage, DefaultTableEngine engine); void setEngine(ASTCreateQuery & create) const; AccessRightsElements getRequiredAccess() const; From ae87d43f887376d19f2df3e197bc20ecefa7b012 Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 18:28:56 +0300 Subject: [PATCH 190/522] test fixes --- src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++++--- tests/queries/0_stateless/02184_default_table_engine.sql | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 72312a33b3d..dc95335d3ad 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -881,13 +881,16 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat } } -namespace { - void checkTemporaryTableEngineName(const String& name) { +namespace +{ + void checkTemporaryTableEngineName(const String& name) + { if (name.starts_with("Replicated") || name == "KeeperMap") throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines"); } - void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) { + void setDefaultTableEngine(ASTStorage &storage, DefaultTableEngine engine) + { if (engine == DefaultTableEngine::None) throw Exception(ErrorCodes::ENGINE_REQUIRED, "Table engine is not specified in CREATE query"); diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index 68422f273b0..a984ec1b6c9 100644 --- 
a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -83,8 +83,8 @@ CREATE TEMPORARY TABLE tmp (n int); SHOW CREATE TEMPORARY TABLE tmp; CREATE TEMPORARY TABLE tmp1 (n int) ENGINE=Memory; CREATE TEMPORARY TABLE tmp2 (n int) ENGINE=Log; -CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 80} -CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 80} +CREATE TEMPORARY TABLE tmp2 (n int) ORDER BY n; -- {serverError 36} +CREATE TEMPORARY TABLE tmp2 (n int, PRIMARY KEY (n)); -- {serverError 36} CREATE TABLE log (n int); SHOW CREATE log; From dcc0076ded42792fd41c7f83bca9ff3e5ce0ed4b Mon Sep 17 00:00:00 2001 From: velavokr Date: Mon, 3 Jul 2023 19:01:35 +0300 Subject: [PATCH 191/522] fixed comment --- src/Interpreters/InterpreterCreateQuery.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index dc95335d3ad..55d2449f739 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -914,9 +914,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const if (create.temporary) { - /// It's possible if some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not. - /// It makes sense when default_table_engine setting is used, but not for temporary tables. - /// For temporary tables we ignore this setting to allow CREATE TEMPORARY TABLE query without specifying ENGINE + /// Some part of storage definition is specified, but ENGINE is not: just set the one from default_temporary_table_engine setting. if (!create.cluster.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause"); From 9941b29fd3031bc12e055d17f18ad2b31ba7973d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 3 Jul 2023 18:18:14 +0200 Subject: [PATCH 192/522] Better --- src/Core/Field.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 8ee93d08411..0b3c5b7f48d 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -689,9 +689,9 @@ private: size_t result = 0; if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.nested_field_depth); }); + std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.nested_field_depth); }); else if constexpr (std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &x](auto & elem){ nested_field_depth = std::max(nested_field_depth, elem.second.nested_field_depth); }); + std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.second.nested_field_depth); }); if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field"); @@ -711,10 +711,10 @@ private: // we must initialize the entire wide stored type, and not just the // nominal type. using StorageType = NearestFieldType; - new (&storage) StorageType(std::forward(x)); - which = TypeToEnum::value; /// Incrementing the depth since we create a new Field. nested_field_depth = calculateAndCheckFieldDepth(x) + 1; + new (&storage) StorageType(std::forward(x)); + which = TypeToEnum::value; } /// Assuming same types. 
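An aside on the two fixes in [PATCH 192] above, which are easy to miss in the flattened diff: the removed lambdas captured `[this, &x]` and wrote the maximum straight into the member `nested_field_depth`, so the local `result` that the subsequent `result >= DBMS_MAX_NESTED_FIELD_DEPTH` check inspects was never updated; the fix captures `[this, &result]` and folds into `result` instead. A minimal standalone sketch of the corrected fold (plain C++ with an illustrative `Elem` type, not the real `Field` machinery):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Elem { size_t nested_field_depth = 0; }; // stand-in for a nested Field

size_t maxNestedDepth(const std::vector<Elem> & elems)
{
    size_t result = 0;
    // The accumulator must be captured by reference; this is exactly
    // the capture that the patch repairs.
    std::for_each(elems.begin(), elems.end(),
                  [&result](const Elem & e) { result = std::max(result, e.nested_field_depth); });
    return result;
}

The same commit also moves the depth computation in `createConcrete` ahead of the placement `new`, so that an over-deep `Field` throws before any storage has been constructed.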
@@ -724,9 +724,9 @@ private: using JustT = std::decay_t; assert(which == TypeToEnum::value); JustT * MAY_ALIAS ptr = reinterpret_cast(&storage); - *ptr = std::forward(x); /// Do not increment the depth, because it is an assignment. nested_field_depth = calculateAndCheckFieldDepth(x); + *ptr = std::forward(x); } template From 5b85e1ce8aa6c79ac8cb74be492ed06060ec2e73 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 3 Jul 2023 18:09:08 +0000 Subject: [PATCH 193/522] Special build check fix --- src/Functions/initcap.cpp | 2 ++ src/Functions/initcapUTF8.cpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp index 7d0749ecb12..5460ee06792 100644 --- a/src/Functions/initcap.cpp +++ b/src/Functions/initcap.cpp @@ -14,6 +14,8 @@ struct InitcapImpl ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets) { + if (data.empty()) + return; res_data.resize(data.size()); res_offsets.assign(offsets); array(data.data(), data.data() + data.size(), res_data.data()); diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp index 333ebe266d3..076dcff6622 100644 --- a/src/Functions/initcapUTF8.cpp +++ b/src/Functions/initcapUTF8.cpp @@ -81,7 +81,7 @@ private: static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) { - auto offset_it = offsets.begin(); + const auto * offset_it = offsets.begin(); const UInt8 * begin = src; /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) From 9660291ab312ad0639ffc3058ad28015d76d08aa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 3 Jul 2023 18:33:59 +0000 Subject: [PATCH 194/522] my fixes --- .../functions/array-functions.md | 2 +- src/Functions/array/arrayJaccardIndex.cpp | 9 ++-- .../0_stateless/02737_arrayJaccardIndex.sql | 41 +++++++------------ 3 files changed, 21 insertions(+), 31 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 921e9765080..862ecc42158 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -998,7 +998,7 @@ SELECT ## arrayJaccardIndex -Returns the jaccard similarity between two arrays. +Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays. 
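For reference (this note is not part of the patch), the Jaccard index of two sets is defined as

J(A, B) = \frac{|A \cap B|}{|A \cup B|} = \frac{|A \cap B|}{|A| + |B| - |A \cap B|}

The second form is what the implementation computes from `intersect_size`, `left_size` and `right_size` below. For instance, J([1,2], [1,2,3,4]) = 2/4 = 0.5, matching the test expectations in this patch; for two empty arrays the ratio degenerates to 0/0, which is why the function throws instead of returning a value.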
**Example** diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 211680092b3..078687a6431 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -37,6 +37,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ + // XXX {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, }; @@ -45,7 +46,7 @@ public: } template - static inline void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) + static void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) { if constexpr (is_const_left) left_size = left_offsets[0]; @@ -58,7 +59,7 @@ public: } template - static inline void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; @@ -71,14 +72,14 @@ public: } template - static inline void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) { size_t left_size; size_t right_size; for (size_t i = 0; i < res.size(); ++i) { getArraySize(left_offsets, right_offsets, left_size, right_size, i); - if (unlikely(!left_size && !right_size)) + if ((!left_size && !right_size)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); res[i] = 0; } diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index 000106e93b7..ba5a93f1658 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,31 +1,20 @@ -drop table if exists array_jaccard_index; +SELECT ['a'] AS arr_1, ['a', 'aa', 'aaa'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [1, 1.1, 2.2] AS arr_1, [2.2, 3.3, 444] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [toUInt16(1)] AS arr_1, [toUInt32(1)] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [1,2] AS arr_1, [1,2,3,4] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT [[1,2], [3,4]] AS arr_1, [[1,2], [3,5]] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -create table array_jaccard_index (arr Array(UInt8)) engine = MergeTree order by arr; +DROP TABLE IF EXISTS array_jaccard_index; -insert into array_jaccard_index values ([1,2,3]); +CREATE TABLE array_jaccard_index (arr Array(UInt8)) engine = MergeTree ORDER BY arr; +INSERT INTO array_jaccard_index values ([1,2,3]); +INSERT INTO array_jaccard_index values ([1,2]); +INSERT INTO array_jaccard_index values ([1]); -insert into array_jaccard_index values ([1,2]); - -insert into array_jaccard_index values ([1]); - -select arr as arr_1, 
[1,2] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select arr as arr_1, [] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select [] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select [1,2] as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; - -select arr as arr_1, arr as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) from array_jaccard_index order by arr; +SELECT arr AS arr_1, [1,2] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr AS arr_1, [] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [1,2] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; drop table array_jaccard_index; - -select ['a'] as arr_1, ['a', 'aa', 'aaa'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [1, 1.1, 2.2] as arr_1, [2.2, 3.3, 444] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [toUInt16(1)] as arr_1, [toUInt32(1)] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [1,2] as arr_1, [1,2,3,4] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); - -select [[1,2], [3,4]] as arr_1, [[1,2], [3,5]] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); From 29f93bd06dc7cb7ba6a768644bc5a0eda79126d4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 3 Jul 2023 20:25:51 +0000 Subject: [PATCH 195/522] More fixes, pt. 
II --- src/DataTypes/IDataType.h | 34 +++-- src/Functions/array/arrayJaccardIndex.cpp | 137 ++++++++++-------- .../02737_arrayJaccardIndex.reference | 19 ++- .../0_stateless/02737_arrayJaccardIndex.sql | 32 ++-- ...2737_arrayJaccardIndex_exception.reference | 2 - .../02737_arrayJaccardIndex_exception.sh | 11 -- 6 files changed, 126 insertions(+), 109 deletions(-) delete mode 100644 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference delete mode 100755 tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index bfc4a71083d..4adafe5d212 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -410,21 +410,29 @@ inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).is template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } -inline bool isEnum(const DataTypePtr & data_type) { return WhichDataType(data_type).isEnum(); } -inline bool isDecimal(const DataTypePtr & data_type) { return WhichDataType(data_type).isDecimal(); } -inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_type).isTuple(); } -inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); } -inline bool isMap(const DataTypePtr & data_type) {return WhichDataType(data_type).isMap(); } -inline bool isInterval(const DataTypePtr & data_type) {return WhichDataType(data_type).isInterval(); } -inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); } -inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); } -inline bool isIPv4(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv4(); } -inline bool isIPv6(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv6(); } +template +inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } +template +inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } +template +inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } +template +inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } +template +inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } +template +inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } +template +inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } +template +inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } +template +inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } +template +inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } template -inline bool isObject(const T & data_type) -{ - return WhichDataType(data_type).isObject(); +inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } template diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp index 078687a6431..755e0f8278f 100644 --- a/src/Functions/array/arrayJaccardIndex.cpp +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -24,8 +24,58 @@ namespace ErrorCodes class FunctionArrayJaccardIndex : public IFunction { -public: +private: using ResultType = Float64; + + struct LeftAndRightSizes + { + size_t left_size; + size_t right_size; + }; + + 
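The `left_is_const`/`right_is_const` split that `getArraySizes` implements below comes down to how array columns encode row sizes. A minimal sketch of the offset arithmetic, using a plain `std::vector` as a stand-in for `ColumnArray::Offsets` (the explicit `i == 0` branch is only needed in this sketch; the real offsets container is left-padded so that `offsets[i - 1]` reads as 0 when `i == 0`):

#include <cstddef>
#include <cstdint>
#include <vector>

using Offsets = std::vector<uint64_t>; // stand-in for ColumnArray::Offsets

// A non-const array column stores cumulative end offsets, so row i spans
// offsets[i] - offsets[i - 1] elements. A const column holds one physical
// row repeated for every logical row, so offsets[0] is always its size.
uint64_t rowSize(const Offsets & offsets, size_t i, bool is_const)
{
    if (is_const)
        return offsets[0];
    uint64_t prev = (i == 0) ? 0 : offsets[i - 1];
    return offsets[i] - prev;
}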
template + static LeftAndRightSizes getArraySizes(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t i) + { + size_t left_size; + size_t right_size; + + if constexpr (left_is_const) + left_size = left_offsets[0]; + else + left_size = left_offsets[i] - left_offsets[i - 1]; + + if constexpr (right_is_const) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i - 1]; + + return {left_size, right_size}; + } + + template + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < res.size(); ++i) + { + LeftAndRightSizes sizes = getArraySizes(left_offsets, right_offsets, i); + size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; + res[i] = static_cast(intersect_size) / (sizes.left_size + sizes.right_size - intersect_size); + } + } + + template + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < res.size(); ++i) + { + LeftAndRightSizes sizes = getArraySizes(left_offsets, right_offsets, i); + if (sizes.left_size == 0 && sizes.right_size == 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); + res[i] = 0; + } + } + +public: static constexpr auto name = "arrayJaccardIndex"; String getName() const override { return name; } static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } @@ -37,100 +87,59 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { FunctionArgumentDescriptors args{ - // XXX - {"array_1", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, - {"array_2", [](const IDataType & type) { return isArray(type.getPtr()); }, nullptr, "Array"}, + {"array_1", &isArray, nullptr, "Array"}, + {"array_2", &isArray, nullptr, "Array"}, }; validateFunctionArgumentTypes(*this, arguments, args); return std::make_shared>(); } - template - static void getArraySize(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t & left_size, size_t & right_size, const size_t & i) - { - if constexpr (is_const_left) - left_size = left_offsets[0]; - else - left_size = left_offsets[i] - left_offsets[i - 1]; - if constexpr (is_const_right) - right_size = right_offsets[0]; - else - right_size = right_offsets[i] - right_offsets[i - 1]; - } - - template - static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) - { - size_t left_size; - size_t right_size; - for (size_t i = 0; i < res.size(); ++i) - { - getArraySize(left_offsets, right_offsets, left_size, right_size, i); - size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; - res[i] = static_cast(intersect_size) / (left_size + right_size - intersect_size); - } - } - - template - static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) - { - size_t left_size; - size_t right_size; - for (size_t i = 0; i < res.size(); ++i) - { - getArraySize(left_offsets, right_offsets, left_size, right_size, i); - if ((!left_size && !right_size)) - throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); - res[i] = 0; - } - } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - auto cast_array = [&](const ColumnWithTypeAndName & col) -> std::pair + auto cast_to_array = [&](const ColumnWithTypeAndName & col) -> std::pair { - const ColumnArray * res; - bool is_const = false; if (const ColumnConst * col_const = typeid_cast(col.column.get())) { - res = checkAndGetColumn(col_const->getDataColumnPtr().get()); - is_const = true; + const ColumnArray * col_const_array = checkAndGetColumn(col_const->getDataColumnPtr().get()); + return {col_const_array, true}; } - else if (!(res = checkAndGetColumn(col.column.get()))) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", - col.column->getName(), getName()); - return {res, is_const}; + else if (const ColumnArray * col_non_const_array = checkAndGetColumn(col.column.get())) + return {col_non_const_array, false}; + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", col.column->getName(), getName()); }; - const auto & [left_array, is_const_left] = cast_array(arguments[0]); - const auto & [right_array, is_const_right] = cast_array(arguments[1]); + const auto & [left_array, left_is_const] = cast_to_array(arguments[0]); + const auto & [right_array, right_is_const] = cast_to_array(arguments[1]); auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments); + ColumnWithTypeAndName intersect_column; intersect_column.type = intersect_array->getResultType(); intersect_column.column = intersect_array->execute(arguments, intersect_column.type, input_rows_count); - const auto * return_type_intersect = checkAndGetDataType(intersect_column.type.get()); - if (!return_type_intersect) + + const auto * intersect_column_type = checkAndGetDataType(intersect_column.type.get()); + if (!intersect_column_type) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type for function arrayIntersect"); auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); vec_res.resize(input_rows_count); -#define EXECUTE_VECTOR(is_const_left, is_const_right) \ - if (typeid_cast(return_type_intersect->getNestedType().get())) \ - vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ +#define EXECUTE_VECTOR(left_is_const, right_is_const) \ + if (typeid_cast(intersect_column_type->getNestedType().get())) \ + vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ else \ { \ const ColumnArray * intersect_column_array = checkAndGetColumn(intersect_column.column.get()); \ - vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ } - if (!is_const_left && !is_const_right) + if (!left_is_const && !right_is_const) EXECUTE_VECTOR(false, false) - else if (!is_const_left && is_const_right) + else if (!left_is_const && right_is_const) EXECUTE_VECTOR(false, true) - else if (is_const_left && !is_const_right) + else if (left_is_const && !right_is_const) EXECUTE_VECTOR(true, false) else EXECUTE_VECTOR(true, true) diff --git 
a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference index 0b7969889c0..62a51ec0ab2 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference @@ -1,20 +1,23 @@ +negative tests +const arguments +[1,2] [1,2,3,4] 0.5 +[1,1.1,2.2] [2.2,3.3,444] 0.2 +[1] [1] 1 +['a'] ['a','aa','aaa'] 0.33 +[[1,2],[3,4]] [[1,2],[3,5]] 0.33 +non-const arguments [1] [1,2] 0.5 [1,2] [1,2] 1 [1,2,3] [1,2] 0.67 [1] [] 0 [1,2] [] 0 [1,2,3] [] 0 -[] [1] 0 -[] [1,2] 0 -[] [1,2,3] 0 [1,2] [1] 0.5 [1,2] [1,2] 1 [1,2] [1,2,3] 0.67 +[] [1] 0 +[] [1,2] 0 +[] [1,2,3] 0 [1] [1] 1 [1,2] [1,2] 1 [1,2,3] [1,2,3] 1 -['a'] ['a','aa','aaa'] 0.33 -[1,1.1,2.2] [2.2,3.3,444] 0.2 -[1] [1] 1 -[1,2] [1,2,3,4] 0.5 -[[1,2],[3,4]] [[1,2],[3,5]] 0.33 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql index ba5a93f1658..499debd94b7 100644 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -1,8 +1,18 @@ -SELECT ['a'] AS arr_1, ['a', 'aa', 'aaa'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [1, 1.1, 2.2] AS arr_1, [2.2, 3.3, 444] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [toUInt16(1)] AS arr_1, [toUInt32(1)] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [1,2] AS arr_1, [1,2,3,4] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); -SELECT [[1,2], [3,4]] AS arr_1, [[1,2], [3,5]] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2); +SELECT 'negative tests'; + +SELECT 'a' AS arr1, 2 AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT [] AS arr1, [] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ['1', '2'] AS arr1, [1,2] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError NO_COMMON_TYPE } + +SELECT 'const arguments'; + +SELECT [1,2] AS arr1, [1,2,3,4] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [1, 1.1, 2.2] AS arr1, [2.2, 3.3, 444] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [toUInt16(1)] AS arr1, [toUInt32(1)] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT ['a'] AS arr1, ['a', 'aa', 'aaa'] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [[1,2], [3,4]] AS arr1, [[1,2], [3,5]] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); + +SELECT 'non-const arguments'; DROP TABLE IF EXISTS array_jaccard_index; @@ -11,10 +21,10 @@ INSERT INTO array_jaccard_index values ([1,2,3]); INSERT INTO array_jaccard_index values ([1,2]); INSERT INTO array_jaccard_index values ([1]); -SELECT arr AS arr_1, [1,2] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT arr AS arr_1, [] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT [] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT [1,2] AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; -SELECT arr AS arr_1, arr AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, [1,2] AS other, round(arrayJaccardIndex(arr, other), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, [] AS other, round(arrayJaccardIndex(arr, other), 2) FROM array_jaccard_index ORDER BY arr; 
+SELECT [1,2] AS other, arr, round(arrayJaccardIndex(other, arr), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [] AS other, arr, round(arrayJaccardIndex(other, arr), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, arr, round(arrayJaccardIndex(arr, arr), 2) FROM array_jaccard_index ORDER BY arr; -drop table array_jaccard_index; +DROP TABLE array_jaccard_index; diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference deleted file mode 100644 index 307d9a195b0..00000000000 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.reference +++ /dev/null @@ -1,2 +0,0 @@ -Code: 43 -Code: 386 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh b/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh deleted file mode 100755 index 49e80e06cba..00000000000 --- a/tests/queries/0_stateless/02737_arrayJaccardIndex_exception.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: array aggregate functions cannot be performed on two empty arrays: While processing arrayJaccardIndex([], []). (ILLEGAL_TYPE_OF_ARGUMENT) -$CLICKHOUSE_CLIENT -q "SELECT arrayJaccardIndex([], [])" |& grep -o "Code: 43" - -# Code: 386. DB::Exception: Received from localhost:9000. DB::Exception: There is no subtype for types UInt8, String because some of them are String/FixedString and some of them are not: While processing [1, 2] AS arr_1, ['1', '2'] AS arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2). (NO_COMMON_TYPE) -$CLICKHOUSE_CLIENT -q "select [1,2] as arr_1, ['1','2'] as arr_2, round(arrayJaccardIndex(arr_1, arr_2), 2)" |& grep -o "Code: 386" From 6742432fd2801380350df489dd882ab538598a7e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 3 Jul 2023 21:58:56 +0000 Subject: [PATCH 196/522] Number of buckets is always increased by a factor of 2, so there is no reason to provide it as a parameter --- src/Interpreters/GraceHashJoin.cpp | 20 +++++++++----------- src/Interpreters/GraceHashJoin.h | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp index 4bfe0315138..f5b2386fd1e 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -356,16 +356,16 @@ bool GraceHashJoin::hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const return hasMemoryOverflow(total_rows, total_bytes); } -GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) +GraceHashJoin::Buckets GraceHashJoin::rehashBuckets() { std::unique_lock lock(rehash_mutex); + + if (!isPowerOf2(buckets.size())) [[unlikely]] + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of buckets should be power of 2 but it's {}", buckets.size()); + + const size_t to_size = buckets.size() * 2; size_t current_size = buckets.size(); - if (to_size <= current_size) - return buckets; - - chassert(isPowerOf2(to_size)); - if (to_size > max_num_buckets) { throw Exception(ErrorCodes::LIMIT_EXCEEDED, @@ -623,6 +623,8 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) void GraceHashJoin::addJoinedBlockImpl(Block block) { + LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), ""); + block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); size_t bucket_index = current_bucket->idx; @@ -638,10
+640,6 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - if (!isPowerOf2(buckets_snapshot.size())) [[unlikely]] - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Broken buckets. its size({}) is not power of 2", buckets_snapshot.size()); - } if (!hash_join) hash_join = makeInMemoryJoin(); @@ -653,7 +651,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = {}; // Must use the latest buckets snapshot in case that it has been rehashed by other threads. - buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); + buckets_snapshot = rehashBuckets(); auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); hash_join = nullptr; diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index b8d83f4cad0..fd3397ba15e 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -109,7 +109,7 @@ private: /// /// NB: after @rehashBuckets there may be rows that are written to the buckets that they do not belong to. /// It is fine; these rows will be written to the corresponding buckets during the third stage. - Buckets rehashBuckets(size_t to_size); + Buckets rehashBuckets(); /// Perform some bookkeeping after all calls to @joinBlock. void startReadingDelayedBlocks(); From 2e245f4a438544f2270ef2be9336c8fb6e4aa6ed Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 4 Jul 2023 00:23:00 +0200 Subject: [PATCH 197/522] Even better --- src/Core/Field.h | 58 +- .../fuzzers/codegen_fuzzer/CMakeLists.txt | 2 +- .../fuzzers/codegen_fuzzer/clickhouse.g | 1592 ++++ src/Parsers/fuzzers/codegen_fuzzer/out.cpp | 6461 +++++++++++++++++ src/Parsers/fuzzers/codegen_fuzzer/out.proto | 1587 ++++ 5 files changed, 9686 insertions(+), 14 deletions(-) create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.cpp create mode 100644 src/Parsers/fuzzers/codegen_fuzzer/out.proto diff --git a/src/Core/Field.h b/src/Core/Field.h index 0b3c5b7f48d..97a32ab5bb1 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -42,10 +42,13 @@ using FieldVector = std::vector>; /// construct a Field of Array or a Tuple type. An alternative approach would be /// to construct both of these types from FieldVector, and have the caller /// specify the desired Field type explicitly. +/// As the result stack overflow on destruction is possible +/// and to avoid it we need to count the depth and have a threshold. #define DEFINE_FIELD_VECTOR(X) \ struct X : public FieldVector \ { \ using FieldVector::FieldVector; \ + size_t nested_field_depth = 0; \ } DEFINE_FIELD_VECTOR(Array); @@ -62,6 +65,7 @@ using FieldMap = std::map, AllocatorWithMemoryTrackin struct X : public FieldMap \ { \ using FieldMap::FieldMap; \ + size_t nested_field_depth = 0; \ } DEFINE_FIELD_MAP(Object); @@ -677,21 +681,43 @@ private: Types::Which which; - /// Field may contain a Field inside in case when Field stores Array, Tuple, Map or Object. - /// As the result stack overflow on destruction is possible - /// and to avoid it we need to count the depth and have a threshold. - size_t nested_field_depth = 0; - - /// Check whether T is already a Field with composite underlying type. 
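Compared to [PATCH 192], this commit moves the cached depth out of `Field` itself and into the container types, via the `nested_field_depth` member added to `DEFINE_FIELD_VECTOR` and `DEFINE_FIELD_MAP` above. A standalone sketch of the idea (an illustrative `Node` type and threshold, not the real `Field` or `DBMS_MAX_NESTED_FIELD_DEPTH` machinery): each container caches the maximum depth beneath it, so building a value that is too deep fails immediately instead of overflowing the stack when a deeply nested value is destroyed.

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <utility>
#include <vector>

constexpr size_t MAX_DEPTH = 16; // illustrative threshold

struct Node
{
    std::vector<Node> children;
    size_t depth = 0; // cached: 0 for a leaf, max(child depths) + 1 otherwise

    void add(Node child)
    {
        depth = std::max(depth, child.depth + 1);
        if (depth >= MAX_DEPTH)
            throw std::runtime_error("Too deep Field");
        children.push_back(std::move(child));
    }
};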
+ /// StorageType and Original are the same for Array, Tuple, Map, Object template size_t calculateAndCheckFieldDepth(Original && x) { size_t result = 0; - if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.nested_field_depth); }); - else if constexpr (std::is_same_v) - std::for_each(x.begin(), x.end(), [this, &result](auto & elem){ result = std::max(result, elem.second.nested_field_depth); }); + if constexpr (std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + { + result = x.nested_field_depth; + + auto calculate_max = [](const Field & elem, size_t result) + { + switch (elem.which) + { + case Types::Array: + return std::max(result, elem.template get().nested_field_depth); + case Types::Tuple: + return std::max(result, elem.template get().nested_field_depth); + case Types::Map: + return std::max(result, elem.template get().nested_field_depth); + case Types::Object: + return std::max(result, elem.template get().nested_field_depth); + default: + return result; + } + }; + + if constexpr (std::is_same_v) + for (auto & [_, value] : x) + result = calculate_max(value, result); + else + for (auto & value : x) + result = calculate_max(value, result); + } if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field"); @@ -711,9 +737,17 @@ private: // we must initialize the entire wide stored type, and not just the // nominal type. using StorageType = NearestFieldType; + /// Incrementing the depth since we create a new Field. - nested_field_depth = calculateAndCheckFieldDepth(x) + 1; + auto depth = calculateAndCheckFieldDepth(x) + 1; new (&storage) StorageType(std::forward(x)); + + if constexpr (std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + reinterpret_cast(&storage)->nested_field_depth = depth + 1; + which = TypeToEnum::value; } @@ -724,8 +758,6 @@ private: using JustT = std::decay_t; assert(which == TypeToEnum::value); JustT * MAY_ALIAS ptr = reinterpret_cast(&storage); - /// Do not increment the depth, because it is an assignment. 
- nested_field_depth = calculateAndCheckFieldDepth(x); *ptr = std::forward(x); } diff --git a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt index 727c49cfc4d..30f0e91a75b 100644 --- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt +++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt @@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS}) set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier") target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}") -target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE}) +target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE}) diff --git a/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g new file mode 100644 index 00000000000..0ae74055eda --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/clickhouse.g @@ -0,0 +1,1592 @@ +" "; +" "; +" "; +";"; + + +"(" $1 ")"; +"(" $1 ", " $2 ")"; +"(" $1 ", " $2 ", " $3 ")"; + +$1 ", " $2 ; +$1 ", " $2 ", " $3 ; +$1 ", " $2 ", " $3 ", " $4 ; +$1 ", " $2 ", " $3 ", " $4 ", " $5 ; + +"[" $1 ", " $2 "]"; +"[" $1 ", " $2 ", " $3 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 "]"; +"[" $1 ", " $2 ", " $3 ", " $4 ", " $5 "]"; + +$0 "(" $1 ")"; +$0 "(" $1 ", " $2 ")"; +$0 "(" $1 ", " $2 ", " $3 ")"; + +$1 " as " $2 ; + + +// TODO: add more clickhouse specific stuff +"SELECT " $1 " FROM " $2 " WHERE " $3 ; +"SELECT " $1 " FROM " $2 " GROUP BY " $3 ; +"SELECT " $1 " FROM " $2 " SORT BY " $3 ; +"SELECT " $1 " FROM " $2 " LIMIT " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 ; +"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; +"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; +"SELECT " $1 " INTO OUTFILE " $2 ; + +"WITH " $1 " AS " $2 ; + +"{" $1 ":" $2 "}"; +"[" $1 "," $2 "]"; +"[]"; + + +" x "; +"x"; +" `x` "; +"`x`"; + +" \"value\" "; +"\"value\""; +" 0 "; +"0"; +"1"; +"2"; +"123123123123123123"; +"182374019873401982734091873420923123123123123123"; +"1e-1"; +"1.1"; +"\"\""; +" '../../../../../../../../../etc/passwd' "; + +"/"; +"="; +"=="; +"!="; +"<>"; +"<"; +"<="; +">"; +">="; +"<<"; +"|<<"; +"&"; +"|"; +"||"; +"<|"; +"|>"; +"+"; +"-"; +"~"; +"*"; +"/"; +"\\"; +"%"; +""; +"."; +","; +","; +","; +","; +","; +","; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"("; +")"; +"?"; +":"; +"@"; +"@@"; +"$"; +"\""; +"`"; +"{"; +"}"; +"^"; +"::"; +"->"; +"]"; +"["; + +" abs "; +" accurate_Cast "; +" accurateCast "; +" accurate_CastOrNull "; +" accurateCastOrNull "; +" acos "; +" acosh "; +" ADD "; +" ADD COLUMN "; +" ADD CONSTRAINT "; +" addDays "; +" addHours "; +" ADD INDEX "; +" addMinutes "; +" addMonths "; +" addQuarters "; +" addressToLine "; +" addressToSymbol "; +" addSeconds "; +" addWeeks "; +" addYears "; +" aes_decrypt_mysql "; +" aes_encrypt_mysql "; +" AFTER "; +" AggregateFunction "; +" aggThrow "; +" ALIAS "; +" ALL "; +" alphaTokens "; +" ALTER "; +" ALTER LIVE VIEW "; +" ALTER TABLE "; +" and "; +" AND "; +" ANTI "; +" any "; +" ANY "; +" anyHeavy "; +" anyLast "; +" appendTrailingCharIfAbsent "; +" argMax "; +" argMin "; +" array "; +" Array "; +" ARRAY "; +" arrayAll "; +" arrayAUC "; +" arrayAvg "; +" arrayCompact "; +" arrayConcat "; +" arrayCount "; +" arrayCumSum "; +" 
arrayCumSumNonNegative "; +" arrayDifference "; +" arrayDistinct "; +" arrayElement "; +" arrayEnumerate "; +" arrayEnumerateDense "; +" arrayEnumerateDenseRanked "; +" arrayEnumerateUniq "; +" arrayEnumerateUniqRanked "; +" arrayExists "; +" arrayFill "; +" arrayFilter "; +" arrayFirst "; +" arrayFirstIndex "; +" arrayFlatten "; +" arrayIntersect "; +" arrayJoin "; +" ARRAY JOIN "; +" arrayMap "; +" arrayMax "; +" arrayMin "; +" arrayPartialReverseSort "; +" arrayPartialShuffle "; +" arrayPartialSort "; +" arrayPopBack "; +" arrayPopFront "; +" arrayProduct "; +" arrayPushBack "; +" arrayPushFront "; +" arrayReduce "; +" arrayReduceInRanges "; +" arrayResize "; +" arrayReverse "; +" arrayReverseFill "; +" arrayReverseSort "; +" arrayReverseSplit "; +" arrayShuffle "; +" arraySlice "; +" arraySort "; +" arraySplit "; +" arrayStringConcat "; +" arraySum "; +" arrayUniq "; +" arrayWithConstant "; +" arrayZip "; +" AS "; +" ASC "; +" ASCENDING "; +" asin "; +" asinh "; +" ASOF "; +" assumeNotNull "; +" AST "; +" ASYNC "; +" atan "; +" atan2 "; +" atanh "; +" ATTACH "; +" ATTACH PART "; +" ATTACH PARTITION "; +" avg "; +" avgWeighted "; +" bar "; +" base64Decode "; +" base64Encode "; +" basename "; +" bayesAB "; +" BETWEEN "; +" BIGINT "; +" BIGINT SIGNED "; +" BIGINT UNSIGNED "; +" bin "; +" BINARY "; +" BINARY LARGE OBJECT "; +" BINARY VARYING "; +" bitAnd "; +" BIT_AND "; +" __bitBoolMaskAnd "; +" __bitBoolMaskOr "; +" bitCount "; +" bitHammingDistance "; +" bitmapAnd "; +" bitmapAndCardinality "; +" bitmapAndnot "; +" bitmapAndnotCardinality "; +" bitmapBuild "; +" bitmapCardinality "; +" bitmapContains "; +" bitmapHasAll "; +" bitmapHasAny "; +" bitmapMax "; +" bitmapMin "; +" bitmapOr "; +" bitmapOrCardinality "; +" bitmapSubsetInRange "; +" bitmapSubsetLimit "; +" bitmapToArray "; +" bitmapTransform "; +" bitmapXor "; +" bitmapXorCardinality "; +" bitmaskToArray "; +" bitmaskToList "; +" bitNot "; +" bitOr "; +" BIT_OR "; +" bitPositionsToArray "; +" bitRotateLeft "; +" bitRotateRight "; +" bitShiftLeft "; +" bitShiftRight "; +" __bitSwapLastTwo "; +" bitTest "; +" bitTestAll "; +" bitTestAny "; +" __bitWrapperFunc "; +" bitXor "; +" BIT_XOR "; +" BLOB "; +" blockNumber "; +" blockSerializedSize "; +" blockSize "; +" BOOL "; +" BOOLEAN "; +" BOTH "; +" boundingRatio "; +" buildId "; +" BY "; +" BYTE "; +" BYTEA "; +" byteSize "; +" CASE "; +" caseWithExpr "; +" caseWithExpression "; +" caseWithoutExpr "; +" caseWithoutExpression "; +" _CAST "; +" CAST "; +" categoricalInformationValue "; +" cbrt "; +" ceil "; +" ceiling "; +" char "; +" CHAR "; +" CHARACTER "; +" CHARACTER LARGE OBJECT "; +" CHARACTER_LENGTH "; +" CHARACTER VARYING "; +" CHAR LARGE OBJECT "; +" CHAR_LENGTH "; +" CHAR VARYING "; +" CHECK "; +" CHECK TABLE "; +" cityHash64 "; +" CLEAR "; +" CLEAR COLUMN "; +" CLEAR INDEX "; +" CLOB "; +" CLUSTER "; +" coalesce "; +" CODEC "; +" COLLATE "; +" COLUMN "; +" COLUMNS "; +" COMMENT "; +" COMMENT COLUMN "; +" concat "; +" concatAssumeInjective "; +" connection_id "; +" connectionid "; +" connectionId "; +" CONSTRAINT "; +" convertCharset "; +" corr "; +" corrStable "; +" cos "; +" cosh "; +" count "; +" countDigits "; +" countEqual "; +" countMatches "; +" countMatchesCaseInsensitive "; +" countSubstrings "; +" countSubstringsCaseInsensitive "; +" countSubstringsCaseInsensitiveUTF8 "; +" covarPop "; +" COVAR_POP "; +" covarPopStable "; +" covarSamp "; +" COVAR_SAMP "; +" covarSampStable "; +" CRC32 "; +" CRC32IEEE "; +" CRC64 "; +" CREATE "; +" CROSS "; +" CUBE "; +" 
currentDatabase "; +" currentProfiles "; +" currentRoles "; +" currentUser "; +" cutFragment "; +" cutIPv6 "; +" cutQueryString "; +" cutQueryStringAndFragment "; +" cutToFirstSignificantSubdomain "; +" cutToFirstSignificantSubdomainCustom "; +" cutToFirstSignificantSubdomainCustomWithWWW "; +" cutToFirstSignificantSubdomainWithWWW "; +" cutURLParameter "; +" cutWWW "; +" D "; +" DATABASE "; +" DATABASES "; +" Date "; +" DATE "; +" Date32 "; +" DATE_ADD "; +" DATEADD "; +" dateDiff "; +" DATE_DIFF "; +" DATEDIFF "; +" dateName "; +" DATE_SUB "; +" DATESUB "; +" DateTime "; +" DateTime32 "; +" DateTime64 "; +" dateTime64ToSnowflake "; +" dateTimeToSnowflake "; +" date_trunc "; +" dateTrunc "; +" DAY "; +" DAYOFMONTH "; +" DAYOFWEEK "; +" DAYOFYEAR "; +" DD "; +" DEC "; +" Decimal "; +" Decimal128 "; +" Decimal256 "; +" Decimal32 "; +" Decimal64 "; +" decodeURLComponent "; +" decodeXMLComponent "; +" decrypt "; +" DEDUPLICATE "; +" DEFAULT "; +" defaultProfiles "; +" defaultRoles "; +" defaultValueOfArgumentType "; +" defaultValueOfTypeName "; +" DELAY "; +" DELETE "; +" DELETE WHERE "; +" deltaSum "; +" deltaSumTimestamp "; +" demangle "; +" dense_rank "; +" DESC "; +" DESCENDING "; +" DESCRIBE "; +" DETACH "; +" DETACH PARTITION "; +" dictGet "; +" dictGetChildren "; +" dictGetDate "; +" dictGetDateOrDefault "; +" dictGetDateTime "; +" dictGetDateTimeOrDefault "; +" dictGetDescendants "; +" dictGetFloat32 "; +" dictGetFloat32OrDefault "; +" dictGetFloat64 "; +" dictGetFloat64OrDefault "; +" dictGetHierarchy "; +" dictGetInt16 "; +" dictGetInt16OrDefault "; +" dictGetInt32 "; +" dictGetInt32OrDefault "; +" dictGetInt64 "; +" dictGetInt64OrDefault "; +" dictGetInt8 "; +" dictGetInt8OrDefault "; +" dictGetOrDefault "; +" dictGetOrNull "; +" dictGetString "; +" dictGetStringOrDefault "; +" dictGetUInt16 "; +" dictGetUInt16OrDefault "; +" dictGetUInt32 "; +" dictGetUInt32OrDefault "; +" dictGetUInt64 "; +" dictGetUInt64OrDefault "; +" dictGetUInt8 "; +" dictGetUInt8OrDefault "; +" dictGetUUID "; +" dictGetUUIDOrDefault "; +" dictHas "; +" DICTIONARIES "; +" DICTIONARY "; +" dictIsIn "; +" DISK "; +" DISTINCT "; +" DISTRIBUTED "; +" divide "; +" domain "; +" domainWithoutWWW "; +" DOUBLE "; +" DOUBLE PRECISION "; +" DROP "; +" DROP COLUMN "; +" DROP CONSTRAINT "; +" DROP DETACHED PART "; +" DROP DETACHED PARTITION "; +" DROP INDEX "; +" DROP PARTITION "; +" dumpColumnStructure "; +" e "; +" ELSE "; +" empty "; +" emptyArrayDate "; +" emptyArrayDateTime "; +" emptyArrayFloat32 "; +" emptyArrayFloat64 "; +" emptyArrayInt16 "; +" emptyArrayInt32 "; +" emptyArrayInt64 "; +" emptyArrayInt8 "; +" emptyArrayString "; +" emptyArrayToSingle "; +" emptyArrayUInt16 "; +" emptyArrayUInt32 "; +" emptyArrayUInt64 "; +" emptyArrayUInt8 "; +" enabledProfiles "; +" enabledRoles "; +" encodeXMLComponent "; +" encrypt "; +" END "; +" endsWith "; +" ENGINE "; +" entropy "; +" Enum "; +" ENUM "; +" Enum16 "; +" Enum8 "; +" equals "; +" erf "; +" erfc "; +" errorCodeToName "; +" evalMLMethod "; +" EVENTS "; +" EXCHANGE TABLES "; +" EXISTS "; +" exp "; +" exp10 "; +" exp2 "; +" EXPLAIN "; +" exponentialMovingAverage "; +" EXPRESSION "; +" extract "; +" EXTRACT "; +" extractAll "; +" extractAllGroups "; +" extractAllGroupsHorizontal "; +" extractAllGroupsVertical "; +" extractGroups "; +" extractTextFromHTML "; +" extractURLParameter "; +" extractURLParameterNames "; +" extractURLParameters "; +" farmFingerprint64 "; +" farmHash64 "; +" FETCHES "; +" FETCH PART "; +" FETCH PARTITION "; +" file "; +" filesystemAvailable 
"; +" filesystemCapacity "; +" filesystemFree "; +" FINAL "; +" finalizeAggregation "; +" FIRST "; +" firstSignificantSubdomain "; +" firstSignificantSubdomainCustom "; +" first_value "; +" FIXED "; +" FixedString "; +" flatten "; +" FLOAT "; +" Float32 "; +" Float64 "; +" floor "; +" FLUSH "; +" FOR "; +" ForEach "; +" format "; +" FORMAT "; +" formatDateTime "; +" formatReadableQuantity "; +" formatReadableDecimalSize "; +" formatReadableSize "; +" formatReadableTimeDelta "; +" formatRow "; +" formatRowNoNewline "; +" FQDN "; +" fragment "; +" FREEZE "; +" FROM "; +" FROM_BASE64 "; +" fromModifiedJulianDay "; +" fromModifiedJulianDayOrNull "; +" FROM_UNIXTIME "; +" fromUnixTimestamp "; +" fromUnixTimestamp64Micro "; +" fromUnixTimestamp64Milli "; +" fromUnixTimestamp64Nano "; +" FULL "; +" fullHostName "; +" FUNCTION "; +" fuzzBits "; +" gccMurmurHash "; +" gcd "; +" generateUUIDv4 "; +" geoDistance "; +" geohashDecode "; +" geohashEncode "; +" geohashesInBox "; +" geoToH3 "; +" geoToS2 "; +" getMacro "; +" __getScalar "; +" getServerPort "; +" getSetting "; +" getSizeOfEnumType "; +" GLOBAL "; +" globalIn "; +" globalInIgnoreSet "; +" globalNotIn "; +" globalNotInIgnoreSet "; +" globalNotNullIn "; +" globalNotNullInIgnoreSet "; +" globalNullIn "; +" globalNullInIgnoreSet "; +" globalVariable "; +" GRANULARITY "; +" greatCircleAngle "; +" greatCircleDistance "; +" greater "; +" greaterOrEquals "; +" greatest "; +" GROUP "; +" groupArray "; +" groupArrayInsertAt "; +" groupArrayMovingAvg "; +" groupArrayMovingSum "; +" groupArraySample "; +" groupBitAnd "; +" groupBitmap "; +" groupBitmapAnd "; +" groupBitmapOr "; +" groupBitmapXor "; +" groupBitOr "; +" groupBitXor "; +" GROUP BY "; +" groupUniqArray "; +" h3EdgeAngle "; +" h3EdgeLengthM "; +" h3GetBaseCell "; +" h3GetFaces "; +" h3GetResolution "; +" h3HexAreaM2 "; +" h3IndexesAreNeighbors "; +" h3IsPentagon "; +" h3IsResClassIII "; +" h3IsValid "; +" h3kRing "; +" h3ToChildren "; +" h3ToGeo "; +" h3ToGeoBoundary "; +" h3ToParent "; +" h3ToString "; +" halfMD5 "; +" has "; +" hasAll "; +" hasAny "; +" hasColumnInTable "; +" hasSubstr "; +" hasThreadFuzzer "; +" hasToken "; +" hasTokenCaseInsensitive "; +" HAVING "; +" hex "; +" HH "; +" HIERARCHICAL "; +" histogram "; +" hiveHash "; +" hostname "; +" hostName "; +" HOUR "; +" hypot "; +" ID "; +" identity "; +" if "; +" IF "; +" IF EXISTS "; +" IF NOT EXISTS "; +" ifNotFinite "; +" ifNull "; +" ignore "; +" ilike "; +" ILIKE "; +" in "; +" IN "; +" INDEX "; +" indexHint "; +" indexOf "; +" INET4 "; +" INET6 "; +" INET6_ATON "; +" INET6_NTOA "; +" INET_ATON "; +" INET_NTOA "; +" INF "; +" inIgnoreSet "; +" initializeAggregation "; +" initial_query_id "; +" initialQueryID "; +" INJECTIVE "; +" INNER "; +" IN PARTITION "; +" INSERT "; +" INSERT INTO "; +" INT "; +" INT1 "; +" Int128 "; +" Int16 "; +" INT1 SIGNED "; +" INT1 UNSIGNED "; +" Int256 "; +" Int32 "; +" Int64 "; +" Int8 "; +" intDiv "; +" intDivOrZero "; +" INTEGER "; +" INTEGER SIGNED "; +" INTEGER UNSIGNED "; +" INTERVAL "; +" IntervalDay "; +" IntervalHour "; +" intervalLengthSum "; +" IntervalMinute "; +" IntervalMonth "; +" IntervalQuarter "; +" IntervalSecond "; +" IntervalWeek "; +" IntervalYear "; +" intExp10 "; +" intExp2 "; +" intHash32 "; +" intHash64 "; +" INTO "; +" INTO OUTFILE "; +" INT SIGNED "; +" INT UNSIGNED "; +" IPv4 "; +" IPv4CIDRToRange "; +" IPv4NumToString "; +" IPv4NumToStringClassC "; +" IPv4StringToNum "; +" IPv4ToIPv6 "; +" IPv6 "; +" IPv6CIDRToRange "; +" IPv6NumToString "; +" IPv6StringToNum "; +" IS 
"; +" isConstant "; +" isDecimalOverflow "; +" isFinite "; +" isInfinite "; +" isIPAddressInRange "; +" isIPv4String "; +" isIPv6String "; +" isNaN "; +" isNotNull "; +" isNull "; +" IS_OBJECT_ID "; +" isValidJSON "; +" isValidUTF8 "; +" isZeroOrNull "; +" javaHash "; +" javaHashUTF16LE "; +" JOIN "; +" joinGet "; +" joinGetOrNull "; +" JSON_EXISTS "; +" JSONExtract "; +" JSONExtractArrayRaw "; +" JSONExtractBool "; +" JSONExtractFloat "; +" JSONExtractInt "; +" JSONExtractKeysAndValues "; +" JSONExtractKeysAndValuesRaw "; +" JSONExtractKeys "; +" JSONExtractRaw "; +" JSONExtractString "; +" JSONExtractUInt "; +" JSONHas "; +" JSONKey "; +" JSONLength "; +" JSON_QUERY "; +" JSONType "; +" JSON_VALUE "; +" jumpConsistentHash "; +" KEY "; +" KILL "; +" kurtPop "; +" kurtSamp "; +" lagInFrame "; +" LAST "; +" last_value "; +" LAYOUT "; +" lcase "; +" lcm "; +" leadInFrame "; +" LEADING "; +" least "; +" LEFT "; +" LEFT ARRAY JOIN "; +" leftPad "; +" leftPadUTF8 "; +" lemmatize "; +" length "; +" lengthUTF8 "; +" less "; +" lessOrEquals "; +" lgamma "; +" LIFETIME "; +" like "; +" LIKE "; +" LIMIT "; +" LIVE "; +" ln "; +" LOCAL "; +" locate "; +" log "; +" log10 "; +" log1p "; +" log2 "; +" LOGS "; +" logTrace "; +" LONGBLOB "; +" LONGTEXT "; +" LowCardinality "; +" lowCardinalityIndices "; +" lowCardinalityKeys "; +" lower "; +" lowerUTF8 "; +" lpad "; +" LTRIM "; +" M "; +" MACNumToString "; +" MACStringToNum "; +" MACStringToOUI "; +" mannWhitneyUTest "; +" map "; +" Map "; +" mapAdd "; +" mapContains "; +" mapKeys "; +" mapPopulateSeries "; +" mapSubtract "; +" mapValues "; +" match "; +" materialize "; +" MATERIALIZE "; +" MATERIALIZED "; +" MATERIALIZE INDEX "; +" MATERIALIZE TTL "; +" max "; +" MAX "; +" maxIntersections "; +" maxIntersectionsPosition "; +" maxMap "; +" MD4 "; +" MD5 "; +" median "; +" medianBFloat16 "; +" medianBFloat16Weighted "; +" medianDeterministic "; +" medianExact "; +" medianExactHigh "; +" medianExactLow "; +" medianExactWeighted "; +" medianTDigest "; +" medianTDigestWeighted "; +" medianTiming "; +" medianTimingWeighted "; +" MEDIUMBLOB "; +" MEDIUMINT "; +" MEDIUMINT SIGNED "; +" MEDIUMINT UNSIGNED "; +" MEDIUMTEXT "; +" Merge "; +" MERGES "; +" metroHash64 "; +" MI "; +" mid "; +" min "; +" MIN "; +" minMap "; +" minus "; +" MINUTE "; +" MM "; +" mod "; +" MODIFY "; +" MODIFY COLUMN "; +" MODIFY ORDER BY "; +" MODIFY QUERY "; +" MODIFY SETTING "; +" MODIFY TTL "; +" modulo "; +" moduloLegacy "; +" moduloOrZero "; +" MONTH "; +" MOVE "; +" MOVE PART "; +" MOVE PARTITION "; +" movingXXX "; +" multiFuzzyMatchAllIndices "; +" multiFuzzyMatchAny "; +" multiFuzzyMatchAnyIndex "; +" multiIf "; +" multiMatchAllIndices "; +" multiMatchAny "; +" multiMatchAnyIndex "; +" multiply "; +" MultiPolygon "; +" multiSearchAllPositions "; +" multiSearchAllPositionsCaseInsensitive "; +" multiSearchAllPositionsCaseInsensitiveUTF8 "; +" multiSearchAllPositionsUTF8 "; +" multiSearchAny "; +" multiSearchAnyCaseInsensitive "; +" multiSearchAnyCaseInsensitiveUTF8 "; +" multiSearchAnyUTF8 "; +" multiSearchFirstIndex "; +" multiSearchFirstIndexCaseInsensitive "; +" multiSearchFirstIndexCaseInsensitiveUTF8 "; +" multiSearchFirstIndexUTF8 "; +" multiSearchFirstPosition "; +" multiSearchFirstPositionCaseInsensitive "; +" multiSearchFirstPositionCaseInsensitiveUTF8 "; +" multiSearchFirstPositionUTF8 "; +" murmurHash2_32 "; +" murmurHash2_64 "; +" murmurHash3_128 "; +" murmurHash3_32 "; +" murmurHash3_64 "; +" MUTATION "; +" N "; +" NAME "; +" NAN_SQL "; +" NATIONAL CHAR "; +" NATIONAL 
CHARACTER "; +" NATIONAL CHARACTER LARGE OBJECT "; +" NATIONAL CHARACTER VARYING "; +" NATIONAL CHAR VARYING "; +" NCHAR "; +" NCHAR LARGE OBJECT "; +" NCHAR VARYING "; +" negate "; +" neighbor "; +" Nested "; +" netloc "; +" ngramDistance "; +" ngramDistanceCaseInsensitive "; +" ngramDistanceCaseInsensitiveUTF8 "; +" ngramDistanceUTF8 "; +" ngramMinHash "; +" ngramMinHashArg "; +" ngramMinHashArgCaseInsensitive "; +" ngramMinHashArgCaseInsensitiveUTF8 "; +" ngramMinHashArgUTF8 "; +" ngramMinHashCaseInsensitive "; +" ngramMinHashCaseInsensitiveUTF8 "; +" ngramMinHashUTF8 "; +" ngramSearch "; +" ngramSearchCaseInsensitive "; +" ngramSearchCaseInsensitiveUTF8 "; +" ngramSearchUTF8 "; +" ngramSimHash "; +" ngramSimHashCaseInsensitive "; +" ngramSimHashCaseInsensitiveUTF8 "; +" ngramSimHashUTF8 "; +" NO "; +" NO DELAY "; +" NONE "; +" normalizedQueryHash "; +" normalizedQueryHashKeepNames "; +" normalizeQuery "; +" normalizeQueryKeepNames "; +" not "; +" NOT "; +" notEmpty "; +" notEquals "; +" nothing "; +" Nothing "; +" notILike "; +" notIn "; +" notInIgnoreSet "; +" notLike "; +" notNullIn "; +" notNullInIgnoreSet "; +" now "; +" now64 "; +" Null "; +" Nullable "; +" nullIf "; +" nullIn "; +" nullInIgnoreSet "; +" NULLS "; +" NULL_SQL "; +" NUMERIC "; +" NVARCHAR "; +" OFFSET "; +" ON "; +" ONLY "; +" OPTIMIZE "; +" OPTIMIZE TABLE "; +" or "; +" OR "; +" ORDER "; +" ORDER BY "; +" OR REPLACE "; +" OUTER "; +" OUTFILE "; +" parseDateTime32BestEffort "; +" parseDateTime32BestEffortOrNull "; +" parseDateTime32BestEffortOrZero "; +" parseDateTime64BestEffort "; +" parseDateTime64BestEffortOrNull "; +" parseDateTime64BestEffortOrZero "; +" parseDateTimeBestEffort "; +" parseDateTimeBestEffortOrNull "; +" parseDateTimeBestEffortOrZero "; +" parseDateTimeBestEffortUS "; +" parseDateTimeBestEffortUSOrNull "; +" parseDateTimeBestEffortUSOrZero "; +" parseTimeDelta "; +" PARTITION "; +" PARTITION BY "; +" partitionId "; +" path "; +" pathFull "; +" pi "; +" plus "; +" Point "; +" pointInEllipses "; +" pointInPolygon "; +" Polygon "; +" polygonAreaCartesian "; +" polygonAreaSpherical "; +" polygonConvexHullCartesian "; +" polygonPerimeterCartesian "; +" polygonPerimeterSpherical "; +" polygonsDistanceCartesian "; +" polygonsDistanceSpherical "; +" polygonsEqualsCartesian "; +" polygonsIntersectionCartesian "; +" polygonsIntersectionSpherical "; +" polygonsSymDifferenceCartesian "; +" polygonsSymDifferenceSpherical "; +" polygonsUnionCartesian "; +" polygonsUnionSpherical "; +" polygonsWithinCartesian "; +" polygonsWithinSpherical "; +" POPULATE "; +" port "; +" position "; +" positionCaseInsensitive "; +" positionCaseInsensitiveUTF8 "; +" positionUTF8 "; +" pow "; +" power "; +" PREWHERE "; +" PRIMARY "; +" PRIMARY KEY "; +" PROJECTION "; +" protocol "; +" Q "; +" QQ "; +" quantile "; +" quantileBFloat16 "; +" quantileBFloat16Weighted "; +" quantileDeterministic "; +" quantileExact "; +" quantileExactExclusive "; +" quantileExactHigh "; +" quantileExactInclusive "; +" quantileExactLow "; +" quantileExactWeighted "; +" quantiles "; +" quantilesBFloat16 "; +" quantilesBFloat16Weighted "; +" quantilesDeterministic "; +" quantilesExact "; +" quantilesExactExclusive "; +" quantilesExactHigh "; +" quantilesExactInclusive "; +" quantilesExactLow "; +" quantilesExactWeighted "; +" quantilesTDigest "; +" quantilesTDigestWeighted "; +" quantilesTiming "; +" quantilesTimingWeighted "; +" quantileTDigest "; +" quantileTDigestWeighted "; +" quantileTiming "; +" quantileTimingWeighted "; +" QUARTER "; +" query_id 
"; +" queryID "; +" queryString "; +" queryStringAndFragment "; +" rand "; +" rand32 "; +" rand64 "; +" randConstant "; +" randomFixedString "; +" randomPrintableASCII "; +" randomString "; +" randomStringUTF8 "; +" range "; +" RANGE "; +" rank "; +" rankCorr "; +" readWKTMultiPolygon "; +" readWKTPoint "; +" readWKTPolygon "; +" readWKTRing "; +" REAL "; +" REFRESH "; +" regexpQuoteMeta "; +" regionHierarchy "; +" regionIn "; +" regionToArea "; +" regionToCity "; +" regionToContinent "; +" regionToCountry "; +" regionToDistrict "; +" regionToName "; +" regionToPopulation "; +" regionToTopContinent "; +" reinterpret "; +" reinterpretAsDate "; +" reinterpretAsDateTime "; +" reinterpretAsFixedString "; +" reinterpretAsFloat32 "; +" reinterpretAsFloat64 "; +" reinterpretAsInt128 "; +" reinterpretAsInt16 "; +" reinterpretAsInt256 "; +" reinterpretAsInt32 "; +" reinterpretAsInt64 "; +" reinterpretAsInt8 "; +" reinterpretAsString "; +" reinterpretAsUInt128 "; +" reinterpretAsUInt16 "; +" reinterpretAsUInt256 "; +" reinterpretAsUInt32 "; +" reinterpretAsUInt64 "; +" reinterpretAsUInt8 "; +" reinterpretAsUUID "; +" RELOAD "; +" REMOVE "; +" RENAME "; +" RENAME COLUMN "; +" RENAME TABLE "; +" repeat "; +" replace "; +" REPLACE "; +" replaceAll "; +" replaceOne "; +" REPLACE PARTITION "; +" replaceRegexpAll "; +" replaceRegexpOne "; +" REPLICA "; +" replicate "; +" REPLICATED "; +" Resample "; +" RESUME "; +" retention "; +" reverse "; +" reverseUTF8 "; +" RIGHT "; +" rightPad "; +" rightPadUTF8 "; +" Ring "; +" ROLLUP "; +" round "; +" roundAge "; +" roundBankers "; +" roundDown "; +" roundDuration "; +" roundToExp2 "; +" row_number "; +" rowNumberInAllBlocks "; +" rowNumberInBlock "; +" rpad "; +" RTRIM "; +" runningAccumulate "; +" runningConcurrency "; +" runningDifference "; +" runningDifferenceStartingWithFirstValue "; +" S "; +" s2CapContains "; +" s2CapUnion "; +" s2CellsIntersect "; +" s2GetNeighbors "; +" s2RectAdd "; +" s2RectContains "; +" s2RectIntersection "; +" s2RectUnion "; +" s2ToGeo "; +" SAMPLE "; +" SAMPLE BY "; +" SECOND "; +" SELECT "; +" SEMI "; +" SENDS "; +" sequenceCount "; +" sequenceMatch "; +" sequenceNextNode "; +" serverUUID "; +" SET "; +" SETTINGS "; +" SHA1 "; +" SHA224 "; +" SHA256 "; +" SHA384 "; +" SHA512 "; +" shardCount "; +" shardNum "; +" SHOW "; +" SHOW PROCESSLIST "; +" sigmoid "; +" sign "; +" SimpleAggregateFunction "; +" simpleJSONExtractBool "; +" simpleJSONExtractFloat "; +" simpleJSONExtractInt "; +" simpleJSONExtractRaw "; +" simpleJSONExtractString "; +" simpleJSONExtractUInt "; +" simpleJSONHas "; +" simpleLinearRegression "; +" sin "; +" SINGLE "; +" singleValueOrNull "; +" sinh "; +" sipHash128 "; +" sipHash64 "; +" skewPop "; +" skewSamp "; +" sleep "; +" sleepEachRow "; +" SMALLINT "; +" SMALLINT SIGNED "; +" SMALLINT UNSIGNED "; +" snowflakeToDateTime "; +" snowflakeToDateTime64 "; +" SOURCE "; +" sparkbar "; +" splitByChar "; +" splitByNonAlpha "; +" splitByRegexp "; +" splitByString "; +" splitByWhitespace "; +" SQL_TSI_DAY "; +" SQL_TSI_HOUR "; +" SQL_TSI_MINUTE "; +" SQL_TSI_MONTH "; +" SQL_TSI_QUARTER "; +" SQL_TSI_SECOND "; +" SQL_TSI_WEEK "; +" SQL_TSI_YEAR "; +" sqrt "; +" SS "; +" START "; +" startsWith "; +" State "; +" stddevPop "; +" STDDEV_POP "; +" stddevPopStable "; +" stddevSamp "; +" STDDEV_SAMP "; +" stddevSampStable "; +" stem "; +" STEP "; +" stochasticLinearRegression "; +" stochasticLogisticRegression "; +" STOP "; +" String "; +" stringToH3 "; +" studentTTest "; +" subBitmap "; +" substr "; +" substring "; +" SUBSTRING 
"; +" substringUTF8 "; +" subtractDays "; +" subtractHours "; +" subtractMinutes "; +" subtractMonths "; +" subtractQuarters "; +" subtractSeconds "; +" subtractWeeks "; +" subtractYears "; +" sum "; +" sumCount "; +" sumKahan "; +" sumMap "; +" sumMapFiltered "; +" sumMapFilteredWithOverflow "; +" sumMapWithOverflow "; +" sumWithOverflow "; +" SUSPEND "; +" svg "; +" SVG "; +" SYNC "; +" synonyms "; +" SYNTAX "; +" SYSTEM "; +" TABLE "; +" TABLES "; +" tan "; +" tanh "; +" tcpPort "; +" TEMPORARY "; +" TEST "; +" TEXT "; +" tgamma "; +" THEN "; +" throwIf "; +" tid "; +" TIES "; +" TIMEOUT "; +" timeSlot "; +" timeSlots "; +" TIMESTAMP "; +" TIMESTAMP_ADD "; +" TIMESTAMPADD "; +" TIMESTAMP_DIFF "; +" TIMESTAMPDIFF "; +" TIMESTAMP_SUB "; +" TIMESTAMPSUB "; +" timezone "; +" timeZone "; +" timezoneOf "; +" timeZoneOf "; +" timezoneOffset "; +" timeZoneOffset "; +" TINYBLOB "; +" TINYINT "; +" TINYINT SIGNED "; +" TINYINT UNSIGNED "; +" TINYTEXT "; +" TO "; +" TO_BASE64 "; +" toColumnTypeName "; +" toDate "; +" toDate32 "; +" toDate32OrNull "; +" toDate32OrZero "; +" toDateOrNull "; +" toDateOrZero "; +" toDateTime "; +" toDateTime32 "; +" toDateTime64 "; +" toDateTime64OrNull "; +" toDateTime64OrZero "; +" toDateTimeOrNull "; +" toDateTimeOrZero "; +" today "; +" toDayOfMonth "; +" toDayOfWeek "; +" toDayOfYear "; +" toDecimal128 "; +" toDecimal128OrNull "; +" toDecimal128OrZero "; +" toDecimal256 "; +" toDecimal256OrNull "; +" toDecimal256OrZero "; +" toDecimal32 "; +" toDecimal32OrNull "; +" toDecimal32OrZero "; +" toDecimal64 "; +" toDecimal64OrNull "; +" toDecimal64OrZero "; +" TO DISK "; +" toFixedString "; +" toFloat32 "; +" toFloat32OrNull "; +" toFloat32OrZero "; +" toFloat64 "; +" toFloat64OrNull "; +" toFloat64OrZero "; +" toHour "; +" toInt128 "; +" toInt128OrNull "; +" toInt128OrZero "; +" toInt16 "; +" toInt16OrNull "; +" toInt16OrZero "; +" toInt256 "; +" toInt256OrNull "; +" toInt256OrZero "; +" toInt32 "; +" toInt32OrNull "; +" toInt32OrZero "; +" toInt64 "; +" toInt64OrNull "; +" toInt64OrZero "; +" toInt8 "; +" toInt8OrNull "; +" toInt8OrZero "; +" toIntervalDay "; +" toIntervalHour "; +" toIntervalMinute "; +" toIntervalMonth "; +" toIntervalQuarter "; +" toIntervalSecond "; +" toIntervalWeek "; +" toIntervalYear "; +" toIPv4 "; +" toIPv6 "; +" toISOWeek "; +" toISOYear "; +" toJSONString "; +" toLowCardinality "; +" toMinute "; +" toModifiedJulianDay "; +" toModifiedJulianDayOrNull "; +" toMonday "; +" toMonth "; +" toNullable "; +" TOP "; +" topK "; +" topKWeighted "; +" topLevelDomain "; +" toQuarter "; +" toRelativeDayNum "; +" toRelativeHourNum "; +" toRelativeMinuteNum "; +" toRelativeMonthNum "; +" toRelativeQuarterNum "; +" toRelativeSecondNum "; +" toRelativeWeekNum "; +" toRelativeYearNum "; +" toSecond "; +" toStartOfDay "; +" toStartOfFifteenMinutes "; +" toStartOfFiveMinutes "; +" toStartOfHour "; +" toStartOfInterval "; +" toStartOfISOYear "; +" toStartOfMinute "; +" toStartOfMonth "; +" toStartOfQuarter "; +" toStartOfSecond "; +" toStartOfTenMinutes "; +" toStartOfWeek "; +" toStartOfYear "; +" toString "; +" toStringCutToZero "; +" TO TABLE "; +" TOTALS "; +" toTime "; +" toTimezone "; +" toTimeZone "; +" toTypeName "; +" toUInt128 "; +" toUInt128OrNull "; +" toUInt128OrZero "; +" toUInt16 "; +" toUInt16OrNull "; +" toUInt16OrZero "; +" toUInt256 "; +" toUInt256OrNull "; +" toUInt256OrZero "; +" toUInt32 "; +" toUInt32OrNull "; +" toUInt32OrZero "; +" toUInt64 "; +" toUInt64OrNull "; +" toUInt64OrZero "; +" toUInt8 "; +" toUInt8OrNull "; +" toUInt8OrZero 
"; +" toUnixTimestamp "; +" toUnixTimestamp64Micro "; +" toUnixTimestamp64Milli "; +" toUnixTimestamp64Nano "; +" toUUID "; +" toUUIDOrNull "; +" toUUIDOrZero "; +" toValidUTF8 "; +" TO VOLUME "; +" toWeek "; +" toYear "; +" toYearWeek "; +" toYYYYMM "; +" toYYYYMMDD "; +" toYYYYMMDDhhmmss "; +" TRAILING "; +" transform "; +" TRIM "; +" trimBoth "; +" trimLeft "; +" trimRight "; +" trunc "; +" truncate "; +" TRUNCATE "; +" tryBase64Decode "; +" TTL "; +" tuple "; +" Tuple "; +" tupleElement "; +" tupleHammingDistance "; +" tupleToNameValuePairs "; +" TYPE "; +" ucase "; +" UInt128 "; +" UInt16 "; +" UInt256 "; +" UInt32 "; +" UInt64 "; +" UInt8 "; +" unbin "; +" unhex "; +" UNION "; +" uniq "; +" uniqCombined "; +" uniqCombined64 "; +" uniqExact "; +" uniqHLL12 "; +" uniqTheta "; +" uniqUpTo "; +" UPDATE "; +" upper "; +" upperUTF8 "; +" uptime "; +" URLHash "; +" URLHierarchy "; +" URLPathHierarchy "; +" USE "; +" user "; +" USING "; +" UUID "; +" UUIDNumToString "; +" UUIDStringToNum "; +" validateNestedArraySizes "; +" VALUES "; +" VARCHAR "; +" VARCHAR2 "; +" varPop "; +" VAR_POP "; +" varPopStable "; +" varSamp "; +" VAR_SAMP "; +" varSampStable "; +" version "; +" VIEW "; +" visibleWidth "; +" visitParamExtractBool "; +" visitParamExtractFloat "; +" visitParamExtractInt "; +" visitParamExtractRaw "; +" visitParamExtractString "; +" visitParamExtractUInt "; +" visitParamHas "; +" VOLUME "; +" WATCH "; +" week "; +" WEEK "; +" welchTTest "; +" WHEN "; +" WHERE "; +" windowFunnel "; +" WITH "; +" WITH FILL "; +" WITH TIES "; +" WK "; +" wkt "; +" wordShingleMinHash "; +" wordShingleMinHashArg "; +" wordShingleMinHashArgCaseInsensitive "; +" wordShingleMinHashArgCaseInsensitiveUTF8 "; +" wordShingleMinHashArgUTF8 "; +" wordShingleMinHashCaseInsensitive "; +" wordShingleMinHashCaseInsensitiveUTF8 "; +" wordShingleMinHashUTF8 "; +" wordShingleSimHash "; +" wordShingleSimHashCaseInsensitive "; +" wordShingleSimHashCaseInsensitiveUTF8 "; +" wordShingleSimHashUTF8 "; +" WW "; +" xor "; +" xxHash32 "; +" xxHash64 "; +" kostikConsistentHash "; +" YEAR "; +" yearweek "; +" yesterday "; +" YY "; +" YYYY "; +" zookeeperSessionUptime "; diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp new file mode 100644 index 00000000000..29168751d71 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp @@ -0,0 +1,6461 @@ +#include +#include +#include + +#include + +#include "out.pb.h" + +void GenerateWord(const Word&, std::string&, int); + +void GenerateSentence(const Sentence& stc, std::string &s, int depth) { + for (int i = 0; i < stc.words_size(); i++ ) { + GenerateWord(stc.words(i), s, ++depth); + } +} +void GenerateWord(const Word& word, std::string &s, int depth) { + if (depth > 5) return; + + switch (word.value()) { + case 0: { + s += " "; + break; + } + case 1: { + s += " "; + break; + } + case 2: { + s += " "; + break; + } + case 3: { + s += ";"; + break; + } + case 4: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ")"; + break; + } + case 5: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ")"; + break; + } + case 6: { + s += "("; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if 
(word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ")"; + break; + } + case 7: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 8: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 9: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 10: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ", "; + if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); + break; + } + case 11: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "]"; + break; + } + case 12: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += "]"; + break; + } + case 13: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += "]"; + break; + } + case 14: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ", "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ", "; + if (word.inner().words_size() > 4) GenerateWord(word.inner().words(4), s, ++depth); + s += "]"; + break; + } + case 15: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ")"; + break; + } + case 16: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) 
GenerateWord(word.inner().words(2), s, ++depth); + s += ")"; + break; + } + case 17: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += "("; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += ", "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += ", "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + s += ")"; + break; + } + case 18: { + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " as "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 19: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " WHERE "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 20: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " GROUP BY "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 21: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " SORT BY "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 22: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " LIMIT "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 23: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 24: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " ARRAY JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + break; + } + case 25: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += " ON "; + if (word.inner().words_size() > 3) GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 26: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " FROM "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += " JOIN "; + if (word.inner().words_size() > 2) GenerateWord(word.inner().words(2), s, ++depth); + s += " USING "; + if (word.inner().words_size() > 3) 
GenerateWord(word.inner().words(3), s, ++depth); + break; + } + case 27: { + s += "SELECT "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " INTO OUTFILE "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 28: { + s += "WITH "; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += " AS "; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + break; + } + case 29: { + s += "{"; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ":"; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "}"; + break; + } + case 30: { + s += "["; + if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth); + s += ","; + if (word.inner().words_size() > 1) GenerateWord(word.inner().words(1), s, ++depth); + s += "]"; + break; + } + case 31: { + s += "[]"; + break; + } + case 32: { + s += " x "; + break; + } + case 33: { + s += "x"; + break; + } + case 34: { + s += " `x` "; + break; + } + case 35: { + s += "`x`"; + break; + } + case 36: { + s += " \"value\" "; + break; + } + case 37: { + s += "\"value\""; + break; + } + case 38: { + s += " 0 "; + break; + } + case 39: { + s += "0"; + break; + } + case 40: { + s += "1"; + break; + } + case 41: { + s += "2"; + break; + } + case 42: { + s += "123123123123123123"; + break; + } + case 43: { + s += "182374019873401982734091873420923123123123123123"; + break; + } + case 44: { + s += "1e-1"; + break; + } + case 45: { + s += "1.1"; + break; + } + case 46: { + s += "\"\""; + break; + } + case 47: { + s += " '../../../../../../../../../etc/passwd' "; + break; + } + case 48: { + s += "/"; + break; + } + case 49: { + s += "="; + break; + } + case 50: { + s += "=="; + break; + } + case 51: { + s += "!="; + break; + } + case 52: { + s += "<>"; + break; + } + case 53: { + s += "<"; + break; + } + case 54: { + s += "<="; + break; + } + case 55: { + s += ">"; + break; + } + case 56: { + s += ">="; + break; + } + case 57: { + s += "<<"; + break; + } + case 58: { + s += "|<<"; + break; + } + case 59: { + s += "&"; + break; + } + case 60: { + s += "|"; + break; + } + case 61: { + s += "||"; + break; + } + case 62: { + s += "<|"; + break; + } + case 63: { + s += "|>"; + break; + } + case 64: { + s += "+"; + break; + } + case 65: { + s += "-"; + break; + } + case 66: { + s += "~"; + break; + } + case 67: { + s += "*"; + break; + } + case 68: { + s += "/"; + break; + } + case 69: { + s += "\\"; + break; + } + case 70: { + s += "%"; + break; + } + case 71: { + s += ""; + break; + } + case 72: { + s += "."; + break; + } + case 73: { + s += ","; + break; + } + case 74: { + s += ","; + break; + } + case 75: { + s += ","; + break; + } + case 76: { + s += ","; + break; + } + case 77: { + s += ","; + break; + } + case 78: { + s += ","; + break; + } + case 79: { + s += "("; + break; + } + case 80: { + s += ")"; + break; + } + case 81: { + s += "("; + break; + } + case 82: { + s += ")"; + break; + } + case 83: { + s += "("; + break; + } + case 84: { + s += ")"; + break; + } + case 85: { + s += "("; + break; + } + case 86: { + s += ")"; + break; + } + case 87: { + s += "("; + break; + } + case 88: { + s += ")"; + break; + } + case 89: { + s += "("; + break; + } + case 90: { + s += ")"; + break; + } + case 91: { + s += "?"; + break; + } + case 92: { + s += ":"; + break; + } + case 93: { + s += "@"; + break; + } + case 
94: { + s += "@@"; + break; + } + case 95: { + s += "$"; + break; + } + case 96: { + s += "\""; + break; + } + case 97: { + s += "`"; + break; + } + case 98: { + s += "{"; + break; + } + case 99: { + s += "}"; + break; + } + case 100: { + s += "^"; + break; + } + case 101: { + s += "::"; + break; + } + case 102: { + s += "->"; + break; + } + case 103: { + s += "]"; + break; + } + case 104: { + s += "["; + break; + } + case 105: { + s += " abs "; + break; + } + case 106: { + s += " accurate_Cast "; + break; + } + case 107: { + s += " accurateCast "; + break; + } + case 108: { + s += " accurate_CastOrNull "; + break; + } + case 109: { + s += " accurateCastOrNull "; + break; + } + case 110: { + s += " acos "; + break; + } + case 111: { + s += " acosh "; + break; + } + case 112: { + s += " ADD "; + break; + } + case 113: { + s += " ADD COLUMN "; + break; + } + case 114: { + s += " ADD CONSTRAINT "; + break; + } + case 115: { + s += " addDays "; + break; + } + case 116: { + s += " addHours "; + break; + } + case 117: { + s += " ADD INDEX "; + break; + } + case 118: { + s += " addMinutes "; + break; + } + case 119: { + s += " addMonths "; + break; + } + case 120: { + s += " addQuarters "; + break; + } + case 121: { + s += " addressToLine "; + break; + } + case 122: { + s += " addressToSymbol "; + break; + } + case 123: { + s += " addSeconds "; + break; + } + case 124: { + s += " addWeeks "; + break; + } + case 125: { + s += " addYears "; + break; + } + case 126: { + s += " aes_decrypt_mysql "; + break; + } + case 127: { + s += " aes_encrypt_mysql "; + break; + } + case 128: { + s += " AFTER "; + break; + } + case 129: { + s += " AggregateFunction "; + break; + } + case 130: { + s += " aggThrow "; + break; + } + case 131: { + s += " ALIAS "; + break; + } + case 132: { + s += " ALL "; + break; + } + case 133: { + s += " alphaTokens "; + break; + } + case 134: { + s += " ALTER "; + break; + } + case 135: { + s += " ALTER LIVE VIEW "; + break; + } + case 136: { + s += " ALTER TABLE "; + break; + } + case 137: { + s += " and "; + break; + } + case 138: { + s += " AND "; + break; + } + case 139: { + s += " ANTI "; + break; + } + case 140: { + s += " any "; + break; + } + case 141: { + s += " ANY "; + break; + } + case 142: { + s += " anyHeavy "; + break; + } + case 143: { + s += " anyLast "; + break; + } + case 144: { + s += " appendTrailingCharIfAbsent "; + break; + } + case 145: { + s += " argMax "; + break; + } + case 146: { + s += " argMin "; + break; + } + case 147: { + s += " array "; + break; + } + case 148: { + s += " Array "; + break; + } + case 149: { + s += " ARRAY "; + break; + } + case 150: { + s += " arrayAll "; + break; + } + case 151: { + s += " arrayAUC "; + break; + } + case 152: { + s += " arrayAvg "; + break; + } + case 153: { + s += " arrayCompact "; + break; + } + case 154: { + s += " arrayConcat "; + break; + } + case 155: { + s += " arrayCount "; + break; + } + case 156: { + s += " arrayCumSum "; + break; + } + case 157: { + s += " arrayCumSumNonNegative "; + break; + } + case 158: { + s += " arrayDifference "; + break; + } + case 159: { + s += " arrayDistinct "; + break; + } + case 160: { + s += " arrayElement "; + break; + } + case 161: { + s += " arrayEnumerate "; + break; + } + case 162: { + s += " arrayEnumerateDense "; + break; + } + case 163: { + s += " arrayEnumerateDenseRanked "; + break; + } + case 164: { + s += " arrayEnumerateUniq "; + break; + } + case 165: { + s += " arrayEnumerateUniqRanked "; + break; + } + case 166: { + s += " arrayExists "; + break; + } + case 
167: { + s += " arrayFill "; + break; + } + case 168: { + s += " arrayFilter "; + break; + } + case 169: { + s += " arrayFirst "; + break; + } + case 170: { + s += " arrayFirstIndex "; + break; + } + case 171: { + s += " arrayFlatten "; + break; + } + case 172: { + s += " arrayIntersect "; + break; + } + case 173: { + s += " arrayJoin "; + break; + } + case 174: { + s += " ARRAY JOIN "; + break; + } + case 175: { + s += " arrayMap "; + break; + } + case 176: { + s += " arrayMax "; + break; + } + case 177: { + s += " arrayMin "; + break; + } + case 178: { + s += " arrayPartialReverseSort "; + break; + } + case 179: { + s += " arrayPartialShuffle "; + break; + } + case 180: { + s += " arrayPartialSort "; + break; + } + case 181: { + s += " arrayPopBack "; + break; + } + case 182: { + s += " arrayPopFront "; + break; + } + case 183: { + s += " arrayProduct "; + break; + } + case 184: { + s += " arrayPushBack "; + break; + } + case 185: { + s += " arrayPushFront "; + break; + } + case 186: { + s += " arrayReduce "; + break; + } + case 187: { + s += " arrayReduceInRanges "; + break; + } + case 188: { + s += " arrayResize "; + break; + } + case 189: { + s += " arrayReverse "; + break; + } + case 190: { + s += " arrayReverseFill "; + break; + } + case 191: { + s += " arrayReverseSort "; + break; + } + case 192: { + s += " arrayReverseSplit "; + break; + } + case 193: { + s += " arrayShuffle "; + break; + } + case 194: { + s += " arraySlice "; + break; + } + case 195: { + s += " arraySort "; + break; + } + case 196: { + s += " arraySplit "; + break; + } + case 197: { + s += " arrayStringConcat "; + break; + } + case 198: { + s += " arraySum "; + break; + } + case 199: { + s += " arrayUniq "; + break; + } + case 200: { + s += " arrayWithConstant "; + break; + } + case 201: { + s += " arrayZip "; + break; + } + case 202: { + s += " AS "; + break; + } + case 203: { + s += " ASC "; + break; + } + case 204: { + s += " ASCENDING "; + break; + } + case 205: { + s += " asin "; + break; + } + case 206: { + s += " asinh "; + break; + } + case 207: { + s += " ASOF "; + break; + } + case 208: { + s += " assumeNotNull "; + break; + } + case 209: { + s += " AST "; + break; + } + case 210: { + s += " ASYNC "; + break; + } + case 211: { + s += " atan "; + break; + } + case 212: { + s += " atan2 "; + break; + } + case 213: { + s += " atanh "; + break; + } + case 214: { + s += " ATTACH "; + break; + } + case 215: { + s += " ATTACH PART "; + break; + } + case 216: { + s += " ATTACH PARTITION "; + break; + } + case 217: { + s += " avg "; + break; + } + case 218: { + s += " avgWeighted "; + break; + } + case 219: { + s += " bar "; + break; + } + case 220: { + s += " base64Decode "; + break; + } + case 221: { + s += " base64Encode "; + break; + } + case 222: { + s += " basename "; + break; + } + case 223: { + s += " bayesAB "; + break; + } + case 224: { + s += " BETWEEN "; + break; + } + case 225: { + s += " BIGINT "; + break; + } + case 226: { + s += " BIGINT SIGNED "; + break; + } + case 227: { + s += " BIGINT UNSIGNED "; + break; + } + case 228: { + s += " bin "; + break; + } + case 229: { + s += " BINARY "; + break; + } + case 230: { + s += " BINARY LARGE OBJECT "; + break; + } + case 231: { + s += " BINARY VARYING "; + break; + } + case 232: { + s += " bitAnd "; + break; + } + case 233: { + s += " BIT_AND "; + break; + } + case 234: { + s += " __bitBoolMaskAnd "; + break; + } + case 235: { + s += " __bitBoolMaskOr "; + break; + } + case 236: { + s += " bitCount "; + break; + } + case 237: { + s += " 
bitHammingDistance "; + break; + } + case 238: { + s += " bitmapAnd "; + break; + } + case 239: { + s += " bitmapAndCardinality "; + break; + } + case 240: { + s += " bitmapAndnot "; + break; + } + case 241: { + s += " bitmapAndnotCardinality "; + break; + } + case 242: { + s += " bitmapBuild "; + break; + } + case 243: { + s += " bitmapCardinality "; + break; + } + case 244: { + s += " bitmapContains "; + break; + } + case 245: { + s += " bitmapHasAll "; + break; + } + case 246: { + s += " bitmapHasAny "; + break; + } + case 247: { + s += " bitmapMax "; + break; + } + case 248: { + s += " bitmapMin "; + break; + } + case 249: { + s += " bitmapOr "; + break; + } + case 250: { + s += " bitmapOrCardinality "; + break; + } + case 251: { + s += " bitmapSubsetInRange "; + break; + } + case 252: { + s += " bitmapSubsetLimit "; + break; + } + case 253: { + s += " bitmapToArray "; + break; + } + case 254: { + s += " bitmapTransform "; + break; + } + case 255: { + s += " bitmapXor "; + break; + } + case 256: { + s += " bitmapXorCardinality "; + break; + } + case 257: { + s += " bitmaskToArray "; + break; + } + case 258: { + s += " bitmaskToList "; + break; + } + case 259: { + s += " bitNot "; + break; + } + case 260: { + s += " bitOr "; + break; + } + case 261: { + s += " BIT_OR "; + break; + } + case 262: { + s += " bitPositionsToArray "; + break; + } + case 263: { + s += " bitRotateLeft "; + break; + } + case 264: { + s += " bitRotateRight "; + break; + } + case 265: { + s += " bitShiftLeft "; + break; + } + case 266: { + s += " bitShiftRight "; + break; + } + case 267: { + s += " __bitSwapLastTwo "; + break; + } + case 268: { + s += " bitTest "; + break; + } + case 269: { + s += " bitTestAll "; + break; + } + case 270: { + s += " bitTestAny "; + break; + } + case 271: { + s += " __bitWrapperFunc "; + break; + } + case 272: { + s += " bitXor "; + break; + } + case 273: { + s += " BIT_XOR "; + break; + } + case 274: { + s += " BLOB "; + break; + } + case 275: { + s += " blockNumber "; + break; + } + case 276: { + s += " blockSerializedSize "; + break; + } + case 277: { + s += " blockSize "; + break; + } + case 278: { + s += " BOOL "; + break; + } + case 279: { + s += " BOOLEAN "; + break; + } + case 280: { + s += " BOTH "; + break; + } + case 281: { + s += " boundingRatio "; + break; + } + case 282: { + s += " buildId "; + break; + } + case 283: { + s += " BY "; + break; + } + case 284: { + s += " BYTE "; + break; + } + case 285: { + s += " BYTEA "; + break; + } + case 286: { + s += " byteSize "; + break; + } + case 287: { + s += " CASE "; + break; + } + case 288: { + s += " caseWithExpr "; + break; + } + case 289: { + s += " caseWithExpression "; + break; + } + case 290: { + s += " caseWithoutExpr "; + break; + } + case 291: { + s += " caseWithoutExpression "; + break; + } + case 292: { + s += " _CAST "; + break; + } + case 293: { + s += " CAST "; + break; + } + case 294: { + s += " categoricalInformationValue "; + break; + } + case 295: { + s += " cbrt "; + break; + } + case 296: { + s += " ceil "; + break; + } + case 297: { + s += " ceiling "; + break; + } + case 298: { + s += " char "; + break; + } + case 299: { + s += " CHAR "; + break; + } + case 300: { + s += " CHARACTER "; + break; + } + case 301: { + s += " CHARACTER LARGE OBJECT "; + break; + } + case 302: { + s += " CHARACTER_LENGTH "; + break; + } + case 303: { + s += " CHARACTER VARYING "; + break; + } + case 304: { + s += " CHAR LARGE OBJECT "; + break; + } + case 305: { + s += " CHAR_LENGTH "; + break; + } + case 306: { + s += " 
CHAR VARYING "; + break; + } + case 307: { + s += " CHECK "; + break; + } + case 308: { + s += " CHECK TABLE "; + break; + } + case 309: { + s += " cityHash64 "; + break; + } + case 310: { + s += " CLEAR "; + break; + } + case 311: { + s += " CLEAR COLUMN "; + break; + } + case 312: { + s += " CLEAR INDEX "; + break; + } + case 313: { + s += " CLOB "; + break; + } + case 314: { + s += " CLUSTER "; + break; + } + case 315: { + s += " coalesce "; + break; + } + case 316: { + s += " CODEC "; + break; + } + case 317: { + s += " COLLATE "; + break; + } + case 318: { + s += " COLUMN "; + break; + } + case 319: { + s += " COLUMNS "; + break; + } + case 320: { + s += " COMMENT "; + break; + } + case 321: { + s += " COMMENT COLUMN "; + break; + } + case 322: { + s += " concat "; + break; + } + case 323: { + s += " concatAssumeInjective "; + break; + } + case 324: { + s += " connection_id "; + break; + } + case 325: { + s += " connectionid "; + break; + } + case 326: { + s += " connectionId "; + break; + } + case 327: { + s += " CONSTRAINT "; + break; + } + case 328: { + s += " convertCharset "; + break; + } + case 329: { + s += " corr "; + break; + } + case 330: { + s += " corrStable "; + break; + } + case 331: { + s += " cos "; + break; + } + case 332: { + s += " cosh "; + break; + } + case 333: { + s += " count "; + break; + } + case 334: { + s += " countDigits "; + break; + } + case 335: { + s += " countEqual "; + break; + } + case 336: { + s += " countMatches "; + break; + } + case 337: { + s += " countMatchesCaseInsensitive "; + break; + } + case 338: { + s += " countSubstrings "; + break; + } + case 339: { + s += " countSubstringsCaseInsensitive "; + break; + } + case 340: { + s += " countSubstringsCaseInsensitiveUTF8 "; + break; + } + case 341: { + s += " covarPop "; + break; + } + case 342: { + s += " COVAR_POP "; + break; + } + case 343: { + s += " covarPopStable "; + break; + } + case 344: { + s += " covarSamp "; + break; + } + case 345: { + s += " COVAR_SAMP "; + break; + } + case 346: { + s += " covarSampStable "; + break; + } + case 347: { + s += " CRC32 "; + break; + } + case 348: { + s += " CRC32IEEE "; + break; + } + case 349: { + s += " CRC64 "; + break; + } + case 350: { + s += " CREATE "; + break; + } + case 351: { + s += " CROSS "; + break; + } + case 352: { + s += " CUBE "; + break; + } + case 353: { + s += " currentDatabase "; + break; + } + case 354: { + s += " currentProfiles "; + break; + } + case 355: { + s += " currentRoles "; + break; + } + case 356: { + s += " currentUser "; + break; + } + case 357: { + s += " cutFragment "; + break; + } + case 358: { + s += " cutIPv6 "; + break; + } + case 359: { + s += " cutQueryString "; + break; + } + case 360: { + s += " cutQueryStringAndFragment "; + break; + } + case 361: { + s += " cutToFirstSignificantSubdomain "; + break; + } + case 362: { + s += " cutToFirstSignificantSubdomainCustom "; + break; + } + case 363: { + s += " cutToFirstSignificantSubdomainCustomWithWWW "; + break; + } + case 364: { + s += " cutToFirstSignificantSubdomainWithWWW "; + break; + } + case 365: { + s += " cutURLParameter "; + break; + } + case 366: { + s += " cutWWW "; + break; + } + case 367: { + s += " D "; + break; + } + case 368: { + s += " DATABASE "; + break; + } + case 369: { + s += " DATABASES "; + break; + } + case 370: { + s += " Date "; + break; + } + case 371: { + s += " DATE "; + break; + } + case 372: { + s += " Date32 "; + break; + } + case 373: { + s += " DATE_ADD "; + break; + } + case 374: { + s += " DATEADD "; + break; + } + case 
375: { + s += " dateDiff "; + break; + } + case 376: { + s += " DATE_DIFF "; + break; + } + case 377: { + s += " DATEDIFF "; + break; + } + case 378: { + s += " dateName "; + break; + } + case 379: { + s += " DATE_SUB "; + break; + } + case 380: { + s += " DATESUB "; + break; + } + case 381: { + s += " DateTime "; + break; + } + case 382: { + s += " DateTime32 "; + break; + } + case 383: { + s += " DateTime64 "; + break; + } + case 384: { + s += " dateTime64ToSnowflake "; + break; + } + case 385: { + s += " dateTimeToSnowflake "; + break; + } + case 386: { + s += " date_trunc "; + break; + } + case 387: { + s += " dateTrunc "; + break; + } + case 388: { + s += " DAY "; + break; + } + case 389: { + s += " DAYOFMONTH "; + break; + } + case 390: { + s += " DAYOFWEEK "; + break; + } + case 391: { + s += " DAYOFYEAR "; + break; + } + case 392: { + s += " DD "; + break; + } + case 393: { + s += " DEC "; + break; + } + case 394: { + s += " Decimal "; + break; + } + case 395: { + s += " Decimal128 "; + break; + } + case 396: { + s += " Decimal256 "; + break; + } + case 397: { + s += " Decimal32 "; + break; + } + case 398: { + s += " Decimal64 "; + break; + } + case 399: { + s += " decodeURLComponent "; + break; + } + case 400: { + s += " decodeXMLComponent "; + break; + } + case 401: { + s += " decrypt "; + break; + } + case 402: { + s += " DEDUPLICATE "; + break; + } + case 403: { + s += " DEFAULT "; + break; + } + case 404: { + s += " defaultProfiles "; + break; + } + case 405: { + s += " defaultRoles "; + break; + } + case 406: { + s += " defaultValueOfArgumentType "; + break; + } + case 407: { + s += " defaultValueOfTypeName "; + break; + } + case 408: { + s += " DELAY "; + break; + } + case 409: { + s += " DELETE "; + break; + } + case 410: { + s += " DELETE WHERE "; + break; + } + case 411: { + s += " deltaSum "; + break; + } + case 412: { + s += " deltaSumTimestamp "; + break; + } + case 413: { + s += " demangle "; + break; + } + case 414: { + s += " dense_rank "; + break; + } + case 415: { + s += " DESC "; + break; + } + case 416: { + s += " DESCENDING "; + break; + } + case 417: { + s += " DESCRIBE "; + break; + } + case 418: { + s += " DETACH "; + break; + } + case 419: { + s += " DETACH PARTITION "; + break; + } + case 420: { + s += " dictGet "; + break; + } + case 421: { + s += " dictGetChildren "; + break; + } + case 422: { + s += " dictGetDate "; + break; + } + case 423: { + s += " dictGetDateOrDefault "; + break; + } + case 424: { + s += " dictGetDateTime "; + break; + } + case 425: { + s += " dictGetDateTimeOrDefault "; + break; + } + case 426: { + s += " dictGetDescendants "; + break; + } + case 427: { + s += " dictGetFloat32 "; + break; + } + case 428: { + s += " dictGetFloat32OrDefault "; + break; + } + case 429: { + s += " dictGetFloat64 "; + break; + } + case 430: { + s += " dictGetFloat64OrDefault "; + break; + } + case 431: { + s += " dictGetHierarchy "; + break; + } + case 432: { + s += " dictGetInt16 "; + break; + } + case 433: { + s += " dictGetInt16OrDefault "; + break; + } + case 434: { + s += " dictGetInt32 "; + break; + } + case 435: { + s += " dictGetInt32OrDefault "; + break; + } + case 436: { + s += " dictGetInt64 "; + break; + } + case 437: { + s += " dictGetInt64OrDefault "; + break; + } + case 438: { + s += " dictGetInt8 "; + break; + } + case 439: { + s += " dictGetInt8OrDefault "; + break; + } + case 440: { + s += " dictGetOrDefault "; + break; + } + case 441: { + s += " dictGetOrNull "; + break; + } + case 442: { + s += " dictGetString "; + break; + } + case 
443: { + s += " dictGetStringOrDefault "; + break; + } + case 444: { + s += " dictGetUInt16 "; + break; + } + case 445: { + s += " dictGetUInt16OrDefault "; + break; + } + case 446: { + s += " dictGetUInt32 "; + break; + } + case 447: { + s += " dictGetUInt32OrDefault "; + break; + } + case 448: { + s += " dictGetUInt64 "; + break; + } + case 449: { + s += " dictGetUInt64OrDefault "; + break; + } + case 450: { + s += " dictGetUInt8 "; + break; + } + case 451: { + s += " dictGetUInt8OrDefault "; + break; + } + case 452: { + s += " dictGetUUID "; + break; + } + case 453: { + s += " dictGetUUIDOrDefault "; + break; + } + case 454: { + s += " dictHas "; + break; + } + case 455: { + s += " DICTIONARIES "; + break; + } + case 456: { + s += " DICTIONARY "; + break; + } + case 457: { + s += " dictIsIn "; + break; + } + case 458: { + s += " DISK "; + break; + } + case 459: { + s += " DISTINCT "; + break; + } + case 460: { + s += " DISTRIBUTED "; + break; + } + case 461: { + s += " divide "; + break; + } + case 462: { + s += " domain "; + break; + } + case 463: { + s += " domainWithoutWWW "; + break; + } + case 464: { + s += " DOUBLE "; + break; + } + case 465: { + s += " DOUBLE PRECISION "; + break; + } + case 466: { + s += " DROP "; + break; + } + case 467: { + s += " DROP COLUMN "; + break; + } + case 468: { + s += " DROP CONSTRAINT "; + break; + } + case 469: { + s += " DROP DETACHED PART "; + break; + } + case 470: { + s += " DROP DETACHED PARTITION "; + break; + } + case 471: { + s += " DROP INDEX "; + break; + } + case 472: { + s += " DROP PARTITION "; + break; + } + case 473: { + s += " dumpColumnStructure "; + break; + } + case 474: { + s += " e "; + break; + } + case 475: { + s += " ELSE "; + break; + } + case 476: { + s += " empty "; + break; + } + case 477: { + s += " emptyArrayDate "; + break; + } + case 478: { + s += " emptyArrayDateTime "; + break; + } + case 479: { + s += " emptyArrayFloat32 "; + break; + } + case 480: { + s += " emptyArrayFloat64 "; + break; + } + case 481: { + s += " emptyArrayInt16 "; + break; + } + case 482: { + s += " emptyArrayInt32 "; + break; + } + case 483: { + s += " emptyArrayInt64 "; + break; + } + case 484: { + s += " emptyArrayInt8 "; + break; + } + case 485: { + s += " emptyArrayString "; + break; + } + case 486: { + s += " emptyArrayToSingle "; + break; + } + case 487: { + s += " emptyArrayUInt16 "; + break; + } + case 488: { + s += " emptyArrayUInt32 "; + break; + } + case 489: { + s += " emptyArrayUInt64 "; + break; + } + case 490: { + s += " emptyArrayUInt8 "; + break; + } + case 491: { + s += " enabledProfiles "; + break; + } + case 492: { + s += " enabledRoles "; + break; + } + case 493: { + s += " encodeXMLComponent "; + break; + } + case 494: { + s += " encrypt "; + break; + } + case 495: { + s += " END "; + break; + } + case 496: { + s += " endsWith "; + break; + } + case 497: { + s += " ENGINE "; + break; + } + case 498: { + s += " entropy "; + break; + } + case 499: { + s += " Enum "; + break; + } + case 500: { + s += " ENUM "; + break; + } + case 501: { + s += " Enum16 "; + break; + } + case 502: { + s += " Enum8 "; + break; + } + case 503: { + s += " equals "; + break; + } + case 504: { + s += " erf "; + break; + } + case 505: { + s += " erfc "; + break; + } + case 506: { + s += " errorCodeToName "; + break; + } + case 507: { + s += " evalMLMethod "; + break; + } + case 508: { + s += " EVENTS "; + break; + } + case 509: { + s += " EXCHANGE TABLES "; + break; + } + case 510: { + s += " EXISTS "; + break; + } + case 511: { + s += " exp "; + 
break; + } + case 512: { + s += " exp10 "; + break; + } + case 513: { + s += " exp2 "; + break; + } + case 514: { + s += " EXPLAIN "; + break; + } + case 515: { + s += " exponentialMovingAverage "; + break; + } + case 516: { + s += " EXPRESSION "; + break; + } + case 517: { + s += " extract "; + break; + } + case 518: { + s += " EXTRACT "; + break; + } + case 519: { + s += " extractAll "; + break; + } + case 520: { + s += " extractAllGroups "; + break; + } + case 521: { + s += " extractAllGroupsHorizontal "; + break; + } + case 522: { + s += " extractAllGroupsVertical "; + break; + } + case 523: { + s += " extractGroups "; + break; + } + case 524: { + s += " extractTextFromHTML "; + break; + } + case 525: { + s += " extractURLParameter "; + break; + } + case 526: { + s += " extractURLParameterNames "; + break; + } + case 527: { + s += " extractURLParameters "; + break; + } + case 528: { + s += " farmFingerprint64 "; + break; + } + case 529: { + s += " farmHash64 "; + break; + } + case 530: { + s += " FETCHES "; + break; + } + case 531: { + s += " FETCH PART "; + break; + } + case 532: { + s += " FETCH PARTITION "; + break; + } + case 533: { + s += " file "; + break; + } + case 534: { + s += " filesystemAvailable "; + break; + } + case 535: { + s += " filesystemCapacity "; + break; + } + case 536: { + s += " filesystemFree "; + break; + } + case 537: { + s += " FINAL "; + break; + } + case 538: { + s += " finalizeAggregation "; + break; + } + case 539: { + s += " FIRST "; + break; + } + case 540: { + s += " firstSignificantSubdomain "; + break; + } + case 541: { + s += " firstSignificantSubdomainCustom "; + break; + } + case 542: { + s += " first_value "; + break; + } + case 543: { + s += " FIXED "; + break; + } + case 544: { + s += " FixedString "; + break; + } + case 545: { + s += " flatten "; + break; + } + case 546: { + s += " FLOAT "; + break; + } + case 547: { + s += " Float32 "; + break; + } + case 548: { + s += " Float64 "; + break; + } + case 549: { + s += " floor "; + break; + } + case 550: { + s += " FLUSH "; + break; + } + case 551: { + s += " FOR "; + break; + } + case 552: { + s += " ForEach "; + break; + } + case 553: { + s += " format "; + break; + } + case 554: { + s += " FORMAT "; + break; + } + case 555: { + s += " formatDateTime "; + break; + } + case 556: { + s += " formatReadableQuantity "; + break; + } + case 557: { + s += " formatReadableDecimalSize "; + break; + } + case 558: { + s += " formatReadableSize "; + break; + } + case 559: { + s += " formatReadableTimeDelta "; + break; + } + case 560: { + s += " formatRow "; + break; + } + case 561: { + s += " formatRowNoNewline "; + break; + } + case 562: { + s += " FQDN "; + break; + } + case 563: { + s += " fragment "; + break; + } + case 564: { + s += " FREEZE "; + break; + } + case 565: { + s += " FROM "; + break; + } + case 566: { + s += " FROM_BASE64 "; + break; + } + case 567: { + s += " fromModifiedJulianDay "; + break; + } + case 568: { + s += " fromModifiedJulianDayOrNull "; + break; + } + case 569: { + s += " FROM_UNIXTIME "; + break; + } + case 570: { + s += " fromUnixTimestamp "; + break; + } + case 571: { + s += " fromUnixTimestamp64Micro "; + break; + } + case 572: { + s += " fromUnixTimestamp64Milli "; + break; + } + case 573: { + s += " fromUnixTimestamp64Nano "; + break; + } + case 574: { + s += " FULL "; + break; + } + case 575: { + s += " fullHostName "; + break; + } + case 576: { + s += " FUNCTION "; + break; + } + case 577: { + s += " fuzzBits "; + break; + } + case 578: { + s += " gccMurmurHash "; + 
break; + } + case 579: { + s += " gcd "; + break; + } + case 580: { + s += " generateUUIDv4 "; + break; + } + case 581: { + s += " geoDistance "; + break; + } + case 582: { + s += " geohashDecode "; + break; + } + case 583: { + s += " geohashEncode "; + break; + } + case 584: { + s += " geohashesInBox "; + break; + } + case 585: { + s += " geoToH3 "; + break; + } + case 586: { + s += " geoToS2 "; + break; + } + case 587: { + s += " getMacro "; + break; + } + case 588: { + s += " __getScalar "; + break; + } + case 589: { + s += " getServerPort "; + break; + } + case 590: { + s += " getSetting "; + break; + } + case 591: { + s += " getSizeOfEnumType "; + break; + } + case 592: { + s += " GLOBAL "; + break; + } + case 593: { + s += " globalIn "; + break; + } + case 594: { + s += " globalInIgnoreSet "; + break; + } + case 595: { + s += " globalNotIn "; + break; + } + case 596: { + s += " globalNotInIgnoreSet "; + break; + } + case 597: { + s += " globalNotNullIn "; + break; + } + case 598: { + s += " globalNotNullInIgnoreSet "; + break; + } + case 599: { + s += " globalNullIn "; + break; + } + case 600: { + s += " globalNullInIgnoreSet "; + break; + } + case 601: { + s += " globalVariable "; + break; + } + case 602: { + s += " GRANULARITY "; + break; + } + case 603: { + s += " greatCircleAngle "; + break; + } + case 604: { + s += " greatCircleDistance "; + break; + } + case 605: { + s += " greater "; + break; + } + case 606: { + s += " greaterOrEquals "; + break; + } + case 607: { + s += " greatest "; + break; + } + case 608: { + s += " GROUP "; + break; + } + case 609: { + s += " groupArray "; + break; + } + case 610: { + s += " groupArrayInsertAt "; + break; + } + case 611: { + s += " groupArrayMovingAvg "; + break; + } + case 612: { + s += " groupArrayMovingSum "; + break; + } + case 613: { + s += " groupArraySample "; + break; + } + case 614: { + s += " groupBitAnd "; + break; + } + case 615: { + s += " groupBitmap "; + break; + } + case 616: { + s += " groupBitmapAnd "; + break; + } + case 617: { + s += " groupBitmapOr "; + break; + } + case 618: { + s += " groupBitmapXor "; + break; + } + case 619: { + s += " groupBitOr "; + break; + } + case 620: { + s += " groupBitXor "; + break; + } + case 621: { + s += " GROUP BY "; + break; + } + case 622: { + s += " groupUniqArray "; + break; + } + case 623: { + s += " h3EdgeAngle "; + break; + } + case 624: { + s += " h3EdgeLengthM "; + break; + } + case 625: { + s += " h3GetBaseCell "; + break; + } + case 626: { + s += " h3GetFaces "; + break; + } + case 627: { + s += " h3GetResolution "; + break; + } + case 628: { + s += " h3HexAreaM2 "; + break; + } + case 629: { + s += " h3IndexesAreNeighbors "; + break; + } + case 630: { + s += " h3IsPentagon "; + break; + } + case 631: { + s += " h3IsResClassIII "; + break; + } + case 632: { + s += " h3IsValid "; + break; + } + case 633: { + s += " h3kRing "; + break; + } + case 634: { + s += " h3ToChildren "; + break; + } + case 635: { + s += " h3ToGeo "; + break; + } + case 636: { + s += " h3ToGeoBoundary "; + break; + } + case 637: { + s += " h3ToParent "; + break; + } + case 638: { + s += " h3ToString "; + break; + } + case 639: { + s += " halfMD5 "; + break; + } + case 640: { + s += " has "; + break; + } + case 641: { + s += " hasAll "; + break; + } + case 642: { + s += " hasAny "; + break; + } + case 643: { + s += " hasColumnInTable "; + break; + } + case 644: { + s += " hasSubstr "; + break; + } + case 645: { + s += " hasThreadFuzzer "; + break; + } + case 646: { + s += " hasToken "; + break; + } + 
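+ // (generated file) every case from 105 onward simply appends one keyword or function-name token from the fuzzer dictionary added above; the pattern continues below.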
case 647: { + s += " hasTokenCaseInsensitive "; + break; + } + case 648: { + s += " HAVING "; + break; + } + case 649: { + s += " hex "; + break; + } + case 650: { + s += " HH "; + break; + } + case 651: { + s += " HIERARCHICAL "; + break; + } + case 652: { + s += " histogram "; + break; + } + case 653: { + s += " hiveHash "; + break; + } + case 654: { + s += " hostname "; + break; + } + case 655: { + s += " hostName "; + break; + } + case 656: { + s += " HOUR "; + break; + } + case 657: { + s += " hypot "; + break; + } + case 658: { + s += " ID "; + break; + } + case 659: { + s += " identity "; + break; + } + case 660: { + s += " if "; + break; + } + case 661: { + s += " IF "; + break; + } + case 662: { + s += " IF EXISTS "; + break; + } + case 663: { + s += " IF NOT EXISTS "; + break; + } + case 664: { + s += " ifNotFinite "; + break; + } + case 665: { + s += " ifNull "; + break; + } + case 666: { + s += " ignore "; + break; + } + case 667: { + s += " ilike "; + break; + } + case 668: { + s += " ILIKE "; + break; + } + case 669: { + s += " in "; + break; + } + case 670: { + s += " IN "; + break; + } + case 671: { + s += " INDEX "; + break; + } + case 672: { + s += " indexHint "; + break; + } + case 673: { + s += " indexOf "; + break; + } + case 674: { + s += " INET4 "; + break; + } + case 675: { + s += " INET6 "; + break; + } + case 676: { + s += " INET6_ATON "; + break; + } + case 677: { + s += " INET6_NTOA "; + break; + } + case 678: { + s += " INET_ATON "; + break; + } + case 679: { + s += " INET_NTOA "; + break; + } + case 680: { + s += " INF "; + break; + } + case 681: { + s += " inIgnoreSet "; + break; + } + case 682: { + s += " initializeAggregation "; + break; + } + case 683: { + s += " initial_query_id "; + break; + } + case 684: { + s += " initialQueryID "; + break; + } + case 685: { + s += " INJECTIVE "; + break; + } + case 686: { + s += " INNER "; + break; + } + case 687: { + s += " IN PARTITION "; + break; + } + case 688: { + s += " INSERT "; + break; + } + case 689: { + s += " INSERT INTO "; + break; + } + case 690: { + s += " INT "; + break; + } + case 691: { + s += " INT1 "; + break; + } + case 692: { + s += " Int128 "; + break; + } + case 693: { + s += " Int16 "; + break; + } + case 694: { + s += " INT1 SIGNED "; + break; + } + case 695: { + s += " INT1 UNSIGNED "; + break; + } + case 696: { + s += " Int256 "; + break; + } + case 697: { + s += " Int32 "; + break; + } + case 698: { + s += " Int64 "; + break; + } + case 699: { + s += " Int8 "; + break; + } + case 700: { + s += " intDiv "; + break; + } + case 701: { + s += " intDivOrZero "; + break; + } + case 702: { + s += " INTEGER "; + break; + } + case 703: { + s += " INTEGER SIGNED "; + break; + } + case 704: { + s += " INTEGER UNSIGNED "; + break; + } + case 705: { + s += " INTERVAL "; + break; + } + case 706: { + s += " IntervalDay "; + break; + } + case 707: { + s += " IntervalHour "; + break; + } + case 708: { + s += " intervalLengthSum "; + break; + } + case 709: { + s += " IntervalMinute "; + break; + } + case 710: { + s += " IntervalMonth "; + break; + } + case 711: { + s += " IntervalQuarter "; + break; + } + case 712: { + s += " IntervalSecond "; + break; + } + case 713: { + s += " IntervalWeek "; + break; + } + case 714: { + s += " IntervalYear "; + break; + } + case 715: { + s += " intExp10 "; + break; + } + case 716: { + s += " intExp2 "; + break; + } + case 717: { + s += " intHash32 "; + break; + } + case 718: { + s += " intHash64 "; + break; + } + case 719: { + s += " INTO "; + break; + } + case 720: { + 
s += " INTO OUTFILE "; + break; + } + case 721: { + s += " INT SIGNED "; + break; + } + case 722: { + s += " INT UNSIGNED "; + break; + } + case 723: { + s += " IPv4 "; + break; + } + case 724: { + s += " IPv4CIDRToRange "; + break; + } + case 725: { + s += " IPv4NumToString "; + break; + } + case 726: { + s += " IPv4NumToStringClassC "; + break; + } + case 727: { + s += " IPv4StringToNum "; + break; + } + case 728: { + s += " IPv4ToIPv6 "; + break; + } + case 729: { + s += " IPv6 "; + break; + } + case 730: { + s += " IPv6CIDRToRange "; + break; + } + case 731: { + s += " IPv6NumToString "; + break; + } + case 732: { + s += " IPv6StringToNum "; + break; + } + case 733: { + s += " IS "; + break; + } + case 734: { + s += " isConstant "; + break; + } + case 735: { + s += " isDecimalOverflow "; + break; + } + case 736: { + s += " isFinite "; + break; + } + case 737: { + s += " isInfinite "; + break; + } + case 738: { + s += " isIPAddressInRange "; + break; + } + case 739: { + s += " isIPv4String "; + break; + } + case 740: { + s += " isIPv6String "; + break; + } + case 741: { + s += " isNaN "; + break; + } + case 742: { + s += " isNotNull "; + break; + } + case 743: { + s += " isNull "; + break; + } + case 744: { + s += " IS_OBJECT_ID "; + break; + } + case 745: { + s += " isValidJSON "; + break; + } + case 746: { + s += " isValidUTF8 "; + break; + } + case 747: { + s += " isZeroOrNull "; + break; + } + case 748: { + s += " javaHash "; + break; + } + case 749: { + s += " javaHashUTF16LE "; + break; + } + case 750: { + s += " JOIN "; + break; + } + case 751: { + s += " joinGet "; + break; + } + case 752: { + s += " joinGetOrNull "; + break; + } + case 753: { + s += " JSON_EXISTS "; + break; + } + case 754: { + s += " JSONExtract "; + break; + } + case 755: { + s += " JSONExtractArrayRaw "; + break; + } + case 756: { + s += " JSONExtractBool "; + break; + } + case 757: { + s += " JSONExtractFloat "; + break; + } + case 758: { + s += " JSONExtractInt "; + break; + } + case 759: { + s += " JSONExtractKeysAndValues "; + break; + } + case 760: { + s += " JSONExtractKeysAndValuesRaw "; + break; + } + case 761: { + s += " JSONExtractKeys "; + break; + } + case 762: { + s += " JSONExtractRaw "; + break; + } + case 763: { + s += " JSONExtractString "; + break; + } + case 764: { + s += " JSONExtractUInt "; + break; + } + case 765: { + s += " JSONHas "; + break; + } + case 766: { + s += " JSONKey "; + break; + } + case 767: { + s += " JSONLength "; + break; + } + case 768: { + s += " JSON_QUERY "; + break; + } + case 769: { + s += " JSONType "; + break; + } + case 770: { + s += " JSON_VALUE "; + break; + } + case 771: { + s += " jumpConsistentHash "; + break; + } + case 772: { + s += " KEY "; + break; + } + case 773: { + s += " KILL "; + break; + } + case 774: { + s += " kurtPop "; + break; + } + case 775: { + s += " kurtSamp "; + break; + } + case 776: { + s += " lagInFrame "; + break; + } + case 777: { + s += " LAST "; + break; + } + case 778: { + s += " last_value "; + break; + } + case 779: { + s += " LAYOUT "; + break; + } + case 780: { + s += " lcase "; + break; + } + case 781: { + s += " lcm "; + break; + } + case 782: { + s += " leadInFrame "; + break; + } + case 783: { + s += " LEADING "; + break; + } + case 784: { + s += " least "; + break; + } + case 785: { + s += " LEFT "; + break; + } + case 786: { + s += " LEFT ARRAY JOIN "; + break; + } + case 787: { + s += " leftPad "; + break; + } + case 788: { + s += " leftPadUTF8 "; + break; + } + case 789: { + s += " lemmatize "; + break; + } + case 
790: { + s += " length "; + break; + } + case 791: { + s += " lengthUTF8 "; + break; + } + case 792: { + s += " less "; + break; + } + case 793: { + s += " lessOrEquals "; + break; + } + case 794: { + s += " lgamma "; + break; + } + case 795: { + s += " LIFETIME "; + break; + } + case 796: { + s += " like "; + break; + } + case 797: { + s += " LIKE "; + break; + } + case 798: { + s += " LIMIT "; + break; + } + case 799: { + s += " LIVE "; + break; + } + case 800: { + s += " ln "; + break; + } + case 801: { + s += " LOCAL "; + break; + } + case 802: { + s += " locate "; + break; + } + case 803: { + s += " log "; + break; + } + case 804: { + s += " log10 "; + break; + } + case 805: { + s += " log1p "; + break; + } + case 806: { + s += " log2 "; + break; + } + case 807: { + s += " LOGS "; + break; + } + case 808: { + s += " logTrace "; + break; + } + case 809: { + s += " LONGBLOB "; + break; + } + case 810: { + s += " LONGTEXT "; + break; + } + case 811: { + s += " LowCardinality "; + break; + } + case 812: { + s += " lowCardinalityIndices "; + break; + } + case 813: { + s += " lowCardinalityKeys "; + break; + } + case 814: { + s += " lower "; + break; + } + case 815: { + s += " lowerUTF8 "; + break; + } + case 816: { + s += " lpad "; + break; + } + case 817: { + s += " LTRIM "; + break; + } + case 818: { + s += " M "; + break; + } + case 819: { + s += " MACNumToString "; + break; + } + case 820: { + s += " MACStringToNum "; + break; + } + case 821: { + s += " MACStringToOUI "; + break; + } + case 822: { + s += " mannWhitneyUTest "; + break; + } + case 823: { + s += " map "; + break; + } + case 824: { + s += " Map "; + break; + } + case 825: { + s += " mapAdd "; + break; + } + case 826: { + s += " mapContains "; + break; + } + case 827: { + s += " mapKeys "; + break; + } + case 828: { + s += " mapPopulateSeries "; + break; + } + case 829: { + s += " mapSubtract "; + break; + } + case 830: { + s += " mapValues "; + break; + } + case 831: { + s += " match "; + break; + } + case 832: { + s += " materialize "; + break; + } + case 833: { + s += " MATERIALIZE "; + break; + } + case 834: { + s += " MATERIALIZED "; + break; + } + case 835: { + s += " MATERIALIZE INDEX "; + break; + } + case 836: { + s += " MATERIALIZE TTL "; + break; + } + case 837: { + s += " max "; + break; + } + case 838: { + s += " MAX "; + break; + } + case 839: { + s += " maxIntersections "; + break; + } + case 840: { + s += " maxIntersectionsPosition "; + break; + } + case 841: { + s += " maxMap "; + break; + } + case 842: { + s += " MD4 "; + break; + } + case 843: { + s += " MD5 "; + break; + } + case 844: { + s += " median "; + break; + } + case 845: { + s += " medianBFloat16 "; + break; + } + case 846: { + s += " medianBFloat16Weighted "; + break; + } + case 847: { + s += " medianDeterministic "; + break; + } + case 848: { + s += " medianExact "; + break; + } + case 849: { + s += " medianExactHigh "; + break; + } + case 850: { + s += " medianExactLow "; + break; + } + case 851: { + s += " medianExactWeighted "; + break; + } + case 852: { + s += " medianTDigest "; + break; + } + case 853: { + s += " medianTDigestWeighted "; + break; + } + case 854: { + s += " medianTiming "; + break; + } + case 855: { + s += " medianTimingWeighted "; + break; + } + case 856: { + s += " MEDIUMBLOB "; + break; + } + case 857: { + s += " MEDIUMINT "; + break; + } + case 858: { + s += " MEDIUMINT SIGNED "; + break; + } + case 859: { + s += " MEDIUMINT UNSIGNED "; + break; + } + case 860: { + s += " MEDIUMTEXT "; + break; + } + case 861: { + s += 
" Merge "; + break; + } + case 862: { + s += " MERGES "; + break; + } + case 863: { + s += " metroHash64 "; + break; + } + case 864: { + s += " MI "; + break; + } + case 865: { + s += " mid "; + break; + } + case 866: { + s += " min "; + break; + } + case 867: { + s += " MIN "; + break; + } + case 868: { + s += " minMap "; + break; + } + case 869: { + s += " minus "; + break; + } + case 870: { + s += " MINUTE "; + break; + } + case 871: { + s += " MM "; + break; + } + case 872: { + s += " mod "; + break; + } + case 873: { + s += " MODIFY "; + break; + } + case 874: { + s += " MODIFY COLUMN "; + break; + } + case 875: { + s += " MODIFY ORDER BY "; + break; + } + case 876: { + s += " MODIFY QUERY "; + break; + } + case 877: { + s += " MODIFY SETTING "; + break; + } + case 878: { + s += " MODIFY TTL "; + break; + } + case 879: { + s += " modulo "; + break; + } + case 880: { + s += " moduloLegacy "; + break; + } + case 881: { + s += " moduloOrZero "; + break; + } + case 882: { + s += " MONTH "; + break; + } + case 883: { + s += " MOVE "; + break; + } + case 884: { + s += " MOVE PART "; + break; + } + case 885: { + s += " MOVE PARTITION "; + break; + } + case 886: { + s += " movingXXX "; + break; + } + case 887: { + s += " multiFuzzyMatchAllIndices "; + break; + } + case 888: { + s += " multiFuzzyMatchAny "; + break; + } + case 889: { + s += " multiFuzzyMatchAnyIndex "; + break; + } + case 890: { + s += " multiIf "; + break; + } + case 891: { + s += " multiMatchAllIndices "; + break; + } + case 892: { + s += " multiMatchAny "; + break; + } + case 893: { + s += " multiMatchAnyIndex "; + break; + } + case 894: { + s += " multiply "; + break; + } + case 895: { + s += " MultiPolygon "; + break; + } + case 896: { + s += " multiSearchAllPositions "; + break; + } + case 897: { + s += " multiSearchAllPositionsCaseInsensitive "; + break; + } + case 898: { + s += " multiSearchAllPositionsCaseInsensitiveUTF8 "; + break; + } + case 899: { + s += " multiSearchAllPositionsUTF8 "; + break; + } + case 900: { + s += " multiSearchAny "; + break; + } + case 901: { + s += " multiSearchAnyCaseInsensitive "; + break; + } + case 902: { + s += " multiSearchAnyCaseInsensitiveUTF8 "; + break; + } + case 903: { + s += " multiSearchAnyUTF8 "; + break; + } + case 904: { + s += " multiSearchFirstIndex "; + break; + } + case 905: { + s += " multiSearchFirstIndexCaseInsensitive "; + break; + } + case 906: { + s += " multiSearchFirstIndexCaseInsensitiveUTF8 "; + break; + } + case 907: { + s += " multiSearchFirstIndexUTF8 "; + break; + } + case 908: { + s += " multiSearchFirstPosition "; + break; + } + case 909: { + s += " multiSearchFirstPositionCaseInsensitive "; + break; + } + case 910: { + s += " multiSearchFirstPositionCaseInsensitiveUTF8 "; + break; + } + case 911: { + s += " multiSearchFirstPositionUTF8 "; + break; + } + case 912: { + s += " murmurHash2_32 "; + break; + } + case 913: { + s += " murmurHash2_64 "; + break; + } + case 914: { + s += " murmurHash3_128 "; + break; + } + case 915: { + s += " murmurHash3_32 "; + break; + } + case 916: { + s += " murmurHash3_64 "; + break; + } + case 917: { + s += " MUTATION "; + break; + } + case 918: { + s += " N "; + break; + } + case 919: { + s += " NAME "; + break; + } + case 920: { + s += " NAN_SQL "; + break; + } + case 921: { + s += " NATIONAL CHAR "; + break; + } + case 922: { + s += " NATIONAL CHARACTER "; + break; + } + case 923: { + s += " NATIONAL CHARACTER LARGE OBJECT "; + break; + } + case 924: { + s += " NATIONAL CHARACTER VARYING "; + break; + } + case 925: { + 
s += " NATIONAL CHAR VARYING "; + break; + } + case 926: { + s += " NCHAR "; + break; + } + case 927: { + s += " NCHAR LARGE OBJECT "; + break; + } + case 928: { + s += " NCHAR VARYING "; + break; + } + case 929: { + s += " negate "; + break; + } + case 930: { + s += " neighbor "; + break; + } + case 931: { + s += " Nested "; + break; + } + case 932: { + s += " netloc "; + break; + } + case 933: { + s += " ngramDistance "; + break; + } + case 934: { + s += " ngramDistanceCaseInsensitive "; + break; + } + case 935: { + s += " ngramDistanceCaseInsensitiveUTF8 "; + break; + } + case 936: { + s += " ngramDistanceUTF8 "; + break; + } + case 937: { + s += " ngramMinHash "; + break; + } + case 938: { + s += " ngramMinHashArg "; + break; + } + case 939: { + s += " ngramMinHashArgCaseInsensitive "; + break; + } + case 940: { + s += " ngramMinHashArgCaseInsensitiveUTF8 "; + break; + } + case 941: { + s += " ngramMinHashArgUTF8 "; + break; + } + case 942: { + s += " ngramMinHashCaseInsensitive "; + break; + } + case 943: { + s += " ngramMinHashCaseInsensitiveUTF8 "; + break; + } + case 944: { + s += " ngramMinHashUTF8 "; + break; + } + case 945: { + s += " ngramSearch "; + break; + } + case 946: { + s += " ngramSearchCaseInsensitive "; + break; + } + case 947: { + s += " ngramSearchCaseInsensitiveUTF8 "; + break; + } + case 948: { + s += " ngramSearchUTF8 "; + break; + } + case 949: { + s += " ngramSimHash "; + break; + } + case 950: { + s += " ngramSimHashCaseInsensitive "; + break; + } + case 951: { + s += " ngramSimHashCaseInsensitiveUTF8 "; + break; + } + case 952: { + s += " ngramSimHashUTF8 "; + break; + } + case 953: { + s += " NO "; + break; + } + case 954: { + s += " NO DELAY "; + break; + } + case 955: { + s += " NONE "; + break; + } + case 956: { + s += " normalizedQueryHash "; + break; + } + case 957: { + s += " normalizedQueryHashKeepNames "; + break; + } + case 958: { + s += " normalizeQuery "; + break; + } + case 959: { + s += " normalizeQueryKeepNames "; + break; + } + case 960: { + s += " not "; + break; + } + case 961: { + s += " NOT "; + break; + } + case 962: { + s += " notEmpty "; + break; + } + case 963: { + s += " notEquals "; + break; + } + case 964: { + s += " nothing "; + break; + } + case 965: { + s += " Nothing "; + break; + } + case 966: { + s += " notILike "; + break; + } + case 967: { + s += " notIn "; + break; + } + case 968: { + s += " notInIgnoreSet "; + break; + } + case 969: { + s += " notLike "; + break; + } + case 970: { + s += " notNullIn "; + break; + } + case 971: { + s += " notNullInIgnoreSet "; + break; + } + case 972: { + s += " now "; + break; + } + case 973: { + s += " now64 "; + break; + } + case 974: { + s += " Null "; + break; + } + case 975: { + s += " Nullable "; + break; + } + case 976: { + s += " nullIf "; + break; + } + case 977: { + s += " nullIn "; + break; + } + case 978: { + s += " nullInIgnoreSet "; + break; + } + case 979: { + s += " NULLS "; + break; + } + case 980: { + s += " NULL_SQL "; + break; + } + case 981: { + s += " NUMERIC "; + break; + } + case 982: { + s += " NVARCHAR "; + break; + } + case 983: { + s += " OFFSET "; + break; + } + case 984: { + s += " ON "; + break; + } + case 985: { + s += " ONLY "; + break; + } + case 986: { + s += " OPTIMIZE "; + break; + } + case 987: { + s += " OPTIMIZE TABLE "; + break; + } + case 988: { + s += " or "; + break; + } + case 989: { + s += " OR "; + break; + } + case 990: { + s += " ORDER "; + break; + } + case 991: { + s += " ORDER BY "; + break; + } + case 992: { + s += " OR REPLACE "; + 
break; + } + case 993: { + s += " OUTER "; + break; + } + case 994: { + s += " OUTFILE "; + break; + } + case 995: { + s += " parseDateTime32BestEffort "; + break; + } + case 996: { + s += " parseDateTime32BestEffortOrNull "; + break; + } + case 997: { + s += " parseDateTime32BestEffortOrZero "; + break; + } + case 998: { + s += " parseDateTime64BestEffort "; + break; + } + case 999: { + s += " parseDateTime64BestEffortOrNull "; + break; + } + case 1000: { + s += " parseDateTime64BestEffortOrZero "; + break; + } + case 1001: { + s += " parseDateTimeBestEffort "; + break; + } + case 1002: { + s += " parseDateTimeBestEffortOrNull "; + break; + } + case 1003: { + s += " parseDateTimeBestEffortOrZero "; + break; + } + case 1004: { + s += " parseDateTimeBestEffortUS "; + break; + } + case 1005: { + s += " parseDateTimeBestEffortUSOrNull "; + break; + } + case 1006: { + s += " parseDateTimeBestEffortUSOrZero "; + break; + } + case 1007: { + s += " parseTimeDelta "; + break; + } + case 1008: { + s += " PARTITION "; + break; + } + case 1009: { + s += " PARTITION BY "; + break; + } + case 1010: { + s += " partitionId "; + break; + } + case 1011: { + s += " path "; + break; + } + case 1012: { + s += " pathFull "; + break; + } + case 1013: { + s += " pi "; + break; + } + case 1014: { + s += " plus "; + break; + } + case 1015: { + s += " Point "; + break; + } + case 1016: { + s += " pointInEllipses "; + break; + } + case 1017: { + s += " pointInPolygon "; + break; + } + case 1018: { + s += " Polygon "; + break; + } + case 1019: { + s += " polygonAreaCartesian "; + break; + } + case 1020: { + s += " polygonAreaSpherical "; + break; + } + case 1021: { + s += " polygonConvexHullCartesian "; + break; + } + case 1022: { + s += " polygonPerimeterCartesian "; + break; + } + case 1023: { + s += " polygonPerimeterSpherical "; + break; + } + case 1024: { + s += " polygonsDistanceCartesian "; + break; + } + case 1025: { + s += " polygonsDistanceSpherical "; + break; + } + case 1026: { + s += " polygonsEqualsCartesian "; + break; + } + case 1027: { + s += " polygonsIntersectionCartesian "; + break; + } + case 1028: { + s += " polygonsIntersectionSpherical "; + break; + } + case 1029: { + s += " polygonsSymDifferenceCartesian "; + break; + } + case 1030: { + s += " polygonsSymDifferenceSpherical "; + break; + } + case 1031: { + s += " polygonsUnionCartesian "; + break; + } + case 1032: { + s += " polygonsUnionSpherical "; + break; + } + case 1033: { + s += " polygonsWithinCartesian "; + break; + } + case 1034: { + s += " polygonsWithinSpherical "; + break; + } + case 1035: { + s += " POPULATE "; + break; + } + case 1036: { + s += " port "; + break; + } + case 1037: { + s += " position "; + break; + } + case 1038: { + s += " positionCaseInsensitive "; + break; + } + case 1039: { + s += " positionCaseInsensitiveUTF8 "; + break; + } + case 1040: { + s += " positionUTF8 "; + break; + } + case 1041: { + s += " pow "; + break; + } + case 1042: { + s += " power "; + break; + } + case 1043: { + s += " PREWHERE "; + break; + } + case 1044: { + s += " PRIMARY "; + break; + } + case 1045: { + s += " PRIMARY KEY "; + break; + } + case 1046: { + s += " PROJECTION "; + break; + } + case 1047: { + s += " protocol "; + break; + } + case 1048: { + s += " Q "; + break; + } + case 1049: { + s += " QQ "; + break; + } + case 1050: { + s += " quantile "; + break; + } + case 1051: { + s += " quantileBFloat16 "; + break; + } + case 1052: { + s += " quantileBFloat16Weighted "; + break; + } + case 1053: { + s += " quantileDeterministic "; 
+ break; + } + case 1054: { + s += " quantileExact "; + break; + } + case 1055: { + s += " quantileExactExclusive "; + break; + } + case 1056: { + s += " quantileExactHigh "; + break; + } + case 1057: { + s += " quantileExactInclusive "; + break; + } + case 1058: { + s += " quantileExactLow "; + break; + } + case 1059: { + s += " quantileExactWeighted "; + break; + } + case 1060: { + s += " quantiles "; + break; + } + case 1061: { + s += " quantilesBFloat16 "; + break; + } + case 1062: { + s += " quantilesBFloat16Weighted "; + break; + } + case 1063: { + s += " quantilesDeterministic "; + break; + } + case 1064: { + s += " quantilesExact "; + break; + } + case 1065: { + s += " quantilesExactExclusive "; + break; + } + case 1066: { + s += " quantilesExactHigh "; + break; + } + case 1067: { + s += " quantilesExactInclusive "; + break; + } + case 1068: { + s += " quantilesExactLow "; + break; + } + case 1069: { + s += " quantilesExactWeighted "; + break; + } + case 1070: { + s += " quantilesTDigest "; + break; + } + case 1071: { + s += " quantilesTDigestWeighted "; + break; + } + case 1072: { + s += " quantilesTiming "; + break; + } + case 1073: { + s += " quantilesTimingWeighted "; + break; + } + case 1074: { + s += " quantileTDigest "; + break; + } + case 1075: { + s += " quantileTDigestWeighted "; + break; + } + case 1076: { + s += " quantileTiming "; + break; + } + case 1077: { + s += " quantileTimingWeighted "; + break; + } + case 1078: { + s += " QUARTER "; + break; + } + case 1079: { + s += " query_id "; + break; + } + case 1080: { + s += " queryID "; + break; + } + case 1081: { + s += " queryString "; + break; + } + case 1082: { + s += " queryStringAndFragment "; + break; + } + case 1083: { + s += " rand "; + break; + } + case 1084: { + s += " rand32 "; + break; + } + case 1085: { + s += " rand64 "; + break; + } + case 1086: { + s += " randConstant "; + break; + } + case 1087: { + s += " randomFixedString "; + break; + } + case 1088: { + s += " randomPrintableASCII "; + break; + } + case 1089: { + s += " randomString "; + break; + } + case 1090: { + s += " randomStringUTF8 "; + break; + } + case 1091: { + s += " range "; + break; + } + case 1092: { + s += " RANGE "; + break; + } + case 1093: { + s += " rank "; + break; + } + case 1094: { + s += " rankCorr "; + break; + } + case 1095: { + s += " readWKTMultiPolygon "; + break; + } + case 1096: { + s += " readWKTPoint "; + break; + } + case 1097: { + s += " readWKTPolygon "; + break; + } + case 1098: { + s += " readWKTRing "; + break; + } + case 1099: { + s += " REAL "; + break; + } + case 1100: { + s += " REFRESH "; + break; + } + case 1101: { + s += " regexpQuoteMeta "; + break; + } + case 1102: { + s += " regionHierarchy "; + break; + } + case 1103: { + s += " regionIn "; + break; + } + case 1104: { + s += " regionToArea "; + break; + } + case 1105: { + s += " regionToCity "; + break; + } + case 1106: { + s += " regionToContinent "; + break; + } + case 1107: { + s += " regionToCountry "; + break; + } + case 1108: { + s += " regionToDistrict "; + break; + } + case 1109: { + s += " regionToName "; + break; + } + case 1110: { + s += " regionToPopulation "; + break; + } + case 1111: { + s += " regionToTopContinent "; + break; + } + case 1112: { + s += " reinterpret "; + break; + } + case 1113: { + s += " reinterpretAsDate "; + break; + } + case 1114: { + s += " reinterpretAsDateTime "; + break; + } + case 1115: { + s += " reinterpretAsFixedString "; + break; + } + case 1116: { + s += " reinterpretAsFloat32 "; + break; + } + case 1117: { 
+ s += " reinterpretAsFloat64 "; + break; + } + case 1118: { + s += " reinterpretAsInt128 "; + break; + } + case 1119: { + s += " reinterpretAsInt16 "; + break; + } + case 1120: { + s += " reinterpretAsInt256 "; + break; + } + case 1121: { + s += " reinterpretAsInt32 "; + break; + } + case 1122: { + s += " reinterpretAsInt64 "; + break; + } + case 1123: { + s += " reinterpretAsInt8 "; + break; + } + case 1124: { + s += " reinterpretAsString "; + break; + } + case 1125: { + s += " reinterpretAsUInt128 "; + break; + } + case 1126: { + s += " reinterpretAsUInt16 "; + break; + } + case 1127: { + s += " reinterpretAsUInt256 "; + break; + } + case 1128: { + s += " reinterpretAsUInt32 "; + break; + } + case 1129: { + s += " reinterpretAsUInt64 "; + break; + } + case 1130: { + s += " reinterpretAsUInt8 "; + break; + } + case 1131: { + s += " reinterpretAsUUID "; + break; + } + case 1132: { + s += " RELOAD "; + break; + } + case 1133: { + s += " REMOVE "; + break; + } + case 1134: { + s += " RENAME "; + break; + } + case 1135: { + s += " RENAME COLUMN "; + break; + } + case 1136: { + s += " RENAME TABLE "; + break; + } + case 1137: { + s += " repeat "; + break; + } + case 1138: { + s += " replace "; + break; + } + case 1139: { + s += " REPLACE "; + break; + } + case 1140: { + s += " replaceAll "; + break; + } + case 1141: { + s += " replaceOne "; + break; + } + case 1142: { + s += " REPLACE PARTITION "; + break; + } + case 1143: { + s += " replaceRegexpAll "; + break; + } + case 1144: { + s += " replaceRegexpOne "; + break; + } + case 1145: { + s += " REPLICA "; + break; + } + case 1146: { + s += " replicate "; + break; + } + case 1147: { + s += " REPLICATED "; + break; + } + case 1148: { + s += " Resample "; + break; + } + case 1149: { + s += " RESUME "; + break; + } + case 1150: { + s += " retention "; + break; + } + case 1151: { + s += " reverse "; + break; + } + case 1152: { + s += " reverseUTF8 "; + break; + } + case 1153: { + s += " RIGHT "; + break; + } + case 1154: { + s += " rightPad "; + break; + } + case 1155: { + s += " rightPadUTF8 "; + break; + } + case 1156: { + s += " Ring "; + break; + } + case 1157: { + s += " ROLLUP "; + break; + } + case 1158: { + s += " round "; + break; + } + case 1159: { + s += " roundAge "; + break; + } + case 1160: { + s += " roundBankers "; + break; + } + case 1161: { + s += " roundDown "; + break; + } + case 1162: { + s += " roundDuration "; + break; + } + case 1163: { + s += " roundToExp2 "; + break; + } + case 1164: { + s += " row_number "; + break; + } + case 1165: { + s += " rowNumberInAllBlocks "; + break; + } + case 1166: { + s += " rowNumberInBlock "; + break; + } + case 1167: { + s += " rpad "; + break; + } + case 1168: { + s += " RTRIM "; + break; + } + case 1169: { + s += " runningAccumulate "; + break; + } + case 1170: { + s += " runningConcurrency "; + break; + } + case 1171: { + s += " runningDifference "; + break; + } + case 1172: { + s += " runningDifferenceStartingWithFirstValue "; + break; + } + case 1173: { + s += " S "; + break; + } + case 1174: { + s += " s2CapContains "; + break; + } + case 1175: { + s += " s2CapUnion "; + break; + } + case 1176: { + s += " s2CellsIntersect "; + break; + } + case 1177: { + s += " s2GetNeighbors "; + break; + } + case 1178: { + s += " s2RectAdd "; + break; + } + case 1179: { + s += " s2RectContains "; + break; + } + case 1180: { + s += " s2RectIntersection "; + break; + } + case 1181: { + s += " s2RectUnion "; + break; + } + case 1182: { + s += " s2ToGeo "; + break; + } + case 1183: { + s += " SAMPLE 
"; + break; + } + case 1184: { + s += " SAMPLE BY "; + break; + } + case 1185: { + s += " SECOND "; + break; + } + case 1186: { + s += " SELECT "; + break; + } + case 1187: { + s += " SEMI "; + break; + } + case 1188: { + s += " SENDS "; + break; + } + case 1189: { + s += " sequenceCount "; + break; + } + case 1190: { + s += " sequenceMatch "; + break; + } + case 1191: { + s += " sequenceNextNode "; + break; + } + case 1192: { + s += " serverUUID "; + break; + } + case 1193: { + s += " SET "; + break; + } + case 1194: { + s += " SETTINGS "; + break; + } + case 1195: { + s += " SHA1 "; + break; + } + case 1196: { + s += " SHA224 "; + break; + } + case 1197: { + s += " SHA256 "; + break; + } + case 1198: { + s += " SHA384 "; + break; + } + case 1199: { + s += " SHA512 "; + break; + } + case 1200: { + s += " shardCount "; + break; + } + case 1201: { + s += " shardNum "; + break; + } + case 1202: { + s += " SHOW "; + break; + } + case 1203: { + s += " SHOW PROCESSLIST "; + break; + } + case 1204: { + s += " sigmoid "; + break; + } + case 1205: { + s += " sign "; + break; + } + case 1206: { + s += " SimpleAggregateFunction "; + break; + } + case 1207: { + s += " simpleJSONExtractBool "; + break; + } + case 1208: { + s += " simpleJSONExtractFloat "; + break; + } + case 1209: { + s += " simpleJSONExtractInt "; + break; + } + case 1210: { + s += " simpleJSONExtractRaw "; + break; + } + case 1211: { + s += " simpleJSONExtractString "; + break; + } + case 1212: { + s += " simpleJSONExtractUInt "; + break; + } + case 1213: { + s += " simpleJSONHas "; + break; + } + case 1214: { + s += " simpleLinearRegression "; + break; + } + case 1215: { + s += " sin "; + break; + } + case 1216: { + s += " SINGLE "; + break; + } + case 1217: { + s += " singleValueOrNull "; + break; + } + case 1218: { + s += " sinh "; + break; + } + case 1219: { + s += " sipHash128 "; + break; + } + case 1220: { + s += " sipHash64 "; + break; + } + case 1221: { + s += " skewPop "; + break; + } + case 1222: { + s += " skewSamp "; + break; + } + case 1223: { + s += " sleep "; + break; + } + case 1224: { + s += " sleepEachRow "; + break; + } + case 1225: { + s += " SMALLINT "; + break; + } + case 1226: { + s += " SMALLINT SIGNED "; + break; + } + case 1227: { + s += " SMALLINT UNSIGNED "; + break; + } + case 1228: { + s += " snowflakeToDateTime "; + break; + } + case 1229: { + s += " snowflakeToDateTime64 "; + break; + } + case 1230: { + s += " SOURCE "; + break; + } + case 1231: { + s += " sparkbar "; + break; + } + case 1232: { + s += " splitByChar "; + break; + } + case 1233: { + s += " splitByNonAlpha "; + break; + } + case 1234: { + s += " splitByRegexp "; + break; + } + case 1235: { + s += " splitByString "; + break; + } + case 1236: { + s += " splitByWhitespace "; + break; + } + case 1237: { + s += " SQL_TSI_DAY "; + break; + } + case 1238: { + s += " SQL_TSI_HOUR "; + break; + } + case 1239: { + s += " SQL_TSI_MINUTE "; + break; + } + case 1240: { + s += " SQL_TSI_MONTH "; + break; + } + case 1241: { + s += " SQL_TSI_QUARTER "; + break; + } + case 1242: { + s += " SQL_TSI_SECOND "; + break; + } + case 1243: { + s += " SQL_TSI_WEEK "; + break; + } + case 1244: { + s += " SQL_TSI_YEAR "; + break; + } + case 1245: { + s += " sqrt "; + break; + } + case 1246: { + s += " SS "; + break; + } + case 1247: { + s += " START "; + break; + } + case 1248: { + s += " startsWith "; + break; + } + case 1249: { + s += " State "; + break; + } + case 1250: { + s += " stddevPop "; + break; + } + case 1251: { + s += " STDDEV_POP "; + break; + } + 
case 1252: { + s += " stddevPopStable "; + break; + } + case 1253: { + s += " stddevSamp "; + break; + } + case 1254: { + s += " STDDEV_SAMP "; + break; + } + case 1255: { + s += " stddevSampStable "; + break; + } + case 1256: { + s += " stem "; + break; + } + case 1257: { + s += " STEP "; + break; + } + case 1258: { + s += " stochasticLinearRegression "; + break; + } + case 1259: { + s += " stochasticLogisticRegression "; + break; + } + case 1260: { + s += " STOP "; + break; + } + case 1261: { + s += " String "; + break; + } + case 1262: { + s += " stringToH3 "; + break; + } + case 1263: { + s += " studentTTest "; + break; + } + case 1264: { + s += " subBitmap "; + break; + } + case 1265: { + s += " substr "; + break; + } + case 1266: { + s += " substring "; + break; + } + case 1267: { + s += " SUBSTRING "; + break; + } + case 1268: { + s += " substringUTF8 "; + break; + } + case 1269: { + s += " subtractDays "; + break; + } + case 1270: { + s += " subtractHours "; + break; + } + case 1271: { + s += " subtractMinutes "; + break; + } + case 1272: { + s += " subtractMonths "; + break; + } + case 1273: { + s += " subtractQuarters "; + break; + } + case 1274: { + s += " subtractSeconds "; + break; + } + case 1275: { + s += " subtractWeeks "; + break; + } + case 1276: { + s += " subtractYears "; + break; + } + case 1277: { + s += " sum "; + break; + } + case 1278: { + s += " sumCount "; + break; + } + case 1279: { + s += " sumKahan "; + break; + } + case 1280: { + s += " sumMap "; + break; + } + case 1281: { + s += " sumMapFiltered "; + break; + } + case 1282: { + s += " sumMapFilteredWithOverflow "; + break; + } + case 1283: { + s += " sumMapWithOverflow "; + break; + } + case 1284: { + s += " sumWithOverflow "; + break; + } + case 1285: { + s += " SUSPEND "; + break; + } + case 1286: { + s += " svg "; + break; + } + case 1287: { + s += " SVG "; + break; + } + case 1288: { + s += " SYNC "; + break; + } + case 1289: { + s += " synonyms "; + break; + } + case 1290: { + s += " SYNTAX "; + break; + } + case 1291: { + s += " SYSTEM "; + break; + } + case 1292: { + s += " TABLE "; + break; + } + case 1293: { + s += " TABLES "; + break; + } + case 1294: { + s += " tan "; + break; + } + case 1295: { + s += " tanh "; + break; + } + case 1296: { + s += " tcpPort "; + break; + } + case 1297: { + s += " TEMPORARY "; + break; + } + case 1298: { + s += " TEST "; + break; + } + case 1299: { + s += " TEXT "; + break; + } + case 1300: { + s += " tgamma "; + break; + } + case 1301: { + s += " THEN "; + break; + } + case 1302: { + s += " throwIf "; + break; + } + case 1303: { + s += " tid "; + break; + } + case 1304: { + s += " TIES "; + break; + } + case 1305: { + s += " TIMEOUT "; + break; + } + case 1306: { + s += " timeSlot "; + break; + } + case 1307: { + s += " timeSlots "; + break; + } + case 1308: { + s += " TIMESTAMP "; + break; + } + case 1309: { + s += " TIMESTAMP_ADD "; + break; + } + case 1310: { + s += " TIMESTAMPADD "; + break; + } + case 1311: { + s += " TIMESTAMP_DIFF "; + break; + } + case 1312: { + s += " TIMESTAMPDIFF "; + break; + } + case 1313: { + s += " TIMESTAMP_SUB "; + break; + } + case 1314: { + s += " TIMESTAMPSUB "; + break; + } + case 1315: { + s += " timezone "; + break; + } + case 1316: { + s += " timeZone "; + break; + } + case 1317: { + s += " timezoneOf "; + break; + } + case 1318: { + s += " timeZoneOf "; + break; + } + case 1319: { + s += " timezoneOffset "; + break; + } + case 1320: { + s += " timeZoneOffset "; + break; + } + case 1321: { + s += " TINYBLOB "; + break; + } 
+ case 1322: { + s += " TINYINT "; + break; + } + case 1323: { + s += " TINYINT SIGNED "; + break; + } + case 1324: { + s += " TINYINT UNSIGNED "; + break; + } + case 1325: { + s += " TINYTEXT "; + break; + } + case 1326: { + s += " TO "; + break; + } + case 1327: { + s += " TO_BASE64 "; + break; + } + case 1328: { + s += " toColumnTypeName "; + break; + } + case 1329: { + s += " toDate "; + break; + } + case 1330: { + s += " toDate32 "; + break; + } + case 1331: { + s += " toDate32OrNull "; + break; + } + case 1332: { + s += " toDate32OrZero "; + break; + } + case 1333: { + s += " toDateOrNull "; + break; + } + case 1334: { + s += " toDateOrZero "; + break; + } + case 1335: { + s += " toDateTime "; + break; + } + case 1336: { + s += " toDateTime32 "; + break; + } + case 1337: { + s += " toDateTime64 "; + break; + } + case 1338: { + s += " toDateTime64OrNull "; + break; + } + case 1339: { + s += " toDateTime64OrZero "; + break; + } + case 1340: { + s += " toDateTimeOrNull "; + break; + } + case 1341: { + s += " toDateTimeOrZero "; + break; + } + case 1342: { + s += " today "; + break; + } + case 1343: { + s += " toDayOfMonth "; + break; + } + case 1344: { + s += " toDayOfWeek "; + break; + } + case 1345: { + s += " toDayOfYear "; + break; + } + case 1346: { + s += " toDecimal128 "; + break; + } + case 1347: { + s += " toDecimal128OrNull "; + break; + } + case 1348: { + s += " toDecimal128OrZero "; + break; + } + case 1349: { + s += " toDecimal256 "; + break; + } + case 1350: { + s += " toDecimal256OrNull "; + break; + } + case 1351: { + s += " toDecimal256OrZero "; + break; + } + case 1352: { + s += " toDecimal32 "; + break; + } + case 1353: { + s += " toDecimal32OrNull "; + break; + } + case 1354: { + s += " toDecimal32OrZero "; + break; + } + case 1355: { + s += " toDecimal64 "; + break; + } + case 1356: { + s += " toDecimal64OrNull "; + break; + } + case 1357: { + s += " toDecimal64OrZero "; + break; + } + case 1358: { + s += " TO DISK "; + break; + } + case 1359: { + s += " toFixedString "; + break; + } + case 1360: { + s += " toFloat32 "; + break; + } + case 1361: { + s += " toFloat32OrNull "; + break; + } + case 1362: { + s += " toFloat32OrZero "; + break; + } + case 1363: { + s += " toFloat64 "; + break; + } + case 1364: { + s += " toFloat64OrNull "; + break; + } + case 1365: { + s += " toFloat64OrZero "; + break; + } + case 1366: { + s += " toHour "; + break; + } + case 1367: { + s += " toInt128 "; + break; + } + case 1368: { + s += " toInt128OrNull "; + break; + } + case 1369: { + s += " toInt128OrZero "; + break; + } + case 1370: { + s += " toInt16 "; + break; + } + case 1371: { + s += " toInt16OrNull "; + break; + } + case 1372: { + s += " toInt16OrZero "; + break; + } + case 1373: { + s += " toInt256 "; + break; + } + case 1374: { + s += " toInt256OrNull "; + break; + } + case 1375: { + s += " toInt256OrZero "; + break; + } + case 1376: { + s += " toInt32 "; + break; + } + case 1377: { + s += " toInt32OrNull "; + break; + } + case 1378: { + s += " toInt32OrZero "; + break; + } + case 1379: { + s += " toInt64 "; + break; + } + case 1380: { + s += " toInt64OrNull "; + break; + } + case 1381: { + s += " toInt64OrZero "; + break; + } + case 1382: { + s += " toInt8 "; + break; + } + case 1383: { + s += " toInt8OrNull "; + break; + } + case 1384: { + s += " toInt8OrZero "; + break; + } + case 1385: { + s += " toIntervalDay "; + break; + } + case 1386: { + s += " toIntervalHour "; + break; + } + case 1387: { + s += " toIntervalMinute "; + break; + } + case 1388: { + s += " 
toIntervalMonth "; + break; + } + case 1389: { + s += " toIntervalQuarter "; + break; + } + case 1390: { + s += " toIntervalSecond "; + break; + } + case 1391: { + s += " toIntervalWeek "; + break; + } + case 1392: { + s += " toIntervalYear "; + break; + } + case 1393: { + s += " toIPv4 "; + break; + } + case 1394: { + s += " toIPv6 "; + break; + } + case 1395: { + s += " toISOWeek "; + break; + } + case 1396: { + s += " toISOYear "; + break; + } + case 1397: { + s += " toJSONString "; + break; + } + case 1398: { + s += " toLowCardinality "; + break; + } + case 1399: { + s += " toMinute "; + break; + } + case 1400: { + s += " toModifiedJulianDay "; + break; + } + case 1401: { + s += " toModifiedJulianDayOrNull "; + break; + } + case 1402: { + s += " toMonday "; + break; + } + case 1403: { + s += " toMonth "; + break; + } + case 1404: { + s += " toNullable "; + break; + } + case 1405: { + s += " TOP "; + break; + } + case 1406: { + s += " topK "; + break; + } + case 1407: { + s += " topKWeighted "; + break; + } + case 1408: { + s += " topLevelDomain "; + break; + } + case 1409: { + s += " toQuarter "; + break; + } + case 1410: { + s += " toRelativeDayNum "; + break; + } + case 1411: { + s += " toRelativeHourNum "; + break; + } + case 1412: { + s += " toRelativeMinuteNum "; + break; + } + case 1413: { + s += " toRelativeMonthNum "; + break; + } + case 1414: { + s += " toRelativeQuarterNum "; + break; + } + case 1415: { + s += " toRelativeSecondNum "; + break; + } + case 1416: { + s += " toRelativeWeekNum "; + break; + } + case 1417: { + s += " toRelativeYearNum "; + break; + } + case 1418: { + s += " toSecond "; + break; + } + case 1419: { + s += " toStartOfDay "; + break; + } + case 1420: { + s += " toStartOfFifteenMinutes "; + break; + } + case 1421: { + s += " toStartOfFiveMinutes "; + break; + } + case 1422: { + s += " toStartOfHour "; + break; + } + case 1423: { + s += " toStartOfInterval "; + break; + } + case 1424: { + s += " toStartOfISOYear "; + break; + } + case 1425: { + s += " toStartOfMinute "; + break; + } + case 1426: { + s += " toStartOfMonth "; + break; + } + case 1427: { + s += " toStartOfQuarter "; + break; + } + case 1428: { + s += " toStartOfSecond "; + break; + } + case 1429: { + s += " toStartOfTenMinutes "; + break; + } + case 1430: { + s += " toStartOfWeek "; + break; + } + case 1431: { + s += " toStartOfYear "; + break; + } + case 1432: { + s += " toString "; + break; + } + case 1433: { + s += " toStringCutToZero "; + break; + } + case 1434: { + s += " TO TABLE "; + break; + } + case 1435: { + s += " TOTALS "; + break; + } + case 1436: { + s += " toTime "; + break; + } + case 1437: { + s += " toTimezone "; + break; + } + case 1438: { + s += " toTimeZone "; + break; + } + case 1439: { + s += " toTypeName "; + break; + } + case 1440: { + s += " toUInt128 "; + break; + } + case 1441: { + s += " toUInt128OrNull "; + break; + } + case 1442: { + s += " toUInt128OrZero "; + break; + } + case 1443: { + s += " toUInt16 "; + break; + } + case 1444: { + s += " toUInt16OrNull "; + break; + } + case 1445: { + s += " toUInt16OrZero "; + break; + } + case 1446: { + s += " toUInt256 "; + break; + } + case 1447: { + s += " toUInt256OrNull "; + break; + } + case 1448: { + s += " toUInt256OrZero "; + break; + } + case 1449: { + s += " toUInt32 "; + break; + } + case 1450: { + s += " toUInt32OrNull "; + break; + } + case 1451: { + s += " toUInt32OrZero "; + break; + } + case 1452: { + s += " toUInt64 "; + break; + } + case 1453: { + s += " toUInt64OrNull "; + break; + } + case 1454: { 
+ s += " toUInt64OrZero "; + break; + } + case 1455: { + s += " toUInt8 "; + break; + } + case 1456: { + s += " toUInt8OrNull "; + break; + } + case 1457: { + s += " toUInt8OrZero "; + break; + } + case 1458: { + s += " toUnixTimestamp "; + break; + } + case 1459: { + s += " toUnixTimestamp64Micro "; + break; + } + case 1460: { + s += " toUnixTimestamp64Milli "; + break; + } + case 1461: { + s += " toUnixTimestamp64Nano "; + break; + } + case 1462: { + s += " toUUID "; + break; + } + case 1463: { + s += " toUUIDOrNull "; + break; + } + case 1464: { + s += " toUUIDOrZero "; + break; + } + case 1465: { + s += " toValidUTF8 "; + break; + } + case 1466: { + s += " TO VOLUME "; + break; + } + case 1467: { + s += " toWeek "; + break; + } + case 1468: { + s += " toYear "; + break; + } + case 1469: { + s += " toYearWeek "; + break; + } + case 1470: { + s += " toYYYYMM "; + break; + } + case 1471: { + s += " toYYYYMMDD "; + break; + } + case 1472: { + s += " toYYYYMMDDhhmmss "; + break; + } + case 1473: { + s += " TRAILING "; + break; + } + case 1474: { + s += " transform "; + break; + } + case 1475: { + s += " TRIM "; + break; + } + case 1476: { + s += " trimBoth "; + break; + } + case 1477: { + s += " trimLeft "; + break; + } + case 1478: { + s += " trimRight "; + break; + } + case 1479: { + s += " trunc "; + break; + } + case 1480: { + s += " truncate "; + break; + } + case 1481: { + s += " TRUNCATE "; + break; + } + case 1482: { + s += " tryBase64Decode "; + break; + } + case 1483: { + s += " TTL "; + break; + } + case 1484: { + s += " tuple "; + break; + } + case 1485: { + s += " Tuple "; + break; + } + case 1486: { + s += " tupleElement "; + break; + } + case 1487: { + s += " tupleHammingDistance "; + break; + } + case 1488: { + s += " tupleToNameValuePairs "; + break; + } + case 1489: { + s += " TYPE "; + break; + } + case 1490: { + s += " ucase "; + break; + } + case 1491: { + s += " UInt128 "; + break; + } + case 1492: { + s += " UInt16 "; + break; + } + case 1493: { + s += " UInt256 "; + break; + } + case 1494: { + s += " UInt32 "; + break; + } + case 1495: { + s += " UInt64 "; + break; + } + case 1496: { + s += " UInt8 "; + break; + } + case 1497: { + s += " unbin "; + break; + } + case 1498: { + s += " unhex "; + break; + } + case 1499: { + s += " UNION "; + break; + } + case 1500: { + s += " uniq "; + break; + } + case 1501: { + s += " uniqCombined "; + break; + } + case 1502: { + s += " uniqCombined64 "; + break; + } + case 1503: { + s += " uniqExact "; + break; + } + case 1504: { + s += " uniqHLL12 "; + break; + } + case 1505: { + s += " uniqTheta "; + break; + } + case 1506: { + s += " uniqUpTo "; + break; + } + case 1507: { + s += " UPDATE "; + break; + } + case 1508: { + s += " upper "; + break; + } + case 1509: { + s += " upperUTF8 "; + break; + } + case 1510: { + s += " uptime "; + break; + } + case 1511: { + s += " URLHash "; + break; + } + case 1512: { + s += " URLHierarchy "; + break; + } + case 1513: { + s += " URLPathHierarchy "; + break; + } + case 1514: { + s += " USE "; + break; + } + case 1515: { + s += " user "; + break; + } + case 1516: { + s += " USING "; + break; + } + case 1517: { + s += " UUID "; + break; + } + case 1518: { + s += " UUIDNumToString "; + break; + } + case 1519: { + s += " UUIDStringToNum "; + break; + } + case 1520: { + s += " validateNestedArraySizes "; + break; + } + case 1521: { + s += " VALUES "; + break; + } + case 1522: { + s += " VARCHAR "; + break; + } + case 1523: { + s += " VARCHAR2 "; + break; + } + case 1524: { + s += " varPop "; + 
break; + } + case 1525: { + s += " VAR_POP "; + break; + } + case 1526: { + s += " varPopStable "; + break; + } + case 1527: { + s += " varSamp "; + break; + } + case 1528: { + s += " VAR_SAMP "; + break; + } + case 1529: { + s += " varSampStable "; + break; + } + case 1530: { + s += " version "; + break; + } + case 1531: { + s += " VIEW "; + break; + } + case 1532: { + s += " visibleWidth "; + break; + } + case 1533: { + s += " visitParamExtractBool "; + break; + } + case 1534: { + s += " visitParamExtractFloat "; + break; + } + case 1535: { + s += " visitParamExtractInt "; + break; + } + case 1536: { + s += " visitParamExtractRaw "; + break; + } + case 1537: { + s += " visitParamExtractString "; + break; + } + case 1538: { + s += " visitParamExtractUInt "; + break; + } + case 1539: { + s += " visitParamHas "; + break; + } + case 1540: { + s += " VOLUME "; + break; + } + case 1541: { + s += " WATCH "; + break; + } + case 1542: { + s += " week "; + break; + } + case 1543: { + s += " WEEK "; + break; + } + case 1544: { + s += " welchTTest "; + break; + } + case 1545: { + s += " WHEN "; + break; + } + case 1546: { + s += " WHERE "; + break; + } + case 1547: { + s += " windowFunnel "; + break; + } + case 1548: { + s += " WITH "; + break; + } + case 1549: { + s += " WITH FILL "; + break; + } + case 1550: { + s += " WITH TIES "; + break; + } + case 1551: { + s += " WK "; + break; + } + case 1552: { + s += " wkt "; + break; + } + case 1553: { + s += " wordShingleMinHash "; + break; + } + case 1554: { + s += " wordShingleMinHashArg "; + break; + } + case 1555: { + s += " wordShingleMinHashArgCaseInsensitive "; + break; + } + case 1556: { + s += " wordShingleMinHashArgCaseInsensitiveUTF8 "; + break; + } + case 1557: { + s += " wordShingleMinHashArgUTF8 "; + break; + } + case 1558: { + s += " wordShingleMinHashCaseInsensitive "; + break; + } + case 1559: { + s += " wordShingleMinHashCaseInsensitiveUTF8 "; + break; + } + case 1560: { + s += " wordShingleMinHashUTF8 "; + break; + } + case 1561: { + s += " wordShingleSimHash "; + break; + } + case 1562: { + s += " wordShingleSimHashCaseInsensitive "; + break; + } + case 1563: { + s += " wordShingleSimHashCaseInsensitiveUTF8 "; + break; + } + case 1564: { + s += " wordShingleSimHashUTF8 "; + break; + } + case 1565: { + s += " WW "; + break; + } + case 1566: { + s += " xor "; + break; + } + case 1567: { + s += " xxHash32 "; + break; + } + case 1568: { + s += " xxHash64 "; + break; + } + case 1569: { + s += " kostikConsistentHash "; + break; + } + case 1570: { + s += " YEAR "; + break; + } + case 1571: { + s += " yearweek "; + break; + } + case 1572: { + s += " yesterday "; + break; + } + case 1573: { + s += " YY "; + break; + } + case 1574: { + s += " YYYY "; + break; + } + case 1575: { + s += " zookeeperSessionUptime "; + break; + } + default: break; + } +} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.proto b/src/Parsers/fuzzers/codegen_fuzzer/out.proto new file mode 100644 index 00000000000..60992ca6a81 --- /dev/null +++ b/src/Parsers/fuzzers/codegen_fuzzer/out.proto @@ -0,0 +1,1587 @@ +syntax = "proto3"; + +message Word { + enum Value { + value_0 = 0; + value_1 = 1; + value_2 = 2; + value_3 = 3; + value_4 = 4; + value_5 = 5; + value_6 = 6; + value_7 = 7; + value_8 = 8; + value_9 = 9; + value_10 = 10; + value_11 = 11; + value_12 = 12; + value_13 = 13; + value_14 = 14; + value_15 = 15; + value_16 = 16; + value_17 = 17; + value_18 = 18; + value_19 = 19; + value_20 = 20; + value_21 = 21; + value_22 = 22; + value_23 = 23; + value_24 = 24; + 
value_25 = 25; + value_26 = 26; + value_27 = 27; + value_28 = 28; + value_29 = 29; + value_30 = 30; + value_31 = 31; + value_32 = 32; + value_33 = 33; + value_34 = 34; + value_35 = 35; + value_36 = 36; + value_37 = 37; + value_38 = 38; + value_39 = 39; + value_40 = 40; + value_41 = 41; + value_42 = 42; + value_43 = 43; + value_44 = 44; + value_45 = 45; + value_46 = 46; + value_47 = 47; + value_48 = 48; + value_49 = 49; + value_50 = 50; + value_51 = 51; + value_52 = 52; + value_53 = 53; + value_54 = 54; + value_55 = 55; + value_56 = 56; + value_57 = 57; + value_58 = 58; + value_59 = 59; + value_60 = 60; + value_61 = 61; + value_62 = 62; + value_63 = 63; + value_64 = 64; + value_65 = 65; + value_66 = 66; + value_67 = 67; + value_68 = 68; + value_69 = 69; + value_70 = 70; + value_71 = 71; + value_72 = 72; + value_73 = 73; + value_74 = 74; + value_75 = 75; + value_76 = 76; + value_77 = 77; + value_78 = 78; + value_79 = 79; + value_80 = 80; + value_81 = 81; + value_82 = 82; + value_83 = 83; + value_84 = 84; + value_85 = 85; + value_86 = 86; + value_87 = 87; + value_88 = 88; + value_89 = 89; + value_90 = 90; + value_91 = 91; + value_92 = 92; + value_93 = 93; + value_94 = 94; + value_95 = 95; + value_96 = 96; + value_97 = 97; + value_98 = 98; + value_99 = 99; + value_100 = 100; + value_101 = 101; + value_102 = 102; + value_103 = 103; + value_104 = 104; + value_105 = 105; + value_106 = 106; + value_107 = 107; + value_108 = 108; + value_109 = 109; + value_110 = 110; + value_111 = 111; + value_112 = 112; + value_113 = 113; + value_114 = 114; + value_115 = 115; + value_116 = 116; + value_117 = 117; + value_118 = 118; + value_119 = 119; + value_120 = 120; + value_121 = 121; + value_122 = 122; + value_123 = 123; + value_124 = 124; + value_125 = 125; + value_126 = 126; + value_127 = 127; + value_128 = 128; + value_129 = 129; + value_130 = 130; + value_131 = 131; + value_132 = 132; + value_133 = 133; + value_134 = 134; + value_135 = 135; + value_136 = 136; + value_137 = 137; + value_138 = 138; + value_139 = 139; + value_140 = 140; + value_141 = 141; + value_142 = 142; + value_143 = 143; + value_144 = 144; + value_145 = 145; + value_146 = 146; + value_147 = 147; + value_148 = 148; + value_149 = 149; + value_150 = 150; + value_151 = 151; + value_152 = 152; + value_153 = 153; + value_154 = 154; + value_155 = 155; + value_156 = 156; + value_157 = 157; + value_158 = 158; + value_159 = 159; + value_160 = 160; + value_161 = 161; + value_162 = 162; + value_163 = 163; + value_164 = 164; + value_165 = 165; + value_166 = 166; + value_167 = 167; + value_168 = 168; + value_169 = 169; + value_170 = 170; + value_171 = 171; + value_172 = 172; + value_173 = 173; + value_174 = 174; + value_175 = 175; + value_176 = 176; + value_177 = 177; + value_178 = 178; + value_179 = 179; + value_180 = 180; + value_181 = 181; + value_182 = 182; + value_183 = 183; + value_184 = 184; + value_185 = 185; + value_186 = 186; + value_187 = 187; + value_188 = 188; + value_189 = 189; + value_190 = 190; + value_191 = 191; + value_192 = 192; + value_193 = 193; + value_194 = 194; + value_195 = 195; + value_196 = 196; + value_197 = 197; + value_198 = 198; + value_199 = 199; + value_200 = 200; + value_201 = 201; + value_202 = 202; + value_203 = 203; + value_204 = 204; + value_205 = 205; + value_206 = 206; + value_207 = 207; + value_208 = 208; + value_209 = 209; + value_210 = 210; + value_211 = 211; + value_212 = 212; + value_213 = 213; + value_214 = 214; + value_215 = 215; + value_216 = 216; + value_217 = 217; + value_218 = 218; + value_219 = 219; + 
value_220 = 220; + value_221 = 221; + value_222 = 222; + value_223 = 223; + value_224 = 224; + value_225 = 225; + value_226 = 226; + value_227 = 227; + value_228 = 228; + value_229 = 229; + value_230 = 230; + value_231 = 231; + value_232 = 232; + value_233 = 233; + value_234 = 234; + value_235 = 235; + value_236 = 236; + value_237 = 237; + value_238 = 238; + value_239 = 239; + value_240 = 240; + value_241 = 241; + value_242 = 242; + value_243 = 243; + value_244 = 244; + value_245 = 245; + value_246 = 246; + value_247 = 247; + value_248 = 248; + value_249 = 249; + value_250 = 250; + value_251 = 251; + value_252 = 252; + value_253 = 253; + value_254 = 254; + value_255 = 255; + value_256 = 256; + value_257 = 257; + value_258 = 258; + value_259 = 259; + value_260 = 260; + value_261 = 261; + value_262 = 262; + value_263 = 263; + value_264 = 264; + value_265 = 265; + value_266 = 266; + value_267 = 267; + value_268 = 268; + value_269 = 269; + value_270 = 270; + value_271 = 271; + value_272 = 272; + value_273 = 273; + value_274 = 274; + value_275 = 275; + value_276 = 276; + value_277 = 277; + value_278 = 278; + value_279 = 279; + value_280 = 280; + value_281 = 281; + value_282 = 282; + value_283 = 283; + value_284 = 284; + value_285 = 285; + value_286 = 286; + value_287 = 287; + value_288 = 288; + value_289 = 289; + value_290 = 290; + value_291 = 291; + value_292 = 292; + value_293 = 293; + value_294 = 294; + value_295 = 295; + value_296 = 296; + value_297 = 297; + value_298 = 298; + value_299 = 299; + value_300 = 300; + value_301 = 301; + value_302 = 302; + value_303 = 303; + value_304 = 304; + value_305 = 305; + value_306 = 306; + value_307 = 307; + value_308 = 308; + value_309 = 309; + value_310 = 310; + value_311 = 311; + value_312 = 312; + value_313 = 313; + value_314 = 314; + value_315 = 315; + value_316 = 316; + value_317 = 317; + value_318 = 318; + value_319 = 319; + value_320 = 320; + value_321 = 321; + value_322 = 322; + value_323 = 323; + value_324 = 324; + value_325 = 325; + value_326 = 326; + value_327 = 327; + value_328 = 328; + value_329 = 329; + value_330 = 330; + value_331 = 331; + value_332 = 332; + value_333 = 333; + value_334 = 334; + value_335 = 335; + value_336 = 336; + value_337 = 337; + value_338 = 338; + value_339 = 339; + value_340 = 340; + value_341 = 341; + value_342 = 342; + value_343 = 343; + value_344 = 344; + value_345 = 345; + value_346 = 346; + value_347 = 347; + value_348 = 348; + value_349 = 349; + value_350 = 350; + value_351 = 351; + value_352 = 352; + value_353 = 353; + value_354 = 354; + value_355 = 355; + value_356 = 356; + value_357 = 357; + value_358 = 358; + value_359 = 359; + value_360 = 360; + value_361 = 361; + value_362 = 362; + value_363 = 363; + value_364 = 364; + value_365 = 365; + value_366 = 366; + value_367 = 367; + value_368 = 368; + value_369 = 369; + value_370 = 370; + value_371 = 371; + value_372 = 372; + value_373 = 373; + value_374 = 374; + value_375 = 375; + value_376 = 376; + value_377 = 377; + value_378 = 378; + value_379 = 379; + value_380 = 380; + value_381 = 381; + value_382 = 382; + value_383 = 383; + value_384 = 384; + value_385 = 385; + value_386 = 386; + value_387 = 387; + value_388 = 388; + value_389 = 389; + value_390 = 390; + value_391 = 391; + value_392 = 392; + value_393 = 393; + value_394 = 394; + value_395 = 395; + value_396 = 396; + value_397 = 397; + value_398 = 398; + value_399 = 399; + value_400 = 400; + value_401 = 401; + value_402 = 402; + value_403 = 403; + value_404 = 404; + value_405 = 405; + value_406 = 406; + 
value_407 = 407; + value_408 = 408; + value_409 = 409; + value_410 = 410; + value_411 = 411; + value_412 = 412; + value_413 = 413; + value_414 = 414; + value_415 = 415; + value_416 = 416; + value_417 = 417; + value_418 = 418; + value_419 = 419; + value_420 = 420; + value_421 = 421; + value_422 = 422; + value_423 = 423; + value_424 = 424; + value_425 = 425; + value_426 = 426; + value_427 = 427; + value_428 = 428; + value_429 = 429; + value_430 = 430; + value_431 = 431; + value_432 = 432; + value_433 = 433; + value_434 = 434; + value_435 = 435; + value_436 = 436; + value_437 = 437; + value_438 = 438; + value_439 = 439; + value_440 = 440; + value_441 = 441; + value_442 = 442; + value_443 = 443; + value_444 = 444; + value_445 = 445; + value_446 = 446; + value_447 = 447; + value_448 = 448; + value_449 = 449; + value_450 = 450; + value_451 = 451; + value_452 = 452; + value_453 = 453; + value_454 = 454; + value_455 = 455; + value_456 = 456; + value_457 = 457; + value_458 = 458; + value_459 = 459; + value_460 = 460; + value_461 = 461; + value_462 = 462; + value_463 = 463; + value_464 = 464; + value_465 = 465; + value_466 = 466; + value_467 = 467; + value_468 = 468; + value_469 = 469; + value_470 = 470; + value_471 = 471; + value_472 = 472; + value_473 = 473; + value_474 = 474; + value_475 = 475; + value_476 = 476; + value_477 = 477; + value_478 = 478; + value_479 = 479; + value_480 = 480; + value_481 = 481; + value_482 = 482; + value_483 = 483; + value_484 = 484; + value_485 = 485; + value_486 = 486; + value_487 = 487; + value_488 = 488; + value_489 = 489; + value_490 = 490; + value_491 = 491; + value_492 = 492; + value_493 = 493; + value_494 = 494; + value_495 = 495; + value_496 = 496; + value_497 = 497; + value_498 = 498; + value_499 = 499; + value_500 = 500; + value_501 = 501; + value_502 = 502; + value_503 = 503; + value_504 = 504; + value_505 = 505; + value_506 = 506; + value_507 = 507; + value_508 = 508; + value_509 = 509; + value_510 = 510; + value_511 = 511; + value_512 = 512; + value_513 = 513; + value_514 = 514; + value_515 = 515; + value_516 = 516; + value_517 = 517; + value_518 = 518; + value_519 = 519; + value_520 = 520; + value_521 = 521; + value_522 = 522; + value_523 = 523; + value_524 = 524; + value_525 = 525; + value_526 = 526; + value_527 = 527; + value_528 = 528; + value_529 = 529; + value_530 = 530; + value_531 = 531; + value_532 = 532; + value_533 = 533; + value_534 = 534; + value_535 = 535; + value_536 = 536; + value_537 = 537; + value_538 = 538; + value_539 = 539; + value_540 = 540; + value_541 = 541; + value_542 = 542; + value_543 = 543; + value_544 = 544; + value_545 = 545; + value_546 = 546; + value_547 = 547; + value_548 = 548; + value_549 = 549; + value_550 = 550; + value_551 = 551; + value_552 = 552; + value_553 = 553; + value_554 = 554; + value_555 = 555; + value_556 = 556; + value_557 = 557; + value_558 = 558; + value_559 = 559; + value_560 = 560; + value_561 = 561; + value_562 = 562; + value_563 = 563; + value_564 = 564; + value_565 = 565; + value_566 = 566; + value_567 = 567; + value_568 = 568; + value_569 = 569; + value_570 = 570; + value_571 = 571; + value_572 = 572; + value_573 = 573; + value_574 = 574; + value_575 = 575; + value_576 = 576; + value_577 = 577; + value_578 = 578; + value_579 = 579; + value_580 = 580; + value_581 = 581; + value_582 = 582; + value_583 = 583; + value_584 = 584; + value_585 = 585; + value_586 = 586; + value_587 = 587; + value_588 = 588; + value_589 = 589; + value_590 = 590; + value_591 = 591; + value_592 = 592; + value_593 = 593; + 
value_594 = 594; + value_595 = 595; + value_596 = 596; + value_597 = 597; + value_598 = 598; + value_599 = 599; + value_600 = 600; + value_601 = 601; + value_602 = 602; + value_603 = 603; + value_604 = 604; + value_605 = 605; + value_606 = 606; + value_607 = 607; + value_608 = 608; + value_609 = 609; + value_610 = 610; + value_611 = 611; + value_612 = 612; + value_613 = 613; + value_614 = 614; + value_615 = 615; + value_616 = 616; + value_617 = 617; + value_618 = 618; + value_619 = 619; + value_620 = 620; + value_621 = 621; + value_622 = 622; + value_623 = 623; + value_624 = 624; + value_625 = 625; + value_626 = 626; + value_627 = 627; + value_628 = 628; + value_629 = 629; + value_630 = 630; + value_631 = 631; + value_632 = 632; + value_633 = 633; + value_634 = 634; + value_635 = 635; + value_636 = 636; + value_637 = 637; + value_638 = 638; + value_639 = 639; + value_640 = 640; + value_641 = 641; + value_642 = 642; + value_643 = 643; + value_644 = 644; + value_645 = 645; + value_646 = 646; + value_647 = 647; + value_648 = 648; + value_649 = 649; + value_650 = 650; + value_651 = 651; + value_652 = 652; + value_653 = 653; + value_654 = 654; + value_655 = 655; + value_656 = 656; + value_657 = 657; + value_658 = 658; + value_659 = 659; + value_660 = 660; + value_661 = 661; + value_662 = 662; + value_663 = 663; + value_664 = 664; + value_665 = 665; + value_666 = 666; + value_667 = 667; + value_668 = 668; + value_669 = 669; + value_670 = 670; + value_671 = 671; + value_672 = 672; + value_673 = 673; + value_674 = 674; + value_675 = 675; + value_676 = 676; + value_677 = 677; + value_678 = 678; + value_679 = 679; + value_680 = 680; + value_681 = 681; + value_682 = 682; + value_683 = 683; + value_684 = 684; + value_685 = 685; + value_686 = 686; + value_687 = 687; + value_688 = 688; + value_689 = 689; + value_690 = 690; + value_691 = 691; + value_692 = 692; + value_693 = 693; + value_694 = 694; + value_695 = 695; + value_696 = 696; + value_697 = 697; + value_698 = 698; + value_699 = 699; + value_700 = 700; + value_701 = 701; + value_702 = 702; + value_703 = 703; + value_704 = 704; + value_705 = 705; + value_706 = 706; + value_707 = 707; + value_708 = 708; + value_709 = 709; + value_710 = 710; + value_711 = 711; + value_712 = 712; + value_713 = 713; + value_714 = 714; + value_715 = 715; + value_716 = 716; + value_717 = 717; + value_718 = 718; + value_719 = 719; + value_720 = 720; + value_721 = 721; + value_722 = 722; + value_723 = 723; + value_724 = 724; + value_725 = 725; + value_726 = 726; + value_727 = 727; + value_728 = 728; + value_729 = 729; + value_730 = 730; + value_731 = 731; + value_732 = 732; + value_733 = 733; + value_734 = 734; + value_735 = 735; + value_736 = 736; + value_737 = 737; + value_738 = 738; + value_739 = 739; + value_740 = 740; + value_741 = 741; + value_742 = 742; + value_743 = 743; + value_744 = 744; + value_745 = 745; + value_746 = 746; + value_747 = 747; + value_748 = 748; + value_749 = 749; + value_750 = 750; + value_751 = 751; + value_752 = 752; + value_753 = 753; + value_754 = 754; + value_755 = 755; + value_756 = 756; + value_757 = 757; + value_758 = 758; + value_759 = 759; + value_760 = 760; + value_761 = 761; + value_762 = 762; + value_763 = 763; + value_764 = 764; + value_765 = 765; + value_766 = 766; + value_767 = 767; + value_768 = 768; + value_769 = 769; + value_770 = 770; + value_771 = 771; + value_772 = 772; + value_773 = 773; + value_774 = 774; + value_775 = 775; + value_776 = 776; + value_777 = 777; + value_778 = 778; + value_779 = 779; + value_780 = 780; + 
value_781 = 781; + value_782 = 782; + value_783 = 783; + value_784 = 784; + value_785 = 785; + value_786 = 786; + value_787 = 787; + value_788 = 788; + value_789 = 789; + value_790 = 790; + value_791 = 791; + value_792 = 792; + value_793 = 793; + value_794 = 794; + value_795 = 795; + value_796 = 796; + value_797 = 797; + value_798 = 798; + value_799 = 799; + value_800 = 800; + value_801 = 801; + value_802 = 802; + value_803 = 803; + value_804 = 804; + value_805 = 805; + value_806 = 806; + value_807 = 807; + value_808 = 808; + value_809 = 809; + value_810 = 810; + value_811 = 811; + value_812 = 812; + value_813 = 813; + value_814 = 814; + value_815 = 815; + value_816 = 816; + value_817 = 817; + value_818 = 818; + value_819 = 819; + value_820 = 820; + value_821 = 821; + value_822 = 822; + value_823 = 823; + value_824 = 824; + value_825 = 825; + value_826 = 826; + value_827 = 827; + value_828 = 828; + value_829 = 829; + value_830 = 830; + value_831 = 831; + value_832 = 832; + value_833 = 833; + value_834 = 834; + value_835 = 835; + value_836 = 836; + value_837 = 837; + value_838 = 838; + value_839 = 839; + value_840 = 840; + value_841 = 841; + value_842 = 842; + value_843 = 843; + value_844 = 844; + value_845 = 845; + value_846 = 846; + value_847 = 847; + value_848 = 848; + value_849 = 849; + value_850 = 850; + value_851 = 851; + value_852 = 852; + value_853 = 853; + value_854 = 854; + value_855 = 855; + value_856 = 856; + value_857 = 857; + value_858 = 858; + value_859 = 859; + value_860 = 860; + value_861 = 861; + value_862 = 862; + value_863 = 863; + value_864 = 864; + value_865 = 865; + value_866 = 866; + value_867 = 867; + value_868 = 868; + value_869 = 869; + value_870 = 870; + value_871 = 871; + value_872 = 872; + value_873 = 873; + value_874 = 874; + value_875 = 875; + value_876 = 876; + value_877 = 877; + value_878 = 878; + value_879 = 879; + value_880 = 880; + value_881 = 881; + value_882 = 882; + value_883 = 883; + value_884 = 884; + value_885 = 885; + value_886 = 886; + value_887 = 887; + value_888 = 888; + value_889 = 889; + value_890 = 890; + value_891 = 891; + value_892 = 892; + value_893 = 893; + value_894 = 894; + value_895 = 895; + value_896 = 896; + value_897 = 897; + value_898 = 898; + value_899 = 899; + value_900 = 900; + value_901 = 901; + value_902 = 902; + value_903 = 903; + value_904 = 904; + value_905 = 905; + value_906 = 906; + value_907 = 907; + value_908 = 908; + value_909 = 909; + value_910 = 910; + value_911 = 911; + value_912 = 912; + value_913 = 913; + value_914 = 914; + value_915 = 915; + value_916 = 916; + value_917 = 917; + value_918 = 918; + value_919 = 919; + value_920 = 920; + value_921 = 921; + value_922 = 922; + value_923 = 923; + value_924 = 924; + value_925 = 925; + value_926 = 926; + value_927 = 927; + value_928 = 928; + value_929 = 929; + value_930 = 930; + value_931 = 931; + value_932 = 932; + value_933 = 933; + value_934 = 934; + value_935 = 935; + value_936 = 936; + value_937 = 937; + value_938 = 938; + value_939 = 939; + value_940 = 940; + value_941 = 941; + value_942 = 942; + value_943 = 943; + value_944 = 944; + value_945 = 945; + value_946 = 946; + value_947 = 947; + value_948 = 948; + value_949 = 949; + value_950 = 950; + value_951 = 951; + value_952 = 952; + value_953 = 953; + value_954 = 954; + value_955 = 955; + value_956 = 956; + value_957 = 957; + value_958 = 958; + value_959 = 959; + value_960 = 960; + value_961 = 961; + value_962 = 962; + value_963 = 963; + value_964 = 964; + value_965 = 965; + value_966 = 966; + value_967 = 967; + 
+    value_968 = 968;
[... the generated enum continues one "value_N = N;" entry per line through value_1575 = 1575; ...]
+  }
+  Value value = 1;
+  Sentence inner = 2;
+}
+message Sentence {
+  repeated Word words = 1;
+}
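For context before the next patch: clickhouse.g is the grammar source, while out.proto and out.cpp above are generated from it for the codegen fuzzer target. A harness along the following lines is the usual way such a protobuf-encoded grammar is driven. This is a sketch only: the DEFINE_BINARY_PROTO_FUZZER macro comes from libprotobuf-mutator's libfuzzer glue header, and the commented-out parser calls are hypothetical placeholders, not code taken from this patch series.

#include <string>

#include <libfuzzer/libfuzzer_macro.h>  // libprotobuf-mutator's libFuzzer glue

#include "out.pb.h"

void GenerateSentence(const Sentence & stc, std::string & s, int depth);

// libprotobuf-mutator decodes and mutates the raw fuzz input as a Sentence
// message, so every input the fuzzer explores is a structurally valid
// derivation of the grammar encoded in out.proto.
DEFINE_BINARY_PROTO_FUZZER(const Sentence & main)
{
    static std::string input;
    input.reserve(4096);

    // Render the Word tree into one SQL-like query string.
    GenerateSentence(main, input, 0);

    // Feed the string to the component under test, e.g. the ClickHouse
    // parser (hypothetical call, not part of this patch series):
    // DB::ParserQueryWithOutput parser(input.data() + input.size());
    // DB::parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);

    input.clear();
}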
" $1 " FROM " $2 " SORT BY " $3 ; -"SELECT " $1 " FROM " $2 " LIMIT " $3 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 ; -"SELECT " $1 " FROM " $2 " ARRAY JOIN " $3 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 " ON " $4 ; -"SELECT " $1 " FROM " $2 " JOIN " $3 " USING " $5 ; -"SELECT " $1 " INTO OUTFILE " $2 ; - -"WITH " $1 " AS " $2 ; - -"{" $1 ":" $2 "}"; -"[" $1 "," $2 "]"; -"[]"; - - -" x "; -"x"; -" `x` "; -"`x`"; - -" \"value\" "; -"\"value\""; -" 0 "; -"0"; -"1"; -"2"; -"123123123123123123"; -"182374019873401982734091873420923123123123123123"; -"1e-1"; -"1.1"; -"\"\""; -" '../../../../../../../../../etc/passwd' "; - -"/"; -"="; -"=="; -"!="; -"<>"; -"<"; -"<="; -">"; -">="; -"<<"; -"|<<"; -"&"; -"|"; -"||"; -"<|"; -"|>"; -"+"; -"-"; -"~"; -"*"; -"/"; -"\\"; -"%"; -""; -"."; -","; -","; -","; -","; -","; -","; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"("; -")"; -"?"; -":"; -"@"; -"@@"; -"$"; -"\""; -"`"; -"{"; -"}"; -"^"; -"::"; -"->"; -"]"; -"["; - -" abs "; -" accurate_Cast "; -" accurateCast "; -" accurate_CastOrNull "; -" accurateCastOrNull "; -" acos "; -" acosh "; -" ADD "; -" ADD COLUMN "; -" ADD CONSTRAINT "; -" addDays "; -" addHours "; -" ADD INDEX "; -" addMinutes "; -" addMonths "; -" addQuarters "; -" addressToLine "; -" addressToSymbol "; -" addSeconds "; -" addWeeks "; -" addYears "; -" aes_decrypt_mysql "; -" aes_encrypt_mysql "; -" AFTER "; -" AggregateFunction "; -" aggThrow "; -" ALIAS "; -" ALL "; -" alphaTokens "; -" ALTER "; -" ALTER LIVE VIEW "; -" ALTER TABLE "; -" and "; -" AND "; -" ANTI "; -" any "; -" ANY "; -" anyHeavy "; -" anyLast "; -" appendTrailingCharIfAbsent "; -" argMax "; -" argMin "; -" array "; -" Array "; -" ARRAY "; -" arrayAll "; -" arrayAUC "; -" arrayAvg "; -" arrayCompact "; -" arrayConcat "; -" arrayCount "; -" arrayCumSum "; -" arrayCumSumNonNegative "; -" arrayDifference "; -" arrayDistinct "; -" arrayElement "; -" arrayEnumerate "; -" arrayEnumerateDense "; -" arrayEnumerateDenseRanked "; -" arrayEnumerateUniq "; -" arrayEnumerateUniqRanked "; -" arrayExists "; -" arrayFill "; -" arrayFilter "; -" arrayFirst "; -" arrayFirstIndex "; -" arrayFlatten "; -" arrayIntersect "; -" arrayJoin "; -" ARRAY JOIN "; -" arrayMap "; -" arrayMax "; -" arrayMin "; -" arrayPartialReverseSort "; -" arrayPartialShuffle "; -" arrayPartialSort "; -" arrayPopBack "; -" arrayPopFront "; -" arrayProduct "; -" arrayPushBack "; -" arrayPushFront "; -" arrayReduce "; -" arrayReduceInRanges "; -" arrayResize "; -" arrayReverse "; -" arrayReverseFill "; -" arrayReverseSort "; -" arrayReverseSplit "; -" arrayShuffle "; -" arraySlice "; -" arraySort "; -" arraySplit "; -" arrayStringConcat "; -" arraySum "; -" arrayUniq "; -" arrayWithConstant "; -" arrayZip "; -" AS "; -" ASC "; -" ASCENDING "; -" asin "; -" asinh "; -" ASOF "; -" assumeNotNull "; -" AST "; -" ASYNC "; -" atan "; -" atan2 "; -" atanh "; -" ATTACH "; -" ATTACH PART "; -" ATTACH PARTITION "; -" avg "; -" avgWeighted "; -" bar "; -" base64Decode "; -" base64Encode "; -" basename "; -" bayesAB "; -" BETWEEN "; -" BIGINT "; -" BIGINT SIGNED "; -" BIGINT UNSIGNED "; -" bin "; -" BINARY "; -" BINARY LARGE OBJECT "; -" BINARY VARYING "; -" bitAnd "; -" BIT_AND "; -" __bitBoolMaskAnd "; -" __bitBoolMaskOr "; -" bitCount "; -" bitHammingDistance "; -" bitmapAnd "; -" bitmapAndCardinality "; -" bitmapAndnot "; -" bitmapAndnotCardinality "; -" bitmapBuild "; -" bitmapCardinality "; -" bitmapContains "; -" bitmapHasAll "; -" bitmapHasAny "; -" bitmapMax "; -" bitmapMin "; -" bitmapOr "; -" 
bitmapOrCardinality "; -" bitmapSubsetInRange "; -" bitmapSubsetLimit "; -" bitmapToArray "; -" bitmapTransform "; -" bitmapXor "; -" bitmapXorCardinality "; -" bitmaskToArray "; -" bitmaskToList "; -" bitNot "; -" bitOr "; -" BIT_OR "; -" bitPositionsToArray "; -" bitRotateLeft "; -" bitRotateRight "; -" bitShiftLeft "; -" bitShiftRight "; -" __bitSwapLastTwo "; -" bitTest "; -" bitTestAll "; -" bitTestAny "; -" __bitWrapperFunc "; -" bitXor "; -" BIT_XOR "; -" BLOB "; -" blockNumber "; -" blockSerializedSize "; -" blockSize "; -" BOOL "; -" BOOLEAN "; -" BOTH "; -" boundingRatio "; -" buildId "; -" BY "; -" BYTE "; -" BYTEA "; -" byteSize "; -" CASE "; -" caseWithExpr "; -" caseWithExpression "; -" caseWithoutExpr "; -" caseWithoutExpression "; -" _CAST "; -" CAST "; -" categoricalInformationValue "; -" cbrt "; -" ceil "; -" ceiling "; -" char "; -" CHAR "; -" CHARACTER "; -" CHARACTER LARGE OBJECT "; -" CHARACTER_LENGTH "; -" CHARACTER VARYING "; -" CHAR LARGE OBJECT "; -" CHAR_LENGTH "; -" CHAR VARYING "; -" CHECK "; -" CHECK TABLE "; -" cityHash64 "; -" CLEAR "; -" CLEAR COLUMN "; -" CLEAR INDEX "; -" CLOB "; -" CLUSTER "; -" coalesce "; -" CODEC "; -" COLLATE "; -" COLUMN "; -" COLUMNS "; -" COMMENT "; -" COMMENT COLUMN "; -" concat "; -" concatAssumeInjective "; -" connection_id "; -" connectionid "; -" connectionId "; -" CONSTRAINT "; -" convertCharset "; -" corr "; -" corrStable "; -" cos "; -" cosh "; -" count "; -" countDigits "; -" countEqual "; -" countMatches "; -" countMatchesCaseInsensitive "; -" countSubstrings "; -" countSubstringsCaseInsensitive "; -" countSubstringsCaseInsensitiveUTF8 "; -" covarPop "; -" COVAR_POP "; -" covarPopStable "; -" covarSamp "; -" COVAR_SAMP "; -" covarSampStable "; -" CRC32 "; -" CRC32IEEE "; -" CRC64 "; -" CREATE "; -" CROSS "; -" CUBE "; -" currentDatabase "; -" currentProfiles "; -" currentRoles "; -" currentUser "; -" cutFragment "; -" cutIPv6 "; -" cutQueryString "; -" cutQueryStringAndFragment "; -" cutToFirstSignificantSubdomain "; -" cutToFirstSignificantSubdomainCustom "; -" cutToFirstSignificantSubdomainCustomWithWWW "; -" cutToFirstSignificantSubdomainWithWWW "; -" cutURLParameter "; -" cutWWW "; -" D "; -" DATABASE "; -" DATABASES "; -" Date "; -" DATE "; -" Date32 "; -" DATE_ADD "; -" DATEADD "; -" dateDiff "; -" DATE_DIFF "; -" DATEDIFF "; -" dateName "; -" DATE_SUB "; -" DATESUB "; -" DateTime "; -" DateTime32 "; -" DateTime64 "; -" dateTime64ToSnowflake "; -" dateTimeToSnowflake "; -" date_trunc "; -" dateTrunc "; -" DAY "; -" DAYOFMONTH "; -" DAYOFWEEK "; -" DAYOFYEAR "; -" DD "; -" DEC "; -" Decimal "; -" Decimal128 "; -" Decimal256 "; -" Decimal32 "; -" Decimal64 "; -" decodeURLComponent "; -" decodeXMLComponent "; -" decrypt "; -" DEDUPLICATE "; -" DEFAULT "; -" defaultProfiles "; -" defaultRoles "; -" defaultValueOfArgumentType "; -" defaultValueOfTypeName "; -" DELAY "; -" DELETE "; -" DELETE WHERE "; -" deltaSum "; -" deltaSumTimestamp "; -" demangle "; -" dense_rank "; -" DESC "; -" DESCENDING "; -" DESCRIBE "; -" DETACH "; -" DETACH PARTITION "; -" dictGet "; -" dictGetChildren "; -" dictGetDate "; -" dictGetDateOrDefault "; -" dictGetDateTime "; -" dictGetDateTimeOrDefault "; -" dictGetDescendants "; -" dictGetFloat32 "; -" dictGetFloat32OrDefault "; -" dictGetFloat64 "; -" dictGetFloat64OrDefault "; -" dictGetHierarchy "; -" dictGetInt16 "; -" dictGetInt16OrDefault "; -" dictGetInt32 "; -" dictGetInt32OrDefault "; -" dictGetInt64 "; -" dictGetInt64OrDefault "; -" dictGetInt8 "; -" dictGetInt8OrDefault "; -" 
dictGetOrDefault "; -" dictGetOrNull "; -" dictGetString "; -" dictGetStringOrDefault "; -" dictGetUInt16 "; -" dictGetUInt16OrDefault "; -" dictGetUInt32 "; -" dictGetUInt32OrDefault "; -" dictGetUInt64 "; -" dictGetUInt64OrDefault "; -" dictGetUInt8 "; -" dictGetUInt8OrDefault "; -" dictGetUUID "; -" dictGetUUIDOrDefault "; -" dictHas "; -" DICTIONARIES "; -" DICTIONARY "; -" dictIsIn "; -" DISK "; -" DISTINCT "; -" DISTRIBUTED "; -" divide "; -" domain "; -" domainWithoutWWW "; -" DOUBLE "; -" DOUBLE PRECISION "; -" DROP "; -" DROP COLUMN "; -" DROP CONSTRAINT "; -" DROP DETACHED PART "; -" DROP DETACHED PARTITION "; -" DROP INDEX "; -" DROP PARTITION "; -" dumpColumnStructure "; -" e "; -" ELSE "; -" empty "; -" emptyArrayDate "; -" emptyArrayDateTime "; -" emptyArrayFloat32 "; -" emptyArrayFloat64 "; -" emptyArrayInt16 "; -" emptyArrayInt32 "; -" emptyArrayInt64 "; -" emptyArrayInt8 "; -" emptyArrayString "; -" emptyArrayToSingle "; -" emptyArrayUInt16 "; -" emptyArrayUInt32 "; -" emptyArrayUInt64 "; -" emptyArrayUInt8 "; -" enabledProfiles "; -" enabledRoles "; -" encodeXMLComponent "; -" encrypt "; -" END "; -" endsWith "; -" ENGINE "; -" entropy "; -" Enum "; -" ENUM "; -" Enum16 "; -" Enum8 "; -" equals "; -" erf "; -" erfc "; -" errorCodeToName "; -" evalMLMethod "; -" EVENTS "; -" EXCHANGE TABLES "; -" EXISTS "; -" exp "; -" exp10 "; -" exp2 "; -" EXPLAIN "; -" exponentialMovingAverage "; -" EXPRESSION "; -" extract "; -" EXTRACT "; -" extractAll "; -" extractAllGroups "; -" extractAllGroupsHorizontal "; -" extractAllGroupsVertical "; -" extractGroups "; -" extractTextFromHTML "; -" extractURLParameter "; -" extractURLParameterNames "; -" extractURLParameters "; -" farmFingerprint64 "; -" farmHash64 "; -" FETCHES "; -" FETCH PART "; -" FETCH PARTITION "; -" file "; -" filesystemAvailable "; -" filesystemCapacity "; -" filesystemFree "; -" FINAL "; -" finalizeAggregation "; -" FIRST "; -" firstSignificantSubdomain "; -" firstSignificantSubdomainCustom "; -" first_value "; -" FIXED "; -" FixedString "; -" flatten "; -" FLOAT "; -" Float32 "; -" Float64 "; -" floor "; -" FLUSH "; -" FOR "; -" ForEach "; -" format "; -" FORMAT "; -" formatDateTime "; -" formatReadableQuantity "; -" formatReadableDecimalSize "; -" formatReadableSize "; -" formatReadableTimeDelta "; -" formatRow "; -" formatRowNoNewline "; -" FQDN "; -" fragment "; -" FREEZE "; -" FROM "; -" FROM_BASE64 "; -" fromModifiedJulianDay "; -" fromModifiedJulianDayOrNull "; -" FROM_UNIXTIME "; -" fromUnixTimestamp "; -" fromUnixTimestamp64Micro "; -" fromUnixTimestamp64Milli "; -" fromUnixTimestamp64Nano "; -" FULL "; -" fullHostName "; -" FUNCTION "; -" fuzzBits "; -" gccMurmurHash "; -" gcd "; -" generateUUIDv4 "; -" geoDistance "; -" geohashDecode "; -" geohashEncode "; -" geohashesInBox "; -" geoToH3 "; -" geoToS2 "; -" getMacro "; -" __getScalar "; -" getServerPort "; -" getSetting "; -" getSizeOfEnumType "; -" GLOBAL "; -" globalIn "; -" globalInIgnoreSet "; -" globalNotIn "; -" globalNotInIgnoreSet "; -" globalNotNullIn "; -" globalNotNullInIgnoreSet "; -" globalNullIn "; -" globalNullInIgnoreSet "; -" globalVariable "; -" GRANULARITY "; -" greatCircleAngle "; -" greatCircleDistance "; -" greater "; -" greaterOrEquals "; -" greatest "; -" GROUP "; -" groupArray "; -" groupArrayInsertAt "; -" groupArrayMovingAvg "; -" groupArrayMovingSum "; -" groupArraySample "; -" groupBitAnd "; -" groupBitmap "; -" groupBitmapAnd "; -" groupBitmapOr "; -" groupBitmapXor "; -" groupBitOr "; -" groupBitXor "; -" GROUP BY "; -" 
groupUniqArray "; -" h3EdgeAngle "; -" h3EdgeLengthM "; -" h3GetBaseCell "; -" h3GetFaces "; -" h3GetResolution "; -" h3HexAreaM2 "; -" h3IndexesAreNeighbors "; -" h3IsPentagon "; -" h3IsResClassIII "; -" h3IsValid "; -" h3kRing "; -" h3ToChildren "; -" h3ToGeo "; -" h3ToGeoBoundary "; -" h3ToParent "; -" h3ToString "; -" halfMD5 "; -" has "; -" hasAll "; -" hasAny "; -" hasColumnInTable "; -" hasSubstr "; -" hasThreadFuzzer "; -" hasToken "; -" hasTokenCaseInsensitive "; -" HAVING "; -" hex "; -" HH "; -" HIERARCHICAL "; -" histogram "; -" hiveHash "; -" hostname "; -" hostName "; -" HOUR "; -" hypot "; -" ID "; -" identity "; -" if "; -" IF "; -" IF EXISTS "; -" IF NOT EXISTS "; -" ifNotFinite "; -" ifNull "; -" ignore "; -" ilike "; -" ILIKE "; -" in "; -" IN "; -" INDEX "; -" indexHint "; -" indexOf "; -" INET4 "; -" INET6 "; -" INET6_ATON "; -" INET6_NTOA "; -" INET_ATON "; -" INET_NTOA "; -" INF "; -" inIgnoreSet "; -" initializeAggregation "; -" initial_query_id "; -" initialQueryID "; -" INJECTIVE "; -" INNER "; -" IN PARTITION "; -" INSERT "; -" INSERT INTO "; -" INT "; -" INT1 "; -" Int128 "; -" Int16 "; -" INT1 SIGNED "; -" INT1 UNSIGNED "; -" Int256 "; -" Int32 "; -" Int64 "; -" Int8 "; -" intDiv "; -" intDivOrZero "; -" INTEGER "; -" INTEGER SIGNED "; -" INTEGER UNSIGNED "; -" INTERVAL "; -" IntervalDay "; -" IntervalHour "; -" intervalLengthSum "; -" IntervalMinute "; -" IntervalMonth "; -" IntervalQuarter "; -" IntervalSecond "; -" IntervalWeek "; -" IntervalYear "; -" intExp10 "; -" intExp2 "; -" intHash32 "; -" intHash64 "; -" INTO "; -" INTO OUTFILE "; -" INT SIGNED "; -" INT UNSIGNED "; -" IPv4 "; -" IPv4CIDRToRange "; -" IPv4NumToString "; -" IPv4NumToStringClassC "; -" IPv4StringToNum "; -" IPv4ToIPv6 "; -" IPv6 "; -" IPv6CIDRToRange "; -" IPv6NumToString "; -" IPv6StringToNum "; -" IS "; -" isConstant "; -" isDecimalOverflow "; -" isFinite "; -" isInfinite "; -" isIPAddressInRange "; -" isIPv4String "; -" isIPv6String "; -" isNaN "; -" isNotNull "; -" isNull "; -" IS_OBJECT_ID "; -" isValidJSON "; -" isValidUTF8 "; -" isZeroOrNull "; -" javaHash "; -" javaHashUTF16LE "; -" JOIN "; -" joinGet "; -" joinGetOrNull "; -" JSON_EXISTS "; -" JSONExtract "; -" JSONExtractArrayRaw "; -" JSONExtractBool "; -" JSONExtractFloat "; -" JSONExtractInt "; -" JSONExtractKeysAndValues "; -" JSONExtractKeysAndValuesRaw "; -" JSONExtractKeys "; -" JSONExtractRaw "; -" JSONExtractString "; -" JSONExtractUInt "; -" JSONHas "; -" JSONKey "; -" JSONLength "; -" JSON_QUERY "; -" JSONType "; -" JSON_VALUE "; -" jumpConsistentHash "; -" KEY "; -" KILL "; -" kurtPop "; -" kurtSamp "; -" lagInFrame "; -" LAST "; -" last_value "; -" LAYOUT "; -" lcase "; -" lcm "; -" leadInFrame "; -" LEADING "; -" least "; -" LEFT "; -" LEFT ARRAY JOIN "; -" leftPad "; -" leftPadUTF8 "; -" lemmatize "; -" length "; -" lengthUTF8 "; -" less "; -" lessOrEquals "; -" lgamma "; -" LIFETIME "; -" like "; -" LIKE "; -" LIMIT "; -" LIVE "; -" ln "; -" LOCAL "; -" locate "; -" log "; -" log10 "; -" log1p "; -" log2 "; -" LOGS "; -" logTrace "; -" LONGBLOB "; -" LONGTEXT "; -" LowCardinality "; -" lowCardinalityIndices "; -" lowCardinalityKeys "; -" lower "; -" lowerUTF8 "; -" lpad "; -" LTRIM "; -" M "; -" MACNumToString "; -" MACStringToNum "; -" MACStringToOUI "; -" mannWhitneyUTest "; -" map "; -" Map "; -" mapAdd "; -" mapContains "; -" mapKeys "; -" mapPopulateSeries "; -" mapSubtract "; -" mapValues "; -" match "; -" materialize "; -" MATERIALIZE "; -" MATERIALIZED "; -" MATERIALIZE INDEX "; -" MATERIALIZE TTL "; 
-" max "; -" MAX "; -" maxIntersections "; -" maxIntersectionsPosition "; -" maxMap "; -" MD4 "; -" MD5 "; -" median "; -" medianBFloat16 "; -" medianBFloat16Weighted "; -" medianDeterministic "; -" medianExact "; -" medianExactHigh "; -" medianExactLow "; -" medianExactWeighted "; -" medianTDigest "; -" medianTDigestWeighted "; -" medianTiming "; -" medianTimingWeighted "; -" MEDIUMBLOB "; -" MEDIUMINT "; -" MEDIUMINT SIGNED "; -" MEDIUMINT UNSIGNED "; -" MEDIUMTEXT "; -" Merge "; -" MERGES "; -" metroHash64 "; -" MI "; -" mid "; -" min "; -" MIN "; -" minMap "; -" minus "; -" MINUTE "; -" MM "; -" mod "; -" MODIFY "; -" MODIFY COLUMN "; -" MODIFY ORDER BY "; -" MODIFY QUERY "; -" MODIFY SETTING "; -" MODIFY TTL "; -" modulo "; -" moduloLegacy "; -" moduloOrZero "; -" MONTH "; -" MOVE "; -" MOVE PART "; -" MOVE PARTITION "; -" movingXXX "; -" multiFuzzyMatchAllIndices "; -" multiFuzzyMatchAny "; -" multiFuzzyMatchAnyIndex "; -" multiIf "; -" multiMatchAllIndices "; -" multiMatchAny "; -" multiMatchAnyIndex "; -" multiply "; -" MultiPolygon "; -" multiSearchAllPositions "; -" multiSearchAllPositionsCaseInsensitive "; -" multiSearchAllPositionsCaseInsensitiveUTF8 "; -" multiSearchAllPositionsUTF8 "; -" multiSearchAny "; -" multiSearchAnyCaseInsensitive "; -" multiSearchAnyCaseInsensitiveUTF8 "; -" multiSearchAnyUTF8 "; -" multiSearchFirstIndex "; -" multiSearchFirstIndexCaseInsensitive "; -" multiSearchFirstIndexCaseInsensitiveUTF8 "; -" multiSearchFirstIndexUTF8 "; -" multiSearchFirstPosition "; -" multiSearchFirstPositionCaseInsensitive "; -" multiSearchFirstPositionCaseInsensitiveUTF8 "; -" multiSearchFirstPositionUTF8 "; -" murmurHash2_32 "; -" murmurHash2_64 "; -" murmurHash3_128 "; -" murmurHash3_32 "; -" murmurHash3_64 "; -" MUTATION "; -" N "; -" NAME "; -" NAN_SQL "; -" NATIONAL CHAR "; -" NATIONAL CHARACTER "; -" NATIONAL CHARACTER LARGE OBJECT "; -" NATIONAL CHARACTER VARYING "; -" NATIONAL CHAR VARYING "; -" NCHAR "; -" NCHAR LARGE OBJECT "; -" NCHAR VARYING "; -" negate "; -" neighbor "; -" Nested "; -" netloc "; -" ngramDistance "; -" ngramDistanceCaseInsensitive "; -" ngramDistanceCaseInsensitiveUTF8 "; -" ngramDistanceUTF8 "; -" ngramMinHash "; -" ngramMinHashArg "; -" ngramMinHashArgCaseInsensitive "; -" ngramMinHashArgCaseInsensitiveUTF8 "; -" ngramMinHashArgUTF8 "; -" ngramMinHashCaseInsensitive "; -" ngramMinHashCaseInsensitiveUTF8 "; -" ngramMinHashUTF8 "; -" ngramSearch "; -" ngramSearchCaseInsensitive "; -" ngramSearchCaseInsensitiveUTF8 "; -" ngramSearchUTF8 "; -" ngramSimHash "; -" ngramSimHashCaseInsensitive "; -" ngramSimHashCaseInsensitiveUTF8 "; -" ngramSimHashUTF8 "; -" NO "; -" NO DELAY "; -" NONE "; -" normalizedQueryHash "; -" normalizedQueryHashKeepNames "; -" normalizeQuery "; -" normalizeQueryKeepNames "; -" not "; -" NOT "; -" notEmpty "; -" notEquals "; -" nothing "; -" Nothing "; -" notILike "; -" notIn "; -" notInIgnoreSet "; -" notLike "; -" notNullIn "; -" notNullInIgnoreSet "; -" now "; -" now64 "; -" Null "; -" Nullable "; -" nullIf "; -" nullIn "; -" nullInIgnoreSet "; -" NULLS "; -" NULL_SQL "; -" NUMERIC "; -" NVARCHAR "; -" OFFSET "; -" ON "; -" ONLY "; -" OPTIMIZE "; -" OPTIMIZE TABLE "; -" or "; -" OR "; -" ORDER "; -" ORDER BY "; -" OR REPLACE "; -" OUTER "; -" OUTFILE "; -" parseDateTime32BestEffort "; -" parseDateTime32BestEffortOrNull "; -" parseDateTime32BestEffortOrZero "; -" parseDateTime64BestEffort "; -" parseDateTime64BestEffortOrNull "; -" parseDateTime64BestEffortOrZero "; -" parseDateTimeBestEffort "; -" 
parseDateTimeBestEffortOrNull "; -" parseDateTimeBestEffortOrZero "; -" parseDateTimeBestEffortUS "; -" parseDateTimeBestEffortUSOrNull "; -" parseDateTimeBestEffortUSOrZero "; -" parseTimeDelta "; -" PARTITION "; -" PARTITION BY "; -" partitionId "; -" path "; -" pathFull "; -" pi "; -" plus "; -" Point "; -" pointInEllipses "; -" pointInPolygon "; -" Polygon "; -" polygonAreaCartesian "; -" polygonAreaSpherical "; -" polygonConvexHullCartesian "; -" polygonPerimeterCartesian "; -" polygonPerimeterSpherical "; -" polygonsDistanceCartesian "; -" polygonsDistanceSpherical "; -" polygonsEqualsCartesian "; -" polygonsIntersectionCartesian "; -" polygonsIntersectionSpherical "; -" polygonsSymDifferenceCartesian "; -" polygonsSymDifferenceSpherical "; -" polygonsUnionCartesian "; -" polygonsUnionSpherical "; -" polygonsWithinCartesian "; -" polygonsWithinSpherical "; -" POPULATE "; -" port "; -" position "; -" positionCaseInsensitive "; -" positionCaseInsensitiveUTF8 "; -" positionUTF8 "; -" pow "; -" power "; -" PREWHERE "; -" PRIMARY "; -" PRIMARY KEY "; -" PROJECTION "; -" protocol "; -" Q "; -" QQ "; -" quantile "; -" quantileBFloat16 "; -" quantileBFloat16Weighted "; -" quantileDeterministic "; -" quantileExact "; -" quantileExactExclusive "; -" quantileExactHigh "; -" quantileExactInclusive "; -" quantileExactLow "; -" quantileExactWeighted "; -" quantiles "; -" quantilesBFloat16 "; -" quantilesBFloat16Weighted "; -" quantilesDeterministic "; -" quantilesExact "; -" quantilesExactExclusive "; -" quantilesExactHigh "; -" quantilesExactInclusive "; -" quantilesExactLow "; -" quantilesExactWeighted "; -" quantilesTDigest "; -" quantilesTDigestWeighted "; -" quantilesTiming "; -" quantilesTimingWeighted "; -" quantileTDigest "; -" quantileTDigestWeighted "; -" quantileTiming "; -" quantileTimingWeighted "; -" QUARTER "; -" query_id "; -" queryID "; -" queryString "; -" queryStringAndFragment "; -" rand "; -" rand32 "; -" rand64 "; -" randConstant "; -" randomFixedString "; -" randomPrintableASCII "; -" randomString "; -" randomStringUTF8 "; -" range "; -" RANGE "; -" rank "; -" rankCorr "; -" readWKTMultiPolygon "; -" readWKTPoint "; -" readWKTPolygon "; -" readWKTRing "; -" REAL "; -" REFRESH "; -" regexpQuoteMeta "; -" regionHierarchy "; -" regionIn "; -" regionToArea "; -" regionToCity "; -" regionToContinent "; -" regionToCountry "; -" regionToDistrict "; -" regionToName "; -" regionToPopulation "; -" regionToTopContinent "; -" reinterpret "; -" reinterpretAsDate "; -" reinterpretAsDateTime "; -" reinterpretAsFixedString "; -" reinterpretAsFloat32 "; -" reinterpretAsFloat64 "; -" reinterpretAsInt128 "; -" reinterpretAsInt16 "; -" reinterpretAsInt256 "; -" reinterpretAsInt32 "; -" reinterpretAsInt64 "; -" reinterpretAsInt8 "; -" reinterpretAsString "; -" reinterpretAsUInt128 "; -" reinterpretAsUInt16 "; -" reinterpretAsUInt256 "; -" reinterpretAsUInt32 "; -" reinterpretAsUInt64 "; -" reinterpretAsUInt8 "; -" reinterpretAsUUID "; -" RELOAD "; -" REMOVE "; -" RENAME "; -" RENAME COLUMN "; -" RENAME TABLE "; -" repeat "; -" replace "; -" REPLACE "; -" replaceAll "; -" replaceOne "; -" REPLACE PARTITION "; -" replaceRegexpAll "; -" replaceRegexpOne "; -" REPLICA "; -" replicate "; -" REPLICATED "; -" Resample "; -" RESUME "; -" retention "; -" reverse "; -" reverseUTF8 "; -" RIGHT "; -" rightPad "; -" rightPadUTF8 "; -" Ring "; -" ROLLUP "; -" round "; -" roundAge "; -" roundBankers "; -" roundDown "; -" roundDuration "; -" roundToExp2 "; -" row_number "; -" rowNumberInAllBlocks "; -" 
rowNumberInBlock "; -" rpad "; -" RTRIM "; -" runningAccumulate "; -" runningConcurrency "; -" runningDifference "; -" runningDifferenceStartingWithFirstValue "; -" S "; -" s2CapContains "; -" s2CapUnion "; -" s2CellsIntersect "; -" s2GetNeighbors "; -" s2RectAdd "; -" s2RectContains "; -" s2RectIntersection "; -" s2RectUnion "; -" s2ToGeo "; -" SAMPLE "; -" SAMPLE BY "; -" SECOND "; -" SELECT "; -" SEMI "; -" SENDS "; -" sequenceCount "; -" sequenceMatch "; -" sequenceNextNode "; -" serverUUID "; -" SET "; -" SETTINGS "; -" SHA1 "; -" SHA224 "; -" SHA256 "; -" SHA384 "; -" SHA512 "; -" shardCount "; -" shardNum "; -" SHOW "; -" SHOW PROCESSLIST "; -" sigmoid "; -" sign "; -" SimpleAggregateFunction "; -" simpleJSONExtractBool "; -" simpleJSONExtractFloat "; -" simpleJSONExtractInt "; -" simpleJSONExtractRaw "; -" simpleJSONExtractString "; -" simpleJSONExtractUInt "; -" simpleJSONHas "; -" simpleLinearRegression "; -" sin "; -" SINGLE "; -" singleValueOrNull "; -" sinh "; -" sipHash128 "; -" sipHash64 "; -" skewPop "; -" skewSamp "; -" sleep "; -" sleepEachRow "; -" SMALLINT "; -" SMALLINT SIGNED "; -" SMALLINT UNSIGNED "; -" snowflakeToDateTime "; -" snowflakeToDateTime64 "; -" SOURCE "; -" sparkbar "; -" splitByChar "; -" splitByNonAlpha "; -" splitByRegexp "; -" splitByString "; -" splitByWhitespace "; -" SQL_TSI_DAY "; -" SQL_TSI_HOUR "; -" SQL_TSI_MINUTE "; -" SQL_TSI_MONTH "; -" SQL_TSI_QUARTER "; -" SQL_TSI_SECOND "; -" SQL_TSI_WEEK "; -" SQL_TSI_YEAR "; -" sqrt "; -" SS "; -" START "; -" startsWith "; -" State "; -" stddevPop "; -" STDDEV_POP "; -" stddevPopStable "; -" stddevSamp "; -" STDDEV_SAMP "; -" stddevSampStable "; -" stem "; -" STEP "; -" stochasticLinearRegression "; -" stochasticLogisticRegression "; -" STOP "; -" String "; -" stringToH3 "; -" studentTTest "; -" subBitmap "; -" substr "; -" substring "; -" SUBSTRING "; -" substringUTF8 "; -" subtractDays "; -" subtractHours "; -" subtractMinutes "; -" subtractMonths "; -" subtractQuarters "; -" subtractSeconds "; -" subtractWeeks "; -" subtractYears "; -" sum "; -" sumCount "; -" sumKahan "; -" sumMap "; -" sumMapFiltered "; -" sumMapFilteredWithOverflow "; -" sumMapWithOverflow "; -" sumWithOverflow "; -" SUSPEND "; -" svg "; -" SVG "; -" SYNC "; -" synonyms "; -" SYNTAX "; -" SYSTEM "; -" TABLE "; -" TABLES "; -" tan "; -" tanh "; -" tcpPort "; -" TEMPORARY "; -" TEST "; -" TEXT "; -" tgamma "; -" THEN "; -" throwIf "; -" tid "; -" TIES "; -" TIMEOUT "; -" timeSlot "; -" timeSlots "; -" TIMESTAMP "; -" TIMESTAMP_ADD "; -" TIMESTAMPADD "; -" TIMESTAMP_DIFF "; -" TIMESTAMPDIFF "; -" TIMESTAMP_SUB "; -" TIMESTAMPSUB "; -" timezone "; -" timeZone "; -" timezoneOf "; -" timeZoneOf "; -" timezoneOffset "; -" timeZoneOffset "; -" TINYBLOB "; -" TINYINT "; -" TINYINT SIGNED "; -" TINYINT UNSIGNED "; -" TINYTEXT "; -" TO "; -" TO_BASE64 "; -" toColumnTypeName "; -" toDate "; -" toDate32 "; -" toDate32OrNull "; -" toDate32OrZero "; -" toDateOrNull "; -" toDateOrZero "; -" toDateTime "; -" toDateTime32 "; -" toDateTime64 "; -" toDateTime64OrNull "; -" toDateTime64OrZero "; -" toDateTimeOrNull "; -" toDateTimeOrZero "; -" today "; -" toDayOfMonth "; -" toDayOfWeek "; -" toDayOfYear "; -" toDecimal128 "; -" toDecimal128OrNull "; -" toDecimal128OrZero "; -" toDecimal256 "; -" toDecimal256OrNull "; -" toDecimal256OrZero "; -" toDecimal32 "; -" toDecimal32OrNull "; -" toDecimal32OrZero "; -" toDecimal64 "; -" toDecimal64OrNull "; -" toDecimal64OrZero "; -" TO DISK "; -" toFixedString "; -" toFloat32 "; -" toFloat32OrNull "; -" 
toFloat32OrZero "; -" toFloat64 "; -" toFloat64OrNull "; -" toFloat64OrZero "; -" toHour "; -" toInt128 "; -" toInt128OrNull "; -" toInt128OrZero "; -" toInt16 "; -" toInt16OrNull "; -" toInt16OrZero "; -" toInt256 "; -" toInt256OrNull "; -" toInt256OrZero "; -" toInt32 "; -" toInt32OrNull "; -" toInt32OrZero "; -" toInt64 "; -" toInt64OrNull "; -" toInt64OrZero "; -" toInt8 "; -" toInt8OrNull "; -" toInt8OrZero "; -" toIntervalDay "; -" toIntervalHour "; -" toIntervalMinute "; -" toIntervalMonth "; -" toIntervalQuarter "; -" toIntervalSecond "; -" toIntervalWeek "; -" toIntervalYear "; -" toIPv4 "; -" toIPv6 "; -" toISOWeek "; -" toISOYear "; -" toJSONString "; -" toLowCardinality "; -" toMinute "; -" toModifiedJulianDay "; -" toModifiedJulianDayOrNull "; -" toMonday "; -" toMonth "; -" toNullable "; -" TOP "; -" topK "; -" topKWeighted "; -" topLevelDomain "; -" toQuarter "; -" toRelativeDayNum "; -" toRelativeHourNum "; -" toRelativeMinuteNum "; -" toRelativeMonthNum "; -" toRelativeQuarterNum "; -" toRelativeSecondNum "; -" toRelativeWeekNum "; -" toRelativeYearNum "; -" toSecond "; -" toStartOfDay "; -" toStartOfFifteenMinutes "; -" toStartOfFiveMinutes "; -" toStartOfHour "; -" toStartOfInterval "; -" toStartOfISOYear "; -" toStartOfMinute "; -" toStartOfMonth "; -" toStartOfQuarter "; -" toStartOfSecond "; -" toStartOfTenMinutes "; -" toStartOfWeek "; -" toStartOfYear "; -" toString "; -" toStringCutToZero "; -" TO TABLE "; -" TOTALS "; -" toTime "; -" toTimezone "; -" toTimeZone "; -" toTypeName "; -" toUInt128 "; -" toUInt128OrNull "; -" toUInt128OrZero "; -" toUInt16 "; -" toUInt16OrNull "; -" toUInt16OrZero "; -" toUInt256 "; -" toUInt256OrNull "; -" toUInt256OrZero "; -" toUInt32 "; -" toUInt32OrNull "; -" toUInt32OrZero "; -" toUInt64 "; -" toUInt64OrNull "; -" toUInt64OrZero "; -" toUInt8 "; -" toUInt8OrNull "; -" toUInt8OrZero "; -" toUnixTimestamp "; -" toUnixTimestamp64Micro "; -" toUnixTimestamp64Milli "; -" toUnixTimestamp64Nano "; -" toUUID "; -" toUUIDOrNull "; -" toUUIDOrZero "; -" toValidUTF8 "; -" TO VOLUME "; -" toWeek "; -" toYear "; -" toYearWeek "; -" toYYYYMM "; -" toYYYYMMDD "; -" toYYYYMMDDhhmmss "; -" TRAILING "; -" transform "; -" TRIM "; -" trimBoth "; -" trimLeft "; -" trimRight "; -" trunc "; -" truncate "; -" TRUNCATE "; -" tryBase64Decode "; -" TTL "; -" tuple "; -" Tuple "; -" tupleElement "; -" tupleHammingDistance "; -" tupleToNameValuePairs "; -" TYPE "; -" ucase "; -" UInt128 "; -" UInt16 "; -" UInt256 "; -" UInt32 "; -" UInt64 "; -" UInt8 "; -" unbin "; -" unhex "; -" UNION "; -" uniq "; -" uniqCombined "; -" uniqCombined64 "; -" uniqExact "; -" uniqHLL12 "; -" uniqTheta "; -" uniqUpTo "; -" UPDATE "; -" upper "; -" upperUTF8 "; -" uptime "; -" URLHash "; -" URLHierarchy "; -" URLPathHierarchy "; -" USE "; -" user "; -" USING "; -" UUID "; -" UUIDNumToString "; -" UUIDStringToNum "; -" validateNestedArraySizes "; -" VALUES "; -" VARCHAR "; -" VARCHAR2 "; -" varPop "; -" VAR_POP "; -" varPopStable "; -" varSamp "; -" VAR_SAMP "; -" varSampStable "; -" version "; -" VIEW "; -" visibleWidth "; -" visitParamExtractBool "; -" visitParamExtractFloat "; -" visitParamExtractInt "; -" visitParamExtractRaw "; -" visitParamExtractString "; -" visitParamExtractUInt "; -" visitParamHas "; -" VOLUME "; -" WATCH "; -" week "; -" WEEK "; -" welchTTest "; -" WHEN "; -" WHERE "; -" windowFunnel "; -" WITH "; -" WITH FILL "; -" WITH TIES "; -" WK "; -" wkt "; -" wordShingleMinHash "; -" wordShingleMinHashArg "; -" wordShingleMinHashArgCaseInsensitive "; -" 
diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp b/src/Parsers/fuzzers/codegen_fuzzer/out.cpp
deleted file mode 100644
index 29168751d71..00000000000
--- a/src/Parsers/fuzzers/codegen_fuzzer/out.cpp
+++ /dev/null
@@ -1,6461 +0,0 @@
-#include
-#include
-#include
-
-#include
-
-#include "out.pb.h"
-
-void GenerateWord(const Word&, std::string&, int);
-
-void GenerateSentence(const Sentence& stc, std::string &s, int depth) {
-    for (int i = 0; i < stc.words_size(); i++ ) {
-        GenerateWord(stc.words(i), s, ++depth);
-    }
-}
-void GenerateWord(const Word& word, std::string &s, int depth) {
-    if (depth > 5) return;
-
-    switch (word.value()) {
-        case 0: {
-            s += " ";
-            break;
-        }
-        case 1: {
-            s += " ";
-            break;
-        }
-        case 2: {
-            s += " ";
-            break;
-        }
-        case 3: {
-            s += ";";
-            break;
-        }
-        case 4: {
-            s += "(";
-            if (word.inner().words_size() > 0) GenerateWord(word.inner().words(0), s, ++depth);
-            s += ")";
-            break;
-        }
[... cases 5 through 470 elided: one generated case per grammar alternative, each appending the rule's literal fragments and recursing into word.inner().words(N) for every $N placeholder in the rule ...]
-        case 471: {
-            s += " DROP INDEX ";
-            break;
-        }
-        case 472: {
-            s += " DROP PARTITION ";
-            break;
-        }
-        case 473: {
-            s += " 
dumpColumnStructure "; - break; - } - case 474: { - s += " e "; - break; - } - case 475: { - s += " ELSE "; - break; - } - case 476: { - s += " empty "; - break; - } - case 477: { - s += " emptyArrayDate "; - break; - } - case 478: { - s += " emptyArrayDateTime "; - break; - } - case 479: { - s += " emptyArrayFloat32 "; - break; - } - case 480: { - s += " emptyArrayFloat64 "; - break; - } - case 481: { - s += " emptyArrayInt16 "; - break; - } - case 482: { - s += " emptyArrayInt32 "; - break; - } - case 483: { - s += " emptyArrayInt64 "; - break; - } - case 484: { - s += " emptyArrayInt8 "; - break; - } - case 485: { - s += " emptyArrayString "; - break; - } - case 486: { - s += " emptyArrayToSingle "; - break; - } - case 487: { - s += " emptyArrayUInt16 "; - break; - } - case 488: { - s += " emptyArrayUInt32 "; - break; - } - case 489: { - s += " emptyArrayUInt64 "; - break; - } - case 490: { - s += " emptyArrayUInt8 "; - break; - } - case 491: { - s += " enabledProfiles "; - break; - } - case 492: { - s += " enabledRoles "; - break; - } - case 493: { - s += " encodeXMLComponent "; - break; - } - case 494: { - s += " encrypt "; - break; - } - case 495: { - s += " END "; - break; - } - case 496: { - s += " endsWith "; - break; - } - case 497: { - s += " ENGINE "; - break; - } - case 498: { - s += " entropy "; - break; - } - case 499: { - s += " Enum "; - break; - } - case 500: { - s += " ENUM "; - break; - } - case 501: { - s += " Enum16 "; - break; - } - case 502: { - s += " Enum8 "; - break; - } - case 503: { - s += " equals "; - break; - } - case 504: { - s += " erf "; - break; - } - case 505: { - s += " erfc "; - break; - } - case 506: { - s += " errorCodeToName "; - break; - } - case 507: { - s += " evalMLMethod "; - break; - } - case 508: { - s += " EVENTS "; - break; - } - case 509: { - s += " EXCHANGE TABLES "; - break; - } - case 510: { - s += " EXISTS "; - break; - } - case 511: { - s += " exp "; - break; - } - case 512: { - s += " exp10 "; - break; - } - case 513: { - s += " exp2 "; - break; - } - case 514: { - s += " EXPLAIN "; - break; - } - case 515: { - s += " exponentialMovingAverage "; - break; - } - case 516: { - s += " EXPRESSION "; - break; - } - case 517: { - s += " extract "; - break; - } - case 518: { - s += " EXTRACT "; - break; - } - case 519: { - s += " extractAll "; - break; - } - case 520: { - s += " extractAllGroups "; - break; - } - case 521: { - s += " extractAllGroupsHorizontal "; - break; - } - case 522: { - s += " extractAllGroupsVertical "; - break; - } - case 523: { - s += " extractGroups "; - break; - } - case 524: { - s += " extractTextFromHTML "; - break; - } - case 525: { - s += " extractURLParameter "; - break; - } - case 526: { - s += " extractURLParameterNames "; - break; - } - case 527: { - s += " extractURLParameters "; - break; - } - case 528: { - s += " farmFingerprint64 "; - break; - } - case 529: { - s += " farmHash64 "; - break; - } - case 530: { - s += " FETCHES "; - break; - } - case 531: { - s += " FETCH PART "; - break; - } - case 532: { - s += " FETCH PARTITION "; - break; - } - case 533: { - s += " file "; - break; - } - case 534: { - s += " filesystemAvailable "; - break; - } - case 535: { - s += " filesystemCapacity "; - break; - } - case 536: { - s += " filesystemFree "; - break; - } - case 537: { - s += " FINAL "; - break; - } - case 538: { - s += " finalizeAggregation "; - break; - } - case 539: { - s += " FIRST "; - break; - } - case 540: { - s += " firstSignificantSubdomain "; - break; - } - case 541: { - s += " 
firstSignificantSubdomainCustom "; - break; - } - case 542: { - s += " first_value "; - break; - } - case 543: { - s += " FIXED "; - break; - } - case 544: { - s += " FixedString "; - break; - } - case 545: { - s += " flatten "; - break; - } - case 546: { - s += " FLOAT "; - break; - } - case 547: { - s += " Float32 "; - break; - } - case 548: { - s += " Float64 "; - break; - } - case 549: { - s += " floor "; - break; - } - case 550: { - s += " FLUSH "; - break; - } - case 551: { - s += " FOR "; - break; - } - case 552: { - s += " ForEach "; - break; - } - case 553: { - s += " format "; - break; - } - case 554: { - s += " FORMAT "; - break; - } - case 555: { - s += " formatDateTime "; - break; - } - case 556: { - s += " formatReadableQuantity "; - break; - } - case 557: { - s += " formatReadableDecimalSize "; - break; - } - case 558: { - s += " formatReadableSize "; - break; - } - case 559: { - s += " formatReadableTimeDelta "; - break; - } - case 560: { - s += " formatRow "; - break; - } - case 561: { - s += " formatRowNoNewline "; - break; - } - case 562: { - s += " FQDN "; - break; - } - case 563: { - s += " fragment "; - break; - } - case 564: { - s += " FREEZE "; - break; - } - case 565: { - s += " FROM "; - break; - } - case 566: { - s += " FROM_BASE64 "; - break; - } - case 567: { - s += " fromModifiedJulianDay "; - break; - } - case 568: { - s += " fromModifiedJulianDayOrNull "; - break; - } - case 569: { - s += " FROM_UNIXTIME "; - break; - } - case 570: { - s += " fromUnixTimestamp "; - break; - } - case 571: { - s += " fromUnixTimestamp64Micro "; - break; - } - case 572: { - s += " fromUnixTimestamp64Milli "; - break; - } - case 573: { - s += " fromUnixTimestamp64Nano "; - break; - } - case 574: { - s += " FULL "; - break; - } - case 575: { - s += " fullHostName "; - break; - } - case 576: { - s += " FUNCTION "; - break; - } - case 577: { - s += " fuzzBits "; - break; - } - case 578: { - s += " gccMurmurHash "; - break; - } - case 579: { - s += " gcd "; - break; - } - case 580: { - s += " generateUUIDv4 "; - break; - } - case 581: { - s += " geoDistance "; - break; - } - case 582: { - s += " geohashDecode "; - break; - } - case 583: { - s += " geohashEncode "; - break; - } - case 584: { - s += " geohashesInBox "; - break; - } - case 585: { - s += " geoToH3 "; - break; - } - case 586: { - s += " geoToS2 "; - break; - } - case 587: { - s += " getMacro "; - break; - } - case 588: { - s += " __getScalar "; - break; - } - case 589: { - s += " getServerPort "; - break; - } - case 590: { - s += " getSetting "; - break; - } - case 591: { - s += " getSizeOfEnumType "; - break; - } - case 592: { - s += " GLOBAL "; - break; - } - case 593: { - s += " globalIn "; - break; - } - case 594: { - s += " globalInIgnoreSet "; - break; - } - case 595: { - s += " globalNotIn "; - break; - } - case 596: { - s += " globalNotInIgnoreSet "; - break; - } - case 597: { - s += " globalNotNullIn "; - break; - } - case 598: { - s += " globalNotNullInIgnoreSet "; - break; - } - case 599: { - s += " globalNullIn "; - break; - } - case 600: { - s += " globalNullInIgnoreSet "; - break; - } - case 601: { - s += " globalVariable "; - break; - } - case 602: { - s += " GRANULARITY "; - break; - } - case 603: { - s += " greatCircleAngle "; - break; - } - case 604: { - s += " greatCircleDistance "; - break; - } - case 605: { - s += " greater "; - break; - } - case 606: { - s += " greaterOrEquals "; - break; - } - case 607: { - s += " greatest "; - break; - } - case 608: { - s += " GROUP "; - break; - } - case 609: { - 
s += " groupArray "; - break; - } - case 610: { - s += " groupArrayInsertAt "; - break; - } - case 611: { - s += " groupArrayMovingAvg "; - break; - } - case 612: { - s += " groupArrayMovingSum "; - break; - } - case 613: { - s += " groupArraySample "; - break; - } - case 614: { - s += " groupBitAnd "; - break; - } - case 615: { - s += " groupBitmap "; - break; - } - case 616: { - s += " groupBitmapAnd "; - break; - } - case 617: { - s += " groupBitmapOr "; - break; - } - case 618: { - s += " groupBitmapXor "; - break; - } - case 619: { - s += " groupBitOr "; - break; - } - case 620: { - s += " groupBitXor "; - break; - } - case 621: { - s += " GROUP BY "; - break; - } - case 622: { - s += " groupUniqArray "; - break; - } - case 623: { - s += " h3EdgeAngle "; - break; - } - case 624: { - s += " h3EdgeLengthM "; - break; - } - case 625: { - s += " h3GetBaseCell "; - break; - } - case 626: { - s += " h3GetFaces "; - break; - } - case 627: { - s += " h3GetResolution "; - break; - } - case 628: { - s += " h3HexAreaM2 "; - break; - } - case 629: { - s += " h3IndexesAreNeighbors "; - break; - } - case 630: { - s += " h3IsPentagon "; - break; - } - case 631: { - s += " h3IsResClassIII "; - break; - } - case 632: { - s += " h3IsValid "; - break; - } - case 633: { - s += " h3kRing "; - break; - } - case 634: { - s += " h3ToChildren "; - break; - } - case 635: { - s += " h3ToGeo "; - break; - } - case 636: { - s += " h3ToGeoBoundary "; - break; - } - case 637: { - s += " h3ToParent "; - break; - } - case 638: { - s += " h3ToString "; - break; - } - case 639: { - s += " halfMD5 "; - break; - } - case 640: { - s += " has "; - break; - } - case 641: { - s += " hasAll "; - break; - } - case 642: { - s += " hasAny "; - break; - } - case 643: { - s += " hasColumnInTable "; - break; - } - case 644: { - s += " hasSubstr "; - break; - } - case 645: { - s += " hasThreadFuzzer "; - break; - } - case 646: { - s += " hasToken "; - break; - } - case 647: { - s += " hasTokenCaseInsensitive "; - break; - } - case 648: { - s += " HAVING "; - break; - } - case 649: { - s += " hex "; - break; - } - case 650: { - s += " HH "; - break; - } - case 651: { - s += " HIERARCHICAL "; - break; - } - case 652: { - s += " histogram "; - break; - } - case 653: { - s += " hiveHash "; - break; - } - case 654: { - s += " hostname "; - break; - } - case 655: { - s += " hostName "; - break; - } - case 656: { - s += " HOUR "; - break; - } - case 657: { - s += " hypot "; - break; - } - case 658: { - s += " ID "; - break; - } - case 659: { - s += " identity "; - break; - } - case 660: { - s += " if "; - break; - } - case 661: { - s += " IF "; - break; - } - case 662: { - s += " IF EXISTS "; - break; - } - case 663: { - s += " IF NOT EXISTS "; - break; - } - case 664: { - s += " ifNotFinite "; - break; - } - case 665: { - s += " ifNull "; - break; - } - case 666: { - s += " ignore "; - break; - } - case 667: { - s += " ilike "; - break; - } - case 668: { - s += " ILIKE "; - break; - } - case 669: { - s += " in "; - break; - } - case 670: { - s += " IN "; - break; - } - case 671: { - s += " INDEX "; - break; - } - case 672: { - s += " indexHint "; - break; - } - case 673: { - s += " indexOf "; - break; - } - case 674: { - s += " INET4 "; - break; - } - case 675: { - s += " INET6 "; - break; - } - case 676: { - s += " INET6_ATON "; - break; - } - case 677: { - s += " INET6_NTOA "; - break; - } - case 678: { - s += " INET_ATON "; - break; - } - case 679: { - s += " INET_NTOA "; - break; - } - case 680: { - s += " INF "; - break; - } - case 
681: { - s += " inIgnoreSet "; - break; - } - case 682: { - s += " initializeAggregation "; - break; - } - case 683: { - s += " initial_query_id "; - break; - } - case 684: { - s += " initialQueryID "; - break; - } - case 685: { - s += " INJECTIVE "; - break; - } - case 686: { - s += " INNER "; - break; - } - case 687: { - s += " IN PARTITION "; - break; - } - case 688: { - s += " INSERT "; - break; - } - case 689: { - s += " INSERT INTO "; - break; - } - case 690: { - s += " INT "; - break; - } - case 691: { - s += " INT1 "; - break; - } - case 692: { - s += " Int128 "; - break; - } - case 693: { - s += " Int16 "; - break; - } - case 694: { - s += " INT1 SIGNED "; - break; - } - case 695: { - s += " INT1 UNSIGNED "; - break; - } - case 696: { - s += " Int256 "; - break; - } - case 697: { - s += " Int32 "; - break; - } - case 698: { - s += " Int64 "; - break; - } - case 699: { - s += " Int8 "; - break; - } - case 700: { - s += " intDiv "; - break; - } - case 701: { - s += " intDivOrZero "; - break; - } - case 702: { - s += " INTEGER "; - break; - } - case 703: { - s += " INTEGER SIGNED "; - break; - } - case 704: { - s += " INTEGER UNSIGNED "; - break; - } - case 705: { - s += " INTERVAL "; - break; - } - case 706: { - s += " IntervalDay "; - break; - } - case 707: { - s += " IntervalHour "; - break; - } - case 708: { - s += " intervalLengthSum "; - break; - } - case 709: { - s += " IntervalMinute "; - break; - } - case 710: { - s += " IntervalMonth "; - break; - } - case 711: { - s += " IntervalQuarter "; - break; - } - case 712: { - s += " IntervalSecond "; - break; - } - case 713: { - s += " IntervalWeek "; - break; - } - case 714: { - s += " IntervalYear "; - break; - } - case 715: { - s += " intExp10 "; - break; - } - case 716: { - s += " intExp2 "; - break; - } - case 717: { - s += " intHash32 "; - break; - } - case 718: { - s += " intHash64 "; - break; - } - case 719: { - s += " INTO "; - break; - } - case 720: { - s += " INTO OUTFILE "; - break; - } - case 721: { - s += " INT SIGNED "; - break; - } - case 722: { - s += " INT UNSIGNED "; - break; - } - case 723: { - s += " IPv4 "; - break; - } - case 724: { - s += " IPv4CIDRToRange "; - break; - } - case 725: { - s += " IPv4NumToString "; - break; - } - case 726: { - s += " IPv4NumToStringClassC "; - break; - } - case 727: { - s += " IPv4StringToNum "; - break; - } - case 728: { - s += " IPv4ToIPv6 "; - break; - } - case 729: { - s += " IPv6 "; - break; - } - case 730: { - s += " IPv6CIDRToRange "; - break; - } - case 731: { - s += " IPv6NumToString "; - break; - } - case 732: { - s += " IPv6StringToNum "; - break; - } - case 733: { - s += " IS "; - break; - } - case 734: { - s += " isConstant "; - break; - } - case 735: { - s += " isDecimalOverflow "; - break; - } - case 736: { - s += " isFinite "; - break; - } - case 737: { - s += " isInfinite "; - break; - } - case 738: { - s += " isIPAddressInRange "; - break; - } - case 739: { - s += " isIPv4String "; - break; - } - case 740: { - s += " isIPv6String "; - break; - } - case 741: { - s += " isNaN "; - break; - } - case 742: { - s += " isNotNull "; - break; - } - case 743: { - s += " isNull "; - break; - } - case 744: { - s += " IS_OBJECT_ID "; - break; - } - case 745: { - s += " isValidJSON "; - break; - } - case 746: { - s += " isValidUTF8 "; - break; - } - case 747: { - s += " isZeroOrNull "; - break; - } - case 748: { - s += " javaHash "; - break; - } - case 749: { - s += " javaHashUTF16LE "; - break; - } - case 750: { - s += " JOIN "; - break; - } - case 751: { - s += " 
joinGet "; - break; - } - case 752: { - s += " joinGetOrNull "; - break; - } - case 753: { - s += " JSON_EXISTS "; - break; - } - case 754: { - s += " JSONExtract "; - break; - } - case 755: { - s += " JSONExtractArrayRaw "; - break; - } - case 756: { - s += " JSONExtractBool "; - break; - } - case 757: { - s += " JSONExtractFloat "; - break; - } - case 758: { - s += " JSONExtractInt "; - break; - } - case 759: { - s += " JSONExtractKeysAndValues "; - break; - } - case 760: { - s += " JSONExtractKeysAndValuesRaw "; - break; - } - case 761: { - s += " JSONExtractKeys "; - break; - } - case 762: { - s += " JSONExtractRaw "; - break; - } - case 763: { - s += " JSONExtractString "; - break; - } - case 764: { - s += " JSONExtractUInt "; - break; - } - case 765: { - s += " JSONHas "; - break; - } - case 766: { - s += " JSONKey "; - break; - } - case 767: { - s += " JSONLength "; - break; - } - case 768: { - s += " JSON_QUERY "; - break; - } - case 769: { - s += " JSONType "; - break; - } - case 770: { - s += " JSON_VALUE "; - break; - } - case 771: { - s += " jumpConsistentHash "; - break; - } - case 772: { - s += " KEY "; - break; - } - case 773: { - s += " KILL "; - break; - } - case 774: { - s += " kurtPop "; - break; - } - case 775: { - s += " kurtSamp "; - break; - } - case 776: { - s += " lagInFrame "; - break; - } - case 777: { - s += " LAST "; - break; - } - case 778: { - s += " last_value "; - break; - } - case 779: { - s += " LAYOUT "; - break; - } - case 780: { - s += " lcase "; - break; - } - case 781: { - s += " lcm "; - break; - } - case 782: { - s += " leadInFrame "; - break; - } - case 783: { - s += " LEADING "; - break; - } - case 784: { - s += " least "; - break; - } - case 785: { - s += " LEFT "; - break; - } - case 786: { - s += " LEFT ARRAY JOIN "; - break; - } - case 787: { - s += " leftPad "; - break; - } - case 788: { - s += " leftPadUTF8 "; - break; - } - case 789: { - s += " lemmatize "; - break; - } - case 790: { - s += " length "; - break; - } - case 791: { - s += " lengthUTF8 "; - break; - } - case 792: { - s += " less "; - break; - } - case 793: { - s += " lessOrEquals "; - break; - } - case 794: { - s += " lgamma "; - break; - } - case 795: { - s += " LIFETIME "; - break; - } - case 796: { - s += " like "; - break; - } - case 797: { - s += " LIKE "; - break; - } - case 798: { - s += " LIMIT "; - break; - } - case 799: { - s += " LIVE "; - break; - } - case 800: { - s += " ln "; - break; - } - case 801: { - s += " LOCAL "; - break; - } - case 802: { - s += " locate "; - break; - } - case 803: { - s += " log "; - break; - } - case 804: { - s += " log10 "; - break; - } - case 805: { - s += " log1p "; - break; - } - case 806: { - s += " log2 "; - break; - } - case 807: { - s += " LOGS "; - break; - } - case 808: { - s += " logTrace "; - break; - } - case 809: { - s += " LONGBLOB "; - break; - } - case 810: { - s += " LONGTEXT "; - break; - } - case 811: { - s += " LowCardinality "; - break; - } - case 812: { - s += " lowCardinalityIndices "; - break; - } - case 813: { - s += " lowCardinalityKeys "; - break; - } - case 814: { - s += " lower "; - break; - } - case 815: { - s += " lowerUTF8 "; - break; - } - case 816: { - s += " lpad "; - break; - } - case 817: { - s += " LTRIM "; - break; - } - case 818: { - s += " M "; - break; - } - case 819: { - s += " MACNumToString "; - break; - } - case 820: { - s += " MACStringToNum "; - break; - } - case 821: { - s += " MACStringToOUI "; - break; - } - case 822: { - s += " mannWhitneyUTest "; - break; - } - case 823: { - s += " 
map "; - break; - } - case 824: { - s += " Map "; - break; - } - case 825: { - s += " mapAdd "; - break; - } - case 826: { - s += " mapContains "; - break; - } - case 827: { - s += " mapKeys "; - break; - } - case 828: { - s += " mapPopulateSeries "; - break; - } - case 829: { - s += " mapSubtract "; - break; - } - case 830: { - s += " mapValues "; - break; - } - case 831: { - s += " match "; - break; - } - case 832: { - s += " materialize "; - break; - } - case 833: { - s += " MATERIALIZE "; - break; - } - case 834: { - s += " MATERIALIZED "; - break; - } - case 835: { - s += " MATERIALIZE INDEX "; - break; - } - case 836: { - s += " MATERIALIZE TTL "; - break; - } - case 837: { - s += " max "; - break; - } - case 838: { - s += " MAX "; - break; - } - case 839: { - s += " maxIntersections "; - break; - } - case 840: { - s += " maxIntersectionsPosition "; - break; - } - case 841: { - s += " maxMap "; - break; - } - case 842: { - s += " MD4 "; - break; - } - case 843: { - s += " MD5 "; - break; - } - case 844: { - s += " median "; - break; - } - case 845: { - s += " medianBFloat16 "; - break; - } - case 846: { - s += " medianBFloat16Weighted "; - break; - } - case 847: { - s += " medianDeterministic "; - break; - } - case 848: { - s += " medianExact "; - break; - } - case 849: { - s += " medianExactHigh "; - break; - } - case 850: { - s += " medianExactLow "; - break; - } - case 851: { - s += " medianExactWeighted "; - break; - } - case 852: { - s += " medianTDigest "; - break; - } - case 853: { - s += " medianTDigestWeighted "; - break; - } - case 854: { - s += " medianTiming "; - break; - } - case 855: { - s += " medianTimingWeighted "; - break; - } - case 856: { - s += " MEDIUMBLOB "; - break; - } - case 857: { - s += " MEDIUMINT "; - break; - } - case 858: { - s += " MEDIUMINT SIGNED "; - break; - } - case 859: { - s += " MEDIUMINT UNSIGNED "; - break; - } - case 860: { - s += " MEDIUMTEXT "; - break; - } - case 861: { - s += " Merge "; - break; - } - case 862: { - s += " MERGES "; - break; - } - case 863: { - s += " metroHash64 "; - break; - } - case 864: { - s += " MI "; - break; - } - case 865: { - s += " mid "; - break; - } - case 866: { - s += " min "; - break; - } - case 867: { - s += " MIN "; - break; - } - case 868: { - s += " minMap "; - break; - } - case 869: { - s += " minus "; - break; - } - case 870: { - s += " MINUTE "; - break; - } - case 871: { - s += " MM "; - break; - } - case 872: { - s += " mod "; - break; - } - case 873: { - s += " MODIFY "; - break; - } - case 874: { - s += " MODIFY COLUMN "; - break; - } - case 875: { - s += " MODIFY ORDER BY "; - break; - } - case 876: { - s += " MODIFY QUERY "; - break; - } - case 877: { - s += " MODIFY SETTING "; - break; - } - case 878: { - s += " MODIFY TTL "; - break; - } - case 879: { - s += " modulo "; - break; - } - case 880: { - s += " moduloLegacy "; - break; - } - case 881: { - s += " moduloOrZero "; - break; - } - case 882: { - s += " MONTH "; - break; - } - case 883: { - s += " MOVE "; - break; - } - case 884: { - s += " MOVE PART "; - break; - } - case 885: { - s += " MOVE PARTITION "; - break; - } - case 886: { - s += " movingXXX "; - break; - } - case 887: { - s += " multiFuzzyMatchAllIndices "; - break; - } - case 888: { - s += " multiFuzzyMatchAny "; - break; - } - case 889: { - s += " multiFuzzyMatchAnyIndex "; - break; - } - case 890: { - s += " multiIf "; - break; - } - case 891: { - s += " multiMatchAllIndices "; - break; - } - case 892: { - s += " multiMatchAny "; - break; - } - case 893: { - s += " 
multiMatchAnyIndex "; - break; - } - case 894: { - s += " multiply "; - break; - } - case 895: { - s += " MultiPolygon "; - break; - } - case 896: { - s += " multiSearchAllPositions "; - break; - } - case 897: { - s += " multiSearchAllPositionsCaseInsensitive "; - break; - } - case 898: { - s += " multiSearchAllPositionsCaseInsensitiveUTF8 "; - break; - } - case 899: { - s += " multiSearchAllPositionsUTF8 "; - break; - } - case 900: { - s += " multiSearchAny "; - break; - } - case 901: { - s += " multiSearchAnyCaseInsensitive "; - break; - } - case 902: { - s += " multiSearchAnyCaseInsensitiveUTF8 "; - break; - } - case 903: { - s += " multiSearchAnyUTF8 "; - break; - } - case 904: { - s += " multiSearchFirstIndex "; - break; - } - case 905: { - s += " multiSearchFirstIndexCaseInsensitive "; - break; - } - case 906: { - s += " multiSearchFirstIndexCaseInsensitiveUTF8 "; - break; - } - case 907: { - s += " multiSearchFirstIndexUTF8 "; - break; - } - case 908: { - s += " multiSearchFirstPosition "; - break; - } - case 909: { - s += " multiSearchFirstPositionCaseInsensitive "; - break; - } - case 910: { - s += " multiSearchFirstPositionCaseInsensitiveUTF8 "; - break; - } - case 911: { - s += " multiSearchFirstPositionUTF8 "; - break; - } - case 912: { - s += " murmurHash2_32 "; - break; - } - case 913: { - s += " murmurHash2_64 "; - break; - } - case 914: { - s += " murmurHash3_128 "; - break; - } - case 915: { - s += " murmurHash3_32 "; - break; - } - case 916: { - s += " murmurHash3_64 "; - break; - } - case 917: { - s += " MUTATION "; - break; - } - case 918: { - s += " N "; - break; - } - case 919: { - s += " NAME "; - break; - } - case 920: { - s += " NAN_SQL "; - break; - } - case 921: { - s += " NATIONAL CHAR "; - break; - } - case 922: { - s += " NATIONAL CHARACTER "; - break; - } - case 923: { - s += " NATIONAL CHARACTER LARGE OBJECT "; - break; - } - case 924: { - s += " NATIONAL CHARACTER VARYING "; - break; - } - case 925: { - s += " NATIONAL CHAR VARYING "; - break; - } - case 926: { - s += " NCHAR "; - break; - } - case 927: { - s += " NCHAR LARGE OBJECT "; - break; - } - case 928: { - s += " NCHAR VARYING "; - break; - } - case 929: { - s += " negate "; - break; - } - case 930: { - s += " neighbor "; - break; - } - case 931: { - s += " Nested "; - break; - } - case 932: { - s += " netloc "; - break; - } - case 933: { - s += " ngramDistance "; - break; - } - case 934: { - s += " ngramDistanceCaseInsensitive "; - break; - } - case 935: { - s += " ngramDistanceCaseInsensitiveUTF8 "; - break; - } - case 936: { - s += " ngramDistanceUTF8 "; - break; - } - case 937: { - s += " ngramMinHash "; - break; - } - case 938: { - s += " ngramMinHashArg "; - break; - } - case 939: { - s += " ngramMinHashArgCaseInsensitive "; - break; - } - case 940: { - s += " ngramMinHashArgCaseInsensitiveUTF8 "; - break; - } - case 941: { - s += " ngramMinHashArgUTF8 "; - break; - } - case 942: { - s += " ngramMinHashCaseInsensitive "; - break; - } - case 943: { - s += " ngramMinHashCaseInsensitiveUTF8 "; - break; - } - case 944: { - s += " ngramMinHashUTF8 "; - break; - } - case 945: { - s += " ngramSearch "; - break; - } - case 946: { - s += " ngramSearchCaseInsensitive "; - break; - } - case 947: { - s += " ngramSearchCaseInsensitiveUTF8 "; - break; - } - case 948: { - s += " ngramSearchUTF8 "; - break; - } - case 949: { - s += " ngramSimHash "; - break; - } - case 950: { - s += " ngramSimHashCaseInsensitive "; - break; - } - case 951: { - s += " ngramSimHashCaseInsensitiveUTF8 "; - break; - } - case 952: 
{ - s += " ngramSimHashUTF8 "; - break; - } - case 953: { - s += " NO "; - break; - } - case 954: { - s += " NO DELAY "; - break; - } - case 955: { - s += " NONE "; - break; - } - case 956: { - s += " normalizedQueryHash "; - break; - } - case 957: { - s += " normalizedQueryHashKeepNames "; - break; - } - case 958: { - s += " normalizeQuery "; - break; - } - case 959: { - s += " normalizeQueryKeepNames "; - break; - } - case 960: { - s += " not "; - break; - } - case 961: { - s += " NOT "; - break; - } - case 962: { - s += " notEmpty "; - break; - } - case 963: { - s += " notEquals "; - break; - } - case 964: { - s += " nothing "; - break; - } - case 965: { - s += " Nothing "; - break; - } - case 966: { - s += " notILike "; - break; - } - case 967: { - s += " notIn "; - break; - } - case 968: { - s += " notInIgnoreSet "; - break; - } - case 969: { - s += " notLike "; - break; - } - case 970: { - s += " notNullIn "; - break; - } - case 971: { - s += " notNullInIgnoreSet "; - break; - } - case 972: { - s += " now "; - break; - } - case 973: { - s += " now64 "; - break; - } - case 974: { - s += " Null "; - break; - } - case 975: { - s += " Nullable "; - break; - } - case 976: { - s += " nullIf "; - break; - } - case 977: { - s += " nullIn "; - break; - } - case 978: { - s += " nullInIgnoreSet "; - break; - } - case 979: { - s += " NULLS "; - break; - } - case 980: { - s += " NULL_SQL "; - break; - } - case 981: { - s += " NUMERIC "; - break; - } - case 982: { - s += " NVARCHAR "; - break; - } - case 983: { - s += " OFFSET "; - break; - } - case 984: { - s += " ON "; - break; - } - case 985: { - s += " ONLY "; - break; - } - case 986: { - s += " OPTIMIZE "; - break; - } - case 987: { - s += " OPTIMIZE TABLE "; - break; - } - case 988: { - s += " or "; - break; - } - case 989: { - s += " OR "; - break; - } - case 990: { - s += " ORDER "; - break; - } - case 991: { - s += " ORDER BY "; - break; - } - case 992: { - s += " OR REPLACE "; - break; - } - case 993: { - s += " OUTER "; - break; - } - case 994: { - s += " OUTFILE "; - break; - } - case 995: { - s += " parseDateTime32BestEffort "; - break; - } - case 996: { - s += " parseDateTime32BestEffortOrNull "; - break; - } - case 997: { - s += " parseDateTime32BestEffortOrZero "; - break; - } - case 998: { - s += " parseDateTime64BestEffort "; - break; - } - case 999: { - s += " parseDateTime64BestEffortOrNull "; - break; - } - case 1000: { - s += " parseDateTime64BestEffortOrZero "; - break; - } - case 1001: { - s += " parseDateTimeBestEffort "; - break; - } - case 1002: { - s += " parseDateTimeBestEffortOrNull "; - break; - } - case 1003: { - s += " parseDateTimeBestEffortOrZero "; - break; - } - case 1004: { - s += " parseDateTimeBestEffortUS "; - break; - } - case 1005: { - s += " parseDateTimeBestEffortUSOrNull "; - break; - } - case 1006: { - s += " parseDateTimeBestEffortUSOrZero "; - break; - } - case 1007: { - s += " parseTimeDelta "; - break; - } - case 1008: { - s += " PARTITION "; - break; - } - case 1009: { - s += " PARTITION BY "; - break; - } - case 1010: { - s += " partitionId "; - break; - } - case 1011: { - s += " path "; - break; - } - case 1012: { - s += " pathFull "; - break; - } - case 1013: { - s += " pi "; - break; - } - case 1014: { - s += " plus "; - break; - } - case 1015: { - s += " Point "; - break; - } - case 1016: { - s += " pointInEllipses "; - break; - } - case 1017: { - s += " pointInPolygon "; - break; - } - case 1018: { - s += " Polygon "; - break; - } - case 1019: { - s += " polygonAreaCartesian "; - break; - } 
- case 1020: { - s += " polygonAreaSpherical "; - break; - } - case 1021: { - s += " polygonConvexHullCartesian "; - break; - } - case 1022: { - s += " polygonPerimeterCartesian "; - break; - } - case 1023: { - s += " polygonPerimeterSpherical "; - break; - } - case 1024: { - s += " polygonsDistanceCartesian "; - break; - } - case 1025: { - s += " polygonsDistanceSpherical "; - break; - } - case 1026: { - s += " polygonsEqualsCartesian "; - break; - } - case 1027: { - s += " polygonsIntersectionCartesian "; - break; - } - case 1028: { - s += " polygonsIntersectionSpherical "; - break; - } - case 1029: { - s += " polygonsSymDifferenceCartesian "; - break; - } - case 1030: { - s += " polygonsSymDifferenceSpherical "; - break; - } - case 1031: { - s += " polygonsUnionCartesian "; - break; - } - case 1032: { - s += " polygonsUnionSpherical "; - break; - } - case 1033: { - s += " polygonsWithinCartesian "; - break; - } - case 1034: { - s += " polygonsWithinSpherical "; - break; - } - case 1035: { - s += " POPULATE "; - break; - } - case 1036: { - s += " port "; - break; - } - case 1037: { - s += " position "; - break; - } - case 1038: { - s += " positionCaseInsensitive "; - break; - } - case 1039: { - s += " positionCaseInsensitiveUTF8 "; - break; - } - case 1040: { - s += " positionUTF8 "; - break; - } - case 1041: { - s += " pow "; - break; - } - case 1042: { - s += " power "; - break; - } - case 1043: { - s += " PREWHERE "; - break; - } - case 1044: { - s += " PRIMARY "; - break; - } - case 1045: { - s += " PRIMARY KEY "; - break; - } - case 1046: { - s += " PROJECTION "; - break; - } - case 1047: { - s += " protocol "; - break; - } - case 1048: { - s += " Q "; - break; - } - case 1049: { - s += " QQ "; - break; - } - case 1050: { - s += " quantile "; - break; - } - case 1051: { - s += " quantileBFloat16 "; - break; - } - case 1052: { - s += " quantileBFloat16Weighted "; - break; - } - case 1053: { - s += " quantileDeterministic "; - break; - } - case 1054: { - s += " quantileExact "; - break; - } - case 1055: { - s += " quantileExactExclusive "; - break; - } - case 1056: { - s += " quantileExactHigh "; - break; - } - case 1057: { - s += " quantileExactInclusive "; - break; - } - case 1058: { - s += " quantileExactLow "; - break; - } - case 1059: { - s += " quantileExactWeighted "; - break; - } - case 1060: { - s += " quantiles "; - break; - } - case 1061: { - s += " quantilesBFloat16 "; - break; - } - case 1062: { - s += " quantilesBFloat16Weighted "; - break; - } - case 1063: { - s += " quantilesDeterministic "; - break; - } - case 1064: { - s += " quantilesExact "; - break; - } - case 1065: { - s += " quantilesExactExclusive "; - break; - } - case 1066: { - s += " quantilesExactHigh "; - break; - } - case 1067: { - s += " quantilesExactInclusive "; - break; - } - case 1068: { - s += " quantilesExactLow "; - break; - } - case 1069: { - s += " quantilesExactWeighted "; - break; - } - case 1070: { - s += " quantilesTDigest "; - break; - } - case 1071: { - s += " quantilesTDigestWeighted "; - break; - } - case 1072: { - s += " quantilesTiming "; - break; - } - case 1073: { - s += " quantilesTimingWeighted "; - break; - } - case 1074: { - s += " quantileTDigest "; - break; - } - case 1075: { - s += " quantileTDigestWeighted "; - break; - } - case 1076: { - s += " quantileTiming "; - break; - } - case 1077: { - s += " quantileTimingWeighted "; - break; - } - case 1078: { - s += " QUARTER "; - break; - } - case 1079: { - s += " query_id "; - break; - } - case 1080: { - s += " queryID "; - break; 
- } - case 1081: { - s += " queryString "; - break; - } - case 1082: { - s += " queryStringAndFragment "; - break; - } - case 1083: { - s += " rand "; - break; - } - case 1084: { - s += " rand32 "; - break; - } - case 1085: { - s += " rand64 "; - break; - } - case 1086: { - s += " randConstant "; - break; - } - case 1087: { - s += " randomFixedString "; - break; - } - case 1088: { - s += " randomPrintableASCII "; - break; - } - case 1089: { - s += " randomString "; - break; - } - case 1090: { - s += " randomStringUTF8 "; - break; - } - case 1091: { - s += " range "; - break; - } - case 1092: { - s += " RANGE "; - break; - } - case 1093: { - s += " rank "; - break; - } - case 1094: { - s += " rankCorr "; - break; - } - case 1095: { - s += " readWKTMultiPolygon "; - break; - } - case 1096: { - s += " readWKTPoint "; - break; - } - case 1097: { - s += " readWKTPolygon "; - break; - } - case 1098: { - s += " readWKTRing "; - break; - } - case 1099: { - s += " REAL "; - break; - } - case 1100: { - s += " REFRESH "; - break; - } - case 1101: { - s += " regexpQuoteMeta "; - break; - } - case 1102: { - s += " regionHierarchy "; - break; - } - case 1103: { - s += " regionIn "; - break; - } - case 1104: { - s += " regionToArea "; - break; - } - case 1105: { - s += " regionToCity "; - break; - } - case 1106: { - s += " regionToContinent "; - break; - } - case 1107: { - s += " regionToCountry "; - break; - } - case 1108: { - s += " regionToDistrict "; - break; - } - case 1109: { - s += " regionToName "; - break; - } - case 1110: { - s += " regionToPopulation "; - break; - } - case 1111: { - s += " regionToTopContinent "; - break; - } - case 1112: { - s += " reinterpret "; - break; - } - case 1113: { - s += " reinterpretAsDate "; - break; - } - case 1114: { - s += " reinterpretAsDateTime "; - break; - } - case 1115: { - s += " reinterpretAsFixedString "; - break; - } - case 1116: { - s += " reinterpretAsFloat32 "; - break; - } - case 1117: { - s += " reinterpretAsFloat64 "; - break; - } - case 1118: { - s += " reinterpretAsInt128 "; - break; - } - case 1119: { - s += " reinterpretAsInt16 "; - break; - } - case 1120: { - s += " reinterpretAsInt256 "; - break; - } - case 1121: { - s += " reinterpretAsInt32 "; - break; - } - case 1122: { - s += " reinterpretAsInt64 "; - break; - } - case 1123: { - s += " reinterpretAsInt8 "; - break; - } - case 1124: { - s += " reinterpretAsString "; - break; - } - case 1125: { - s += " reinterpretAsUInt128 "; - break; - } - case 1126: { - s += " reinterpretAsUInt16 "; - break; - } - case 1127: { - s += " reinterpretAsUInt256 "; - break; - } - case 1128: { - s += " reinterpretAsUInt32 "; - break; - } - case 1129: { - s += " reinterpretAsUInt64 "; - break; - } - case 1130: { - s += " reinterpretAsUInt8 "; - break; - } - case 1131: { - s += " reinterpretAsUUID "; - break; - } - case 1132: { - s += " RELOAD "; - break; - } - case 1133: { - s += " REMOVE "; - break; - } - case 1134: { - s += " RENAME "; - break; - } - case 1135: { - s += " RENAME COLUMN "; - break; - } - case 1136: { - s += " RENAME TABLE "; - break; - } - case 1137: { - s += " repeat "; - break; - } - case 1138: { - s += " replace "; - break; - } - case 1139: { - s += " REPLACE "; - break; - } - case 1140: { - s += " replaceAll "; - break; - } - case 1141: { - s += " replaceOne "; - break; - } - case 1142: { - s += " REPLACE PARTITION "; - break; - } - case 1143: { - s += " replaceRegexpAll "; - break; - } - case 1144: { - s += " replaceRegexpOne "; - break; - } - case 1145: { - s += " REPLICA "; - break; - 
} - case 1146: { - s += " replicate "; - break; - } - case 1147: { - s += " REPLICATED "; - break; - } - case 1148: { - s += " Resample "; - break; - } - case 1149: { - s += " RESUME "; - break; - } - case 1150: { - s += " retention "; - break; - } - case 1151: { - s += " reverse "; - break; - } - case 1152: { - s += " reverseUTF8 "; - break; - } - case 1153: { - s += " RIGHT "; - break; - } - case 1154: { - s += " rightPad "; - break; - } - case 1155: { - s += " rightPadUTF8 "; - break; - } - case 1156: { - s += " Ring "; - break; - } - case 1157: { - s += " ROLLUP "; - break; - } - case 1158: { - s += " round "; - break; - } - case 1159: { - s += " roundAge "; - break; - } - case 1160: { - s += " roundBankers "; - break; - } - case 1161: { - s += " roundDown "; - break; - } - case 1162: { - s += " roundDuration "; - break; - } - case 1163: { - s += " roundToExp2 "; - break; - } - case 1164: { - s += " row_number "; - break; - } - case 1165: { - s += " rowNumberInAllBlocks "; - break; - } - case 1166: { - s += " rowNumberInBlock "; - break; - } - case 1167: { - s += " rpad "; - break; - } - case 1168: { - s += " RTRIM "; - break; - } - case 1169: { - s += " runningAccumulate "; - break; - } - case 1170: { - s += " runningConcurrency "; - break; - } - case 1171: { - s += " runningDifference "; - break; - } - case 1172: { - s += " runningDifferenceStartingWithFirstValue "; - break; - } - case 1173: { - s += " S "; - break; - } - case 1174: { - s += " s2CapContains "; - break; - } - case 1175: { - s += " s2CapUnion "; - break; - } - case 1176: { - s += " s2CellsIntersect "; - break; - } - case 1177: { - s += " s2GetNeighbors "; - break; - } - case 1178: { - s += " s2RectAdd "; - break; - } - case 1179: { - s += " s2RectContains "; - break; - } - case 1180: { - s += " s2RectIntersection "; - break; - } - case 1181: { - s += " s2RectUnion "; - break; - } - case 1182: { - s += " s2ToGeo "; - break; - } - case 1183: { - s += " SAMPLE "; - break; - } - case 1184: { - s += " SAMPLE BY "; - break; - } - case 1185: { - s += " SECOND "; - break; - } - case 1186: { - s += " SELECT "; - break; - } - case 1187: { - s += " SEMI "; - break; - } - case 1188: { - s += " SENDS "; - break; - } - case 1189: { - s += " sequenceCount "; - break; - } - case 1190: { - s += " sequenceMatch "; - break; - } - case 1191: { - s += " sequenceNextNode "; - break; - } - case 1192: { - s += " serverUUID "; - break; - } - case 1193: { - s += " SET "; - break; - } - case 1194: { - s += " SETTINGS "; - break; - } - case 1195: { - s += " SHA1 "; - break; - } - case 1196: { - s += " SHA224 "; - break; - } - case 1197: { - s += " SHA256 "; - break; - } - case 1198: { - s += " SHA384 "; - break; - } - case 1199: { - s += " SHA512 "; - break; - } - case 1200: { - s += " shardCount "; - break; - } - case 1201: { - s += " shardNum "; - break; - } - case 1202: { - s += " SHOW "; - break; - } - case 1203: { - s += " SHOW PROCESSLIST "; - break; - } - case 1204: { - s += " sigmoid "; - break; - } - case 1205: { - s += " sign "; - break; - } - case 1206: { - s += " SimpleAggregateFunction "; - break; - } - case 1207: { - s += " simpleJSONExtractBool "; - break; - } - case 1208: { - s += " simpleJSONExtractFloat "; - break; - } - case 1209: { - s += " simpleJSONExtractInt "; - break; - } - case 1210: { - s += " simpleJSONExtractRaw "; - break; - } - case 1211: { - s += " simpleJSONExtractString "; - break; - } - case 1212: { - s += " simpleJSONExtractUInt "; - break; - } - case 1213: { - s += " simpleJSONHas "; - break; - } - case 1214: 
{ - s += " simpleLinearRegression "; - break; - } - case 1215: { - s += " sin "; - break; - } - case 1216: { - s += " SINGLE "; - break; - } - case 1217: { - s += " singleValueOrNull "; - break; - } - case 1218: { - s += " sinh "; - break; - } - case 1219: { - s += " sipHash128 "; - break; - } - case 1220: { - s += " sipHash64 "; - break; - } - case 1221: { - s += " skewPop "; - break; - } - case 1222: { - s += " skewSamp "; - break; - } - case 1223: { - s += " sleep "; - break; - } - case 1224: { - s += " sleepEachRow "; - break; - } - case 1225: { - s += " SMALLINT "; - break; - } - case 1226: { - s += " SMALLINT SIGNED "; - break; - } - case 1227: { - s += " SMALLINT UNSIGNED "; - break; - } - case 1228: { - s += " snowflakeToDateTime "; - break; - } - case 1229: { - s += " snowflakeToDateTime64 "; - break; - } - case 1230: { - s += " SOURCE "; - break; - } - case 1231: { - s += " sparkbar "; - break; - } - case 1232: { - s += " splitByChar "; - break; - } - case 1233: { - s += " splitByNonAlpha "; - break; - } - case 1234: { - s += " splitByRegexp "; - break; - } - case 1235: { - s += " splitByString "; - break; - } - case 1236: { - s += " splitByWhitespace "; - break; - } - case 1237: { - s += " SQL_TSI_DAY "; - break; - } - case 1238: { - s += " SQL_TSI_HOUR "; - break; - } - case 1239: { - s += " SQL_TSI_MINUTE "; - break; - } - case 1240: { - s += " SQL_TSI_MONTH "; - break; - } - case 1241: { - s += " SQL_TSI_QUARTER "; - break; - } - case 1242: { - s += " SQL_TSI_SECOND "; - break; - } - case 1243: { - s += " SQL_TSI_WEEK "; - break; - } - case 1244: { - s += " SQL_TSI_YEAR "; - break; - } - case 1245: { - s += " sqrt "; - break; - } - case 1246: { - s += " SS "; - break; - } - case 1247: { - s += " START "; - break; - } - case 1248: { - s += " startsWith "; - break; - } - case 1249: { - s += " State "; - break; - } - case 1250: { - s += " stddevPop "; - break; - } - case 1251: { - s += " STDDEV_POP "; - break; - } - case 1252: { - s += " stddevPopStable "; - break; - } - case 1253: { - s += " stddevSamp "; - break; - } - case 1254: { - s += " STDDEV_SAMP "; - break; - } - case 1255: { - s += " stddevSampStable "; - break; - } - case 1256: { - s += " stem "; - break; - } - case 1257: { - s += " STEP "; - break; - } - case 1258: { - s += " stochasticLinearRegression "; - break; - } - case 1259: { - s += " stochasticLogisticRegression "; - break; - } - case 1260: { - s += " STOP "; - break; - } - case 1261: { - s += " String "; - break; - } - case 1262: { - s += " stringToH3 "; - break; - } - case 1263: { - s += " studentTTest "; - break; - } - case 1264: { - s += " subBitmap "; - break; - } - case 1265: { - s += " substr "; - break; - } - case 1266: { - s += " substring "; - break; - } - case 1267: { - s += " SUBSTRING "; - break; - } - case 1268: { - s += " substringUTF8 "; - break; - } - case 1269: { - s += " subtractDays "; - break; - } - case 1270: { - s += " subtractHours "; - break; - } - case 1271: { - s += " subtractMinutes "; - break; - } - case 1272: { - s += " subtractMonths "; - break; - } - case 1273: { - s += " subtractQuarters "; - break; - } - case 1274: { - s += " subtractSeconds "; - break; - } - case 1275: { - s += " subtractWeeks "; - break; - } - case 1276: { - s += " subtractYears "; - break; - } - case 1277: { - s += " sum "; - break; - } - case 1278: { - s += " sumCount "; - break; - } - case 1279: { - s += " sumKahan "; - break; - } - case 1280: { - s += " sumMap "; - break; - } - case 1281: { - s += " sumMapFiltered "; - break; - } - case 1282: { - s += " 
sumMapFilteredWithOverflow "; - break; - } - case 1283: { - s += " sumMapWithOverflow "; - break; - } - case 1284: { - s += " sumWithOverflow "; - break; - } - case 1285: { - s += " SUSPEND "; - break; - } - case 1286: { - s += " svg "; - break; - } - case 1287: { - s += " SVG "; - break; - } - case 1288: { - s += " SYNC "; - break; - } - case 1289: { - s += " synonyms "; - break; - } - case 1290: { - s += " SYNTAX "; - break; - } - case 1291: { - s += " SYSTEM "; - break; - } - case 1292: { - s += " TABLE "; - break; - } - case 1293: { - s += " TABLES "; - break; - } - case 1294: { - s += " tan "; - break; - } - case 1295: { - s += " tanh "; - break; - } - case 1296: { - s += " tcpPort "; - break; - } - case 1297: { - s += " TEMPORARY "; - break; - } - case 1298: { - s += " TEST "; - break; - } - case 1299: { - s += " TEXT "; - break; - } - case 1300: { - s += " tgamma "; - break; - } - case 1301: { - s += " THEN "; - break; - } - case 1302: { - s += " throwIf "; - break; - } - case 1303: { - s += " tid "; - break; - } - case 1304: { - s += " TIES "; - break; - } - case 1305: { - s += " TIMEOUT "; - break; - } - case 1306: { - s += " timeSlot "; - break; - } - case 1307: { - s += " timeSlots "; - break; - } - case 1308: { - s += " TIMESTAMP "; - break; - } - case 1309: { - s += " TIMESTAMP_ADD "; - break; - } - case 1310: { - s += " TIMESTAMPADD "; - break; - } - case 1311: { - s += " TIMESTAMP_DIFF "; - break; - } - case 1312: { - s += " TIMESTAMPDIFF "; - break; - } - case 1313: { - s += " TIMESTAMP_SUB "; - break; - } - case 1314: { - s += " TIMESTAMPSUB "; - break; - } - case 1315: { - s += " timezone "; - break; - } - case 1316: { - s += " timeZone "; - break; - } - case 1317: { - s += " timezoneOf "; - break; - } - case 1318: { - s += " timeZoneOf "; - break; - } - case 1319: { - s += " timezoneOffset "; - break; - } - case 1320: { - s += " timeZoneOffset "; - break; - } - case 1321: { - s += " TINYBLOB "; - break; - } - case 1322: { - s += " TINYINT "; - break; - } - case 1323: { - s += " TINYINT SIGNED "; - break; - } - case 1324: { - s += " TINYINT UNSIGNED "; - break; - } - case 1325: { - s += " TINYTEXT "; - break; - } - case 1326: { - s += " TO "; - break; - } - case 1327: { - s += " TO_BASE64 "; - break; - } - case 1328: { - s += " toColumnTypeName "; - break; - } - case 1329: { - s += " toDate "; - break; - } - case 1330: { - s += " toDate32 "; - break; - } - case 1331: { - s += " toDate32OrNull "; - break; - } - case 1332: { - s += " toDate32OrZero "; - break; - } - case 1333: { - s += " toDateOrNull "; - break; - } - case 1334: { - s += " toDateOrZero "; - break; - } - case 1335: { - s += " toDateTime "; - break; - } - case 1336: { - s += " toDateTime32 "; - break; - } - case 1337: { - s += " toDateTime64 "; - break; - } - case 1338: { - s += " toDateTime64OrNull "; - break; - } - case 1339: { - s += " toDateTime64OrZero "; - break; - } - case 1340: { - s += " toDateTimeOrNull "; - break; - } - case 1341: { - s += " toDateTimeOrZero "; - break; - } - case 1342: { - s += " today "; - break; - } - case 1343: { - s += " toDayOfMonth "; - break; - } - case 1344: { - s += " toDayOfWeek "; - break; - } - case 1345: { - s += " toDayOfYear "; - break; - } - case 1346: { - s += " toDecimal128 "; - break; - } - case 1347: { - s += " toDecimal128OrNull "; - break; - } - case 1348: { - s += " toDecimal128OrZero "; - break; - } - case 1349: { - s += " toDecimal256 "; - break; - } - case 1350: { - s += " toDecimal256OrNull "; - break; - } - case 1351: { - s += " toDecimal256OrZero "; - 
break; - } - case 1352: { - s += " toDecimal32 "; - break; - } - case 1353: { - s += " toDecimal32OrNull "; - break; - } - case 1354: { - s += " toDecimal32OrZero "; - break; - } - case 1355: { - s += " toDecimal64 "; - break; - } - case 1356: { - s += " toDecimal64OrNull "; - break; - } - case 1357: { - s += " toDecimal64OrZero "; - break; - } - case 1358: { - s += " TO DISK "; - break; - } - case 1359: { - s += " toFixedString "; - break; - } - case 1360: { - s += " toFloat32 "; - break; - } - case 1361: { - s += " toFloat32OrNull "; - break; - } - case 1362: { - s += " toFloat32OrZero "; - break; - } - case 1363: { - s += " toFloat64 "; - break; - } - case 1364: { - s += " toFloat64OrNull "; - break; - } - case 1365: { - s += " toFloat64OrZero "; - break; - } - case 1366: { - s += " toHour "; - break; - } - case 1367: { - s += " toInt128 "; - break; - } - case 1368: { - s += " toInt128OrNull "; - break; - } - case 1369: { - s += " toInt128OrZero "; - break; - } - case 1370: { - s += " toInt16 "; - break; - } - case 1371: { - s += " toInt16OrNull "; - break; - } - case 1372: { - s += " toInt16OrZero "; - break; - } - case 1373: { - s += " toInt256 "; - break; - } - case 1374: { - s += " toInt256OrNull "; - break; - } - case 1375: { - s += " toInt256OrZero "; - break; - } - case 1376: { - s += " toInt32 "; - break; - } - case 1377: { - s += " toInt32OrNull "; - break; - } - case 1378: { - s += " toInt32OrZero "; - break; - } - case 1379: { - s += " toInt64 "; - break; - } - case 1380: { - s += " toInt64OrNull "; - break; - } - case 1381: { - s += " toInt64OrZero "; - break; - } - case 1382: { - s += " toInt8 "; - break; - } - case 1383: { - s += " toInt8OrNull "; - break; - } - case 1384: { - s += " toInt8OrZero "; - break; - } - case 1385: { - s += " toIntervalDay "; - break; - } - case 1386: { - s += " toIntervalHour "; - break; - } - case 1387: { - s += " toIntervalMinute "; - break; - } - case 1388: { - s += " toIntervalMonth "; - break; - } - case 1389: { - s += " toIntervalQuarter "; - break; - } - case 1390: { - s += " toIntervalSecond "; - break; - } - case 1391: { - s += " toIntervalWeek "; - break; - } - case 1392: { - s += " toIntervalYear "; - break; - } - case 1393: { - s += " toIPv4 "; - break; - } - case 1394: { - s += " toIPv6 "; - break; - } - case 1395: { - s += " toISOWeek "; - break; - } - case 1396: { - s += " toISOYear "; - break; - } - case 1397: { - s += " toJSONString "; - break; - } - case 1398: { - s += " toLowCardinality "; - break; - } - case 1399: { - s += " toMinute "; - break; - } - case 1400: { - s += " toModifiedJulianDay "; - break; - } - case 1401: { - s += " toModifiedJulianDayOrNull "; - break; - } - case 1402: { - s += " toMonday "; - break; - } - case 1403: { - s += " toMonth "; - break; - } - case 1404: { - s += " toNullable "; - break; - } - case 1405: { - s += " TOP "; - break; - } - case 1406: { - s += " topK "; - break; - } - case 1407: { - s += " topKWeighted "; - break; - } - case 1408: { - s += " topLevelDomain "; - break; - } - case 1409: { - s += " toQuarter "; - break; - } - case 1410: { - s += " toRelativeDayNum "; - break; - } - case 1411: { - s += " toRelativeHourNum "; - break; - } - case 1412: { - s += " toRelativeMinuteNum "; - break; - } - case 1413: { - s += " toRelativeMonthNum "; - break; - } - case 1414: { - s += " toRelativeQuarterNum "; - break; - } - case 1415: { - s += " toRelativeSecondNum "; - break; - } - case 1416: { - s += " toRelativeWeekNum "; - break; - } - case 1417: { - s += " toRelativeYearNum "; - break; - } - 
case 1418: { - s += " toSecond "; - break; - } - case 1419: { - s += " toStartOfDay "; - break; - } - case 1420: { - s += " toStartOfFifteenMinutes "; - break; - } - case 1421: { - s += " toStartOfFiveMinutes "; - break; - } - case 1422: { - s += " toStartOfHour "; - break; - } - case 1423: { - s += " toStartOfInterval "; - break; - } - case 1424: { - s += " toStartOfISOYear "; - break; - } - case 1425: { - s += " toStartOfMinute "; - break; - } - case 1426: { - s += " toStartOfMonth "; - break; - } - case 1427: { - s += " toStartOfQuarter "; - break; - } - case 1428: { - s += " toStartOfSecond "; - break; - } - case 1429: { - s += " toStartOfTenMinutes "; - break; - } - case 1430: { - s += " toStartOfWeek "; - break; - } - case 1431: { - s += " toStartOfYear "; - break; - } - case 1432: { - s += " toString "; - break; - } - case 1433: { - s += " toStringCutToZero "; - break; - } - case 1434: { - s += " TO TABLE "; - break; - } - case 1435: { - s += " TOTALS "; - break; - } - case 1436: { - s += " toTime "; - break; - } - case 1437: { - s += " toTimezone "; - break; - } - case 1438: { - s += " toTimeZone "; - break; - } - case 1439: { - s += " toTypeName "; - break; - } - case 1440: { - s += " toUInt128 "; - break; - } - case 1441: { - s += " toUInt128OrNull "; - break; - } - case 1442: { - s += " toUInt128OrZero "; - break; - } - case 1443: { - s += " toUInt16 "; - break; - } - case 1444: { - s += " toUInt16OrNull "; - break; - } - case 1445: { - s += " toUInt16OrZero "; - break; - } - case 1446: { - s += " toUInt256 "; - break; - } - case 1447: { - s += " toUInt256OrNull "; - break; - } - case 1448: { - s += " toUInt256OrZero "; - break; - } - case 1449: { - s += " toUInt32 "; - break; - } - case 1450: { - s += " toUInt32OrNull "; - break; - } - case 1451: { - s += " toUInt32OrZero "; - break; - } - case 1452: { - s += " toUInt64 "; - break; - } - case 1453: { - s += " toUInt64OrNull "; - break; - } - case 1454: { - s += " toUInt64OrZero "; - break; - } - case 1455: { - s += " toUInt8 "; - break; - } - case 1456: { - s += " toUInt8OrNull "; - break; - } - case 1457: { - s += " toUInt8OrZero "; - break; - } - case 1458: { - s += " toUnixTimestamp "; - break; - } - case 1459: { - s += " toUnixTimestamp64Micro "; - break; - } - case 1460: { - s += " toUnixTimestamp64Milli "; - break; - } - case 1461: { - s += " toUnixTimestamp64Nano "; - break; - } - case 1462: { - s += " toUUID "; - break; - } - case 1463: { - s += " toUUIDOrNull "; - break; - } - case 1464: { - s += " toUUIDOrZero "; - break; - } - case 1465: { - s += " toValidUTF8 "; - break; - } - case 1466: { - s += " TO VOLUME "; - break; - } - case 1467: { - s += " toWeek "; - break; - } - case 1468: { - s += " toYear "; - break; - } - case 1469: { - s += " toYearWeek "; - break; - } - case 1470: { - s += " toYYYYMM "; - break; - } - case 1471: { - s += " toYYYYMMDD "; - break; - } - case 1472: { - s += " toYYYYMMDDhhmmss "; - break; - } - case 1473: { - s += " TRAILING "; - break; - } - case 1474: { - s += " transform "; - break; - } - case 1475: { - s += " TRIM "; - break; - } - case 1476: { - s += " trimBoth "; - break; - } - case 1477: { - s += " trimLeft "; - break; - } - case 1478: { - s += " trimRight "; - break; - } - case 1479: { - s += " trunc "; - break; - } - case 1480: { - s += " truncate "; - break; - } - case 1481: { - s += " TRUNCATE "; - break; - } - case 1482: { - s += " tryBase64Decode "; - break; - } - case 1483: { - s += " TTL "; - break; - } - case 1484: { - s += " tuple "; - break; - } - case 1485: { - s += " 
Tuple "; - break; - } - case 1486: { - s += " tupleElement "; - break; - } - case 1487: { - s += " tupleHammingDistance "; - break; - } - case 1488: { - s += " tupleToNameValuePairs "; - break; - } - case 1489: { - s += " TYPE "; - break; - } - case 1490: { - s += " ucase "; - break; - } - case 1491: { - s += " UInt128 "; - break; - } - case 1492: { - s += " UInt16 "; - break; - } - case 1493: { - s += " UInt256 "; - break; - } - case 1494: { - s += " UInt32 "; - break; - } - case 1495: { - s += " UInt64 "; - break; - } - case 1496: { - s += " UInt8 "; - break; - } - case 1497: { - s += " unbin "; - break; - } - case 1498: { - s += " unhex "; - break; - } - case 1499: { - s += " UNION "; - break; - } - case 1500: { - s += " uniq "; - break; - } - case 1501: { - s += " uniqCombined "; - break; - } - case 1502: { - s += " uniqCombined64 "; - break; - } - case 1503: { - s += " uniqExact "; - break; - } - case 1504: { - s += " uniqHLL12 "; - break; - } - case 1505: { - s += " uniqTheta "; - break; - } - case 1506: { - s += " uniqUpTo "; - break; - } - case 1507: { - s += " UPDATE "; - break; - } - case 1508: { - s += " upper "; - break; - } - case 1509: { - s += " upperUTF8 "; - break; - } - case 1510: { - s += " uptime "; - break; - } - case 1511: { - s += " URLHash "; - break; - } - case 1512: { - s += " URLHierarchy "; - break; - } - case 1513: { - s += " URLPathHierarchy "; - break; - } - case 1514: { - s += " USE "; - break; - } - case 1515: { - s += " user "; - break; - } - case 1516: { - s += " USING "; - break; - } - case 1517: { - s += " UUID "; - break; - } - case 1518: { - s += " UUIDNumToString "; - break; - } - case 1519: { - s += " UUIDStringToNum "; - break; - } - case 1520: { - s += " validateNestedArraySizes "; - break; - } - case 1521: { - s += " VALUES "; - break; - } - case 1522: { - s += " VARCHAR "; - break; - } - case 1523: { - s += " VARCHAR2 "; - break; - } - case 1524: { - s += " varPop "; - break; - } - case 1525: { - s += " VAR_POP "; - break; - } - case 1526: { - s += " varPopStable "; - break; - } - case 1527: { - s += " varSamp "; - break; - } - case 1528: { - s += " VAR_SAMP "; - break; - } - case 1529: { - s += " varSampStable "; - break; - } - case 1530: { - s += " version "; - break; - } - case 1531: { - s += " VIEW "; - break; - } - case 1532: { - s += " visibleWidth "; - break; - } - case 1533: { - s += " visitParamExtractBool "; - break; - } - case 1534: { - s += " visitParamExtractFloat "; - break; - } - case 1535: { - s += " visitParamExtractInt "; - break; - } - case 1536: { - s += " visitParamExtractRaw "; - break; - } - case 1537: { - s += " visitParamExtractString "; - break; - } - case 1538: { - s += " visitParamExtractUInt "; - break; - } - case 1539: { - s += " visitParamHas "; - break; - } - case 1540: { - s += " VOLUME "; - break; - } - case 1541: { - s += " WATCH "; - break; - } - case 1542: { - s += " week "; - break; - } - case 1543: { - s += " WEEK "; - break; - } - case 1544: { - s += " welchTTest "; - break; - } - case 1545: { - s += " WHEN "; - break; - } - case 1546: { - s += " WHERE "; - break; - } - case 1547: { - s += " windowFunnel "; - break; - } - case 1548: { - s += " WITH "; - break; - } - case 1549: { - s += " WITH FILL "; - break; - } - case 1550: { - s += " WITH TIES "; - break; - } - case 1551: { - s += " WK "; - break; - } - case 1552: { - s += " wkt "; - break; - } - case 1553: { - s += " wordShingleMinHash "; - break; - } - case 1554: { - s += " wordShingleMinHashArg "; - break; - } - case 1555: { - s += " 
wordShingleMinHashArgCaseInsensitive "; - break; - } - case 1556: { - s += " wordShingleMinHashArgCaseInsensitiveUTF8 "; - break; - } - case 1557: { - s += " wordShingleMinHashArgUTF8 "; - break; - } - case 1558: { - s += " wordShingleMinHashCaseInsensitive "; - break; - } - case 1559: { - s += " wordShingleMinHashCaseInsensitiveUTF8 "; - break; - } - case 1560: { - s += " wordShingleMinHashUTF8 "; - break; - } - case 1561: { - s += " wordShingleSimHash "; - break; - } - case 1562: { - s += " wordShingleSimHashCaseInsensitive "; - break; - } - case 1563: { - s += " wordShingleSimHashCaseInsensitiveUTF8 "; - break; - } - case 1564: { - s += " wordShingleSimHashUTF8 "; - break; - } - case 1565: { - s += " WW "; - break; - } - case 1566: { - s += " xor "; - break; - } - case 1567: { - s += " xxHash32 "; - break; - } - case 1568: { - s += " xxHash64 "; - break; - } - case 1569: { - s += " kostikConsistentHash "; - break; - } - case 1570: { - s += " YEAR "; - break; - } - case 1571: { - s += " yearweek "; - break; - } - case 1572: { - s += " yesterday "; - break; - } - case 1573: { - s += " YY "; - break; - } - case 1574: { - s += " YYYY "; - break; - } - case 1575: { - s += " zookeeperSessionUptime "; - break; - } - default: break; - } -} diff --git a/src/Parsers/fuzzers/codegen_fuzzer/out.proto b/src/Parsers/fuzzers/codegen_fuzzer/out.proto deleted file mode 100644 index 60992ca6a81..00000000000 --- a/src/Parsers/fuzzers/codegen_fuzzer/out.proto +++ /dev/null @@ -1,1587 +0,0 @@ -syntax = "proto3"; - -message Word { - enum Value { - value_0 = 0; - value_1 = 1; - value_2 = 2; - value_3 = 3; - value_4 = 4; - value_5 = 5; - value_6 = 6; - value_7 = 7; - value_8 = 8; - value_9 = 9; - value_10 = 10; - value_11 = 11; - value_12 = 12; - value_13 = 13; - value_14 = 14; - value_15 = 15; - value_16 = 16; - value_17 = 17; - value_18 = 18; - value_19 = 19; - value_20 = 20; - value_21 = 21; - value_22 = 22; - value_23 = 23; - value_24 = 24; - value_25 = 25; - value_26 = 26; - value_27 = 27; - value_28 = 28; - value_29 = 29; - value_30 = 30; - value_31 = 31; - value_32 = 32; - value_33 = 33; - value_34 = 34; - value_35 = 35; - value_36 = 36; - value_37 = 37; - value_38 = 38; - value_39 = 39; - value_40 = 40; - value_41 = 41; - value_42 = 42; - value_43 = 43; - value_44 = 44; - value_45 = 45; - value_46 = 46; - value_47 = 47; - value_48 = 48; - value_49 = 49; - value_50 = 50; - value_51 = 51; - value_52 = 52; - value_53 = 53; - value_54 = 54; - value_55 = 55; - value_56 = 56; - value_57 = 57; - value_58 = 58; - value_59 = 59; - value_60 = 60; - value_61 = 61; - value_62 = 62; - value_63 = 63; - value_64 = 64; - value_65 = 65; - value_66 = 66; - value_67 = 67; - value_68 = 68; - value_69 = 69; - value_70 = 70; - value_71 = 71; - value_72 = 72; - value_73 = 73; - value_74 = 74; - value_75 = 75; - value_76 = 76; - value_77 = 77; - value_78 = 78; - value_79 = 79; - value_80 = 80; - value_81 = 81; - value_82 = 82; - value_83 = 83; - value_84 = 84; - value_85 = 85; - value_86 = 86; - value_87 = 87; - value_88 = 88; - value_89 = 89; - value_90 = 90; - value_91 = 91; - value_92 = 92; - value_93 = 93; - value_94 = 94; - value_95 = 95; - value_96 = 96; - value_97 = 97; - value_98 = 98; - value_99 = 99; - value_100 = 100; - value_101 = 101; - value_102 = 102; - value_103 = 103; - value_104 = 104; - value_105 = 105; - value_106 = 106; - value_107 = 107; - value_108 = 108; - value_109 = 109; - value_110 = 110; - value_111 = 111; - value_112 = 112; - value_113 = 113; - value_114 = 114; - value_115 = 115; - value_116 = 
116; - value_117 = 117; - value_118 = 118; - value_119 = 119; - value_120 = 120; - value_121 = 121; - value_122 = 122; - value_123 = 123; - value_124 = 124; - value_125 = 125; - value_126 = 126; - value_127 = 127; - value_128 = 128; - value_129 = 129; - value_130 = 130; - value_131 = 131; - value_132 = 132; - value_133 = 133; - value_134 = 134; - value_135 = 135; - value_136 = 136; - value_137 = 137; - value_138 = 138; - value_139 = 139; - value_140 = 140; - value_141 = 141; - value_142 = 142; - value_143 = 143; - value_144 = 144; - value_145 = 145; - value_146 = 146; - value_147 = 147; - value_148 = 148; - value_149 = 149; - value_150 = 150; - value_151 = 151; - value_152 = 152; - value_153 = 153; - value_154 = 154; - value_155 = 155; - value_156 = 156; - value_157 = 157; - value_158 = 158; - value_159 = 159; - value_160 = 160; - value_161 = 161; - value_162 = 162; - value_163 = 163; - value_164 = 164; - value_165 = 165; - value_166 = 166; - value_167 = 167; - value_168 = 168; - value_169 = 169; - value_170 = 170; - value_171 = 171; - value_172 = 172; - value_173 = 173; - value_174 = 174; - value_175 = 175; - value_176 = 176; - value_177 = 177; - value_178 = 178; - value_179 = 179; - value_180 = 180; - value_181 = 181; - value_182 = 182; - value_183 = 183; - value_184 = 184; - value_185 = 185; - value_186 = 186; - value_187 = 187; - value_188 = 188; - value_189 = 189; - value_190 = 190; - value_191 = 191; - value_192 = 192; - value_193 = 193; - value_194 = 194; - value_195 = 195; - value_196 = 196; - value_197 = 197; - value_198 = 198; - value_199 = 199; - value_200 = 200; - value_201 = 201; - value_202 = 202; - value_203 = 203; - value_204 = 204; - value_205 = 205; - value_206 = 206; - value_207 = 207; - value_208 = 208; - value_209 = 209; - value_210 = 210; - value_211 = 211; - value_212 = 212; - value_213 = 213; - value_214 = 214; - value_215 = 215; - value_216 = 216; - value_217 = 217; - value_218 = 218; - value_219 = 219; - value_220 = 220; - value_221 = 221; - value_222 = 222; - value_223 = 223; - value_224 = 224; - value_225 = 225; - value_226 = 226; - value_227 = 227; - value_228 = 228; - value_229 = 229; - value_230 = 230; - value_231 = 231; - value_232 = 232; - value_233 = 233; - value_234 = 234; - value_235 = 235; - value_236 = 236; - value_237 = 237; - value_238 = 238; - value_239 = 239; - value_240 = 240; - value_241 = 241; - value_242 = 242; - value_243 = 243; - value_244 = 244; - value_245 = 245; - value_246 = 246; - value_247 = 247; - value_248 = 248; - value_249 = 249; - value_250 = 250; - value_251 = 251; - value_252 = 252; - value_253 = 253; - value_254 = 254; - value_255 = 255; - value_256 = 256; - value_257 = 257; - value_258 = 258; - value_259 = 259; - value_260 = 260; - value_261 = 261; - value_262 = 262; - value_263 = 263; - value_264 = 264; - value_265 = 265; - value_266 = 266; - value_267 = 267; - value_268 = 268; - value_269 = 269; - value_270 = 270; - value_271 = 271; - value_272 = 272; - value_273 = 273; - value_274 = 274; - value_275 = 275; - value_276 = 276; - value_277 = 277; - value_278 = 278; - value_279 = 279; - value_280 = 280; - value_281 = 281; - value_282 = 282; - value_283 = 283; - value_284 = 284; - value_285 = 285; - value_286 = 286; - value_287 = 287; - value_288 = 288; - value_289 = 289; - value_290 = 290; - value_291 = 291; - value_292 = 292; - value_293 = 293; - value_294 = 294; - value_295 = 295; - value_296 = 296; - value_297 = 297; - value_298 = 298; - value_299 = 299; - value_300 = 300; - value_301 = 301; - value_302 = 302; - value_303 = 
303; - value_304 = 304; - value_305 = 305; - value_306 = 306; - value_307 = 307; - value_308 = 308; - value_309 = 309; - value_310 = 310; - value_311 = 311; - value_312 = 312; - value_313 = 313; - value_314 = 314; - value_315 = 315; - value_316 = 316; - value_317 = 317; - value_318 = 318; - value_319 = 319; - value_320 = 320; - value_321 = 321; - value_322 = 322; - value_323 = 323; - value_324 = 324; - value_325 = 325; - value_326 = 326; - value_327 = 327; - value_328 = 328; - value_329 = 329; - value_330 = 330; - value_331 = 331; - value_332 = 332; - value_333 = 333; - value_334 = 334; - value_335 = 335; - value_336 = 336; - value_337 = 337; - value_338 = 338; - value_339 = 339; - value_340 = 340; - value_341 = 341; - value_342 = 342; - value_343 = 343; - value_344 = 344; - value_345 = 345; - value_346 = 346; - value_347 = 347; - value_348 = 348; - value_349 = 349; - value_350 = 350; - value_351 = 351; - value_352 = 352; - value_353 = 353; - value_354 = 354; - value_355 = 355; - value_356 = 356; - value_357 = 357; - value_358 = 358; - value_359 = 359; - value_360 = 360; - value_361 = 361; - value_362 = 362; - value_363 = 363; - value_364 = 364; - value_365 = 365; - value_366 = 366; - value_367 = 367; - value_368 = 368; - value_369 = 369; - value_370 = 370; - value_371 = 371; - value_372 = 372; - value_373 = 373; - value_374 = 374; - value_375 = 375; - value_376 = 376; - value_377 = 377; - value_378 = 378; - value_379 = 379; - value_380 = 380; - value_381 = 381; - value_382 = 382; - value_383 = 383; - value_384 = 384; - value_385 = 385; - value_386 = 386; - value_387 = 387; - value_388 = 388; - value_389 = 389; - value_390 = 390; - value_391 = 391; - value_392 = 392; - value_393 = 393; - value_394 = 394; - value_395 = 395; - value_396 = 396; - value_397 = 397; - value_398 = 398; - value_399 = 399; - value_400 = 400; - value_401 = 401; - value_402 = 402; - value_403 = 403; - value_404 = 404; - value_405 = 405; - value_406 = 406; - value_407 = 407; - value_408 = 408; - value_409 = 409; - value_410 = 410; - value_411 = 411; - value_412 = 412; - value_413 = 413; - value_414 = 414; - value_415 = 415; - value_416 = 416; - value_417 = 417; - value_418 = 418; - value_419 = 419; - value_420 = 420; - value_421 = 421; - value_422 = 422; - value_423 = 423; - value_424 = 424; - value_425 = 425; - value_426 = 426; - value_427 = 427; - value_428 = 428; - value_429 = 429; - value_430 = 430; - value_431 = 431; - value_432 = 432; - value_433 = 433; - value_434 = 434; - value_435 = 435; - value_436 = 436; - value_437 = 437; - value_438 = 438; - value_439 = 439; - value_440 = 440; - value_441 = 441; - value_442 = 442; - value_443 = 443; - value_444 = 444; - value_445 = 445; - value_446 = 446; - value_447 = 447; - value_448 = 448; - value_449 = 449; - value_450 = 450; - value_451 = 451; - value_452 = 452; - value_453 = 453; - value_454 = 454; - value_455 = 455; - value_456 = 456; - value_457 = 457; - value_458 = 458; - value_459 = 459; - value_460 = 460; - value_461 = 461; - value_462 = 462; - value_463 = 463; - value_464 = 464; - value_465 = 465; - value_466 = 466; - value_467 = 467; - value_468 = 468; - value_469 = 469; - value_470 = 470; - value_471 = 471; - value_472 = 472; - value_473 = 473; - value_474 = 474; - value_475 = 475; - value_476 = 476; - value_477 = 477; - value_478 = 478; - value_479 = 479; - value_480 = 480; - value_481 = 481; - value_482 = 482; - value_483 = 483; - value_484 = 484; - value_485 = 485; - value_486 = 486; - value_487 = 487; - value_488 = 488; - value_489 = 489; - value_490 = 
490; - value_491 = 491; - value_492 = 492; - value_493 = 493; - value_494 = 494; - value_495 = 495; - value_496 = 496; - value_497 = 497; - value_498 = 498; - value_499 = 499; - value_500 = 500; - value_501 = 501; - value_502 = 502; - value_503 = 503; - value_504 = 504; - value_505 = 505; - value_506 = 506; - value_507 = 507; - value_508 = 508; - value_509 = 509; - value_510 = 510; - value_511 = 511; - value_512 = 512; - value_513 = 513; - value_514 = 514; - value_515 = 515; - value_516 = 516; - value_517 = 517; - value_518 = 518; - value_519 = 519; - value_520 = 520; - value_521 = 521; - value_522 = 522; - value_523 = 523; - value_524 = 524; - value_525 = 525; - value_526 = 526; - value_527 = 527; - value_528 = 528; - value_529 = 529; - value_530 = 530; - value_531 = 531; - value_532 = 532; - value_533 = 533; - value_534 = 534; - value_535 = 535; - value_536 = 536; - value_537 = 537; - value_538 = 538; - value_539 = 539; - value_540 = 540; - value_541 = 541; - value_542 = 542; - value_543 = 543; - value_544 = 544; - value_545 = 545; - value_546 = 546; - value_547 = 547; - value_548 = 548; - value_549 = 549; - value_550 = 550; - value_551 = 551; - value_552 = 552; - value_553 = 553; - value_554 = 554; - value_555 = 555; - value_556 = 556; - value_557 = 557; - value_558 = 558; - value_559 = 559; - value_560 = 560; - value_561 = 561; - value_562 = 562; - value_563 = 563; - value_564 = 564; - value_565 = 565; - value_566 = 566; - value_567 = 567; - value_568 = 568; - value_569 = 569; - value_570 = 570; - value_571 = 571; - value_572 = 572; - value_573 = 573; - value_574 = 574; - value_575 = 575; - value_576 = 576; - value_577 = 577; - value_578 = 578; - value_579 = 579; - value_580 = 580; - value_581 = 581; - value_582 = 582; - value_583 = 583; - value_584 = 584; - value_585 = 585; - value_586 = 586; - value_587 = 587; - value_588 = 588; - value_589 = 589; - value_590 = 590; - value_591 = 591; - value_592 = 592; - value_593 = 593; - value_594 = 594; - value_595 = 595; - value_596 = 596; - value_597 = 597; - value_598 = 598; - value_599 = 599; - value_600 = 600; - value_601 = 601; - value_602 = 602; - value_603 = 603; - value_604 = 604; - value_605 = 605; - value_606 = 606; - value_607 = 607; - value_608 = 608; - value_609 = 609; - value_610 = 610; - value_611 = 611; - value_612 = 612; - value_613 = 613; - value_614 = 614; - value_615 = 615; - value_616 = 616; - value_617 = 617; - value_618 = 618; - value_619 = 619; - value_620 = 620; - value_621 = 621; - value_622 = 622; - value_623 = 623; - value_624 = 624; - value_625 = 625; - value_626 = 626; - value_627 = 627; - value_628 = 628; - value_629 = 629; - value_630 = 630; - value_631 = 631; - value_632 = 632; - value_633 = 633; - value_634 = 634; - value_635 = 635; - value_636 = 636; - value_637 = 637; - value_638 = 638; - value_639 = 639; - value_640 = 640; - value_641 = 641; - value_642 = 642; - value_643 = 643; - value_644 = 644; - value_645 = 645; - value_646 = 646; - value_647 = 647; - value_648 = 648; - value_649 = 649; - value_650 = 650; - value_651 = 651; - value_652 = 652; - value_653 = 653; - value_654 = 654; - value_655 = 655; - value_656 = 656; - value_657 = 657; - value_658 = 658; - value_659 = 659; - value_660 = 660; - value_661 = 661; - value_662 = 662; - value_663 = 663; - value_664 = 664; - value_665 = 665; - value_666 = 666; - value_667 = 667; - value_668 = 668; - value_669 = 669; - value_670 = 670; - value_671 = 671; - value_672 = 672; - value_673 = 673; - value_674 = 674; - value_675 = 675; - value_676 = 676; - value_677 = 
677; - value_678 = 678; - value_679 = 679; - value_680 = 680; - value_681 = 681; - value_682 = 682; - value_683 = 683; - value_684 = 684; - value_685 = 685; - value_686 = 686; - value_687 = 687; - value_688 = 688; - value_689 = 689; - value_690 = 690; - value_691 = 691; - value_692 = 692; - value_693 = 693; - value_694 = 694; - value_695 = 695; - value_696 = 696; - value_697 = 697; - value_698 = 698; - value_699 = 699; - value_700 = 700; - value_701 = 701; - value_702 = 702; - value_703 = 703; - value_704 = 704; - value_705 = 705; - value_706 = 706; - value_707 = 707; - value_708 = 708; - value_709 = 709; - value_710 = 710; - value_711 = 711; - value_712 = 712; - value_713 = 713; - value_714 = 714; - value_715 = 715; - value_716 = 716; - value_717 = 717; - value_718 = 718; - value_719 = 719; - value_720 = 720; - value_721 = 721; - value_722 = 722; - value_723 = 723; - value_724 = 724; - value_725 = 725; - value_726 = 726; - value_727 = 727; - value_728 = 728; - value_729 = 729; - value_730 = 730; - value_731 = 731; - value_732 = 732; - value_733 = 733; - value_734 = 734; - value_735 = 735; - value_736 = 736; - value_737 = 737; - value_738 = 738; - value_739 = 739; - value_740 = 740; - value_741 = 741; - value_742 = 742; - value_743 = 743; - value_744 = 744; - value_745 = 745; - value_746 = 746; - value_747 = 747; - value_748 = 748; - value_749 = 749; - value_750 = 750; - value_751 = 751; - value_752 = 752; - value_753 = 753; - value_754 = 754; - value_755 = 755; - value_756 = 756; - value_757 = 757; - value_758 = 758; - value_759 = 759; - value_760 = 760; - value_761 = 761; - value_762 = 762; - value_763 = 763; - value_764 = 764; - value_765 = 765; - value_766 = 766; - value_767 = 767; - value_768 = 768; - value_769 = 769; - value_770 = 770; - value_771 = 771; - value_772 = 772; - value_773 = 773; - value_774 = 774; - value_775 = 775; - value_776 = 776; - value_777 = 777; - value_778 = 778; - value_779 = 779; - value_780 = 780; - value_781 = 781; - value_782 = 782; - value_783 = 783; - value_784 = 784; - value_785 = 785; - value_786 = 786; - value_787 = 787; - value_788 = 788; - value_789 = 789; - value_790 = 790; - value_791 = 791; - value_792 = 792; - value_793 = 793; - value_794 = 794; - value_795 = 795; - value_796 = 796; - value_797 = 797; - value_798 = 798; - value_799 = 799; - value_800 = 800; - value_801 = 801; - value_802 = 802; - value_803 = 803; - value_804 = 804; - value_805 = 805; - value_806 = 806; - value_807 = 807; - value_808 = 808; - value_809 = 809; - value_810 = 810; - value_811 = 811; - value_812 = 812; - value_813 = 813; - value_814 = 814; - value_815 = 815; - value_816 = 816; - value_817 = 817; - value_818 = 818; - value_819 = 819; - value_820 = 820; - value_821 = 821; - value_822 = 822; - value_823 = 823; - value_824 = 824; - value_825 = 825; - value_826 = 826; - value_827 = 827; - value_828 = 828; - value_829 = 829; - value_830 = 830; - value_831 = 831; - value_832 = 832; - value_833 = 833; - value_834 = 834; - value_835 = 835; - value_836 = 836; - value_837 = 837; - value_838 = 838; - value_839 = 839; - value_840 = 840; - value_841 = 841; - value_842 = 842; - value_843 = 843; - value_844 = 844; - value_845 = 845; - value_846 = 846; - value_847 = 847; - value_848 = 848; - value_849 = 849; - value_850 = 850; - value_851 = 851; - value_852 = 852; - value_853 = 853; - value_854 = 854; - value_855 = 855; - value_856 = 856; - value_857 = 857; - value_858 = 858; - value_859 = 859; - value_860 = 860; - value_861 = 861; - value_862 = 862; - value_863 = 863; - value_864 = 
864; - value_865 = 865; - value_866 = 866; - value_867 = 867; - value_868 = 868; - value_869 = 869; - value_870 = 870; - value_871 = 871; - value_872 = 872; - value_873 = 873; - value_874 = 874; - value_875 = 875; - value_876 = 876; - value_877 = 877; - value_878 = 878; - value_879 = 879; - value_880 = 880; - value_881 = 881; - value_882 = 882; - value_883 = 883; - value_884 = 884; - value_885 = 885; - value_886 = 886; - value_887 = 887; - value_888 = 888; - value_889 = 889; - value_890 = 890; - value_891 = 891; - value_892 = 892; - value_893 = 893; - value_894 = 894; - value_895 = 895; - value_896 = 896; - value_897 = 897; - value_898 = 898; - value_899 = 899; - value_900 = 900; - value_901 = 901; - value_902 = 902; - value_903 = 903; - value_904 = 904; - value_905 = 905; - value_906 = 906; - value_907 = 907; - value_908 = 908; - value_909 = 909; - value_910 = 910; - value_911 = 911; - value_912 = 912; - value_913 = 913; - value_914 = 914; - value_915 = 915; - value_916 = 916; - value_917 = 917; - value_918 = 918; - value_919 = 919; - value_920 = 920; - value_921 = 921; - value_922 = 922; - value_923 = 923; - value_924 = 924; - value_925 = 925; - value_926 = 926; - value_927 = 927; - value_928 = 928; - value_929 = 929; - value_930 = 930; - value_931 = 931; - value_932 = 932; - value_933 = 933; - value_934 = 934; - value_935 = 935; - value_936 = 936; - value_937 = 937; - value_938 = 938; - value_939 = 939; - value_940 = 940; - value_941 = 941; - value_942 = 942; - value_943 = 943; - value_944 = 944; - value_945 = 945; - value_946 = 946; - value_947 = 947; - value_948 = 948; - value_949 = 949; - value_950 = 950; - value_951 = 951; - value_952 = 952; - value_953 = 953; - value_954 = 954; - value_955 = 955; - value_956 = 956; - value_957 = 957; - value_958 = 958; - value_959 = 959; - value_960 = 960; - value_961 = 961; - value_962 = 962; - value_963 = 963; - value_964 = 964; - value_965 = 965; - value_966 = 966; - value_967 = 967; - value_968 = 968; - value_969 = 969; - value_970 = 970; - value_971 = 971; - value_972 = 972; - value_973 = 973; - value_974 = 974; - value_975 = 975; - value_976 = 976; - value_977 = 977; - value_978 = 978; - value_979 = 979; - value_980 = 980; - value_981 = 981; - value_982 = 982; - value_983 = 983; - value_984 = 984; - value_985 = 985; - value_986 = 986; - value_987 = 987; - value_988 = 988; - value_989 = 989; - value_990 = 990; - value_991 = 991; - value_992 = 992; - value_993 = 993; - value_994 = 994; - value_995 = 995; - value_996 = 996; - value_997 = 997; - value_998 = 998; - value_999 = 999; - value_1000 = 1000; - value_1001 = 1001; - value_1002 = 1002; - value_1003 = 1003; - value_1004 = 1004; - value_1005 = 1005; - value_1006 = 1006; - value_1007 = 1007; - value_1008 = 1008; - value_1009 = 1009; - value_1010 = 1010; - value_1011 = 1011; - value_1012 = 1012; - value_1013 = 1013; - value_1014 = 1014; - value_1015 = 1015; - value_1016 = 1016; - value_1017 = 1017; - value_1018 = 1018; - value_1019 = 1019; - value_1020 = 1020; - value_1021 = 1021; - value_1022 = 1022; - value_1023 = 1023; - value_1024 = 1024; - value_1025 = 1025; - value_1026 = 1026; - value_1027 = 1027; - value_1028 = 1028; - value_1029 = 1029; - value_1030 = 1030; - value_1031 = 1031; - value_1032 = 1032; - value_1033 = 1033; - value_1034 = 1034; - value_1035 = 1035; - value_1036 = 1036; - value_1037 = 1037; - value_1038 = 1038; - value_1039 = 1039; - value_1040 = 1040; - value_1041 = 1041; - value_1042 = 1042; - value_1043 = 1043; - value_1044 = 1044; - value_1045 = 1045; - value_1046 = 
1046; - value_1047 = 1047; - value_1048 = 1048; - value_1049 = 1049; - value_1050 = 1050; - value_1051 = 1051; - value_1052 = 1052; - value_1053 = 1053; - value_1054 = 1054; - value_1055 = 1055; - value_1056 = 1056; - value_1057 = 1057; - value_1058 = 1058; - value_1059 = 1059; - value_1060 = 1060; - value_1061 = 1061; - value_1062 = 1062; - value_1063 = 1063; - value_1064 = 1064; - value_1065 = 1065; - value_1066 = 1066; - value_1067 = 1067; - value_1068 = 1068; - value_1069 = 1069; - value_1070 = 1070; - value_1071 = 1071; - value_1072 = 1072; - value_1073 = 1073; - value_1074 = 1074; - value_1075 = 1075; - value_1076 = 1076; - value_1077 = 1077; - value_1078 = 1078; - value_1079 = 1079; - value_1080 = 1080; - value_1081 = 1081; - value_1082 = 1082; - value_1083 = 1083; - value_1084 = 1084; - value_1085 = 1085; - value_1086 = 1086; - value_1087 = 1087; - value_1088 = 1088; - value_1089 = 1089; - value_1090 = 1090; - value_1091 = 1091; - value_1092 = 1092; - value_1093 = 1093; - value_1094 = 1094; - value_1095 = 1095; - value_1096 = 1096; - value_1097 = 1097; - value_1098 = 1098; - value_1099 = 1099; - value_1100 = 1100; - value_1101 = 1101; - value_1102 = 1102; - value_1103 = 1103; - value_1104 = 1104; - value_1105 = 1105; - value_1106 = 1106; - value_1107 = 1107; - value_1108 = 1108; - value_1109 = 1109; - value_1110 = 1110; - value_1111 = 1111; - value_1112 = 1112; - value_1113 = 1113; - value_1114 = 1114; - value_1115 = 1115; - value_1116 = 1116; - value_1117 = 1117; - value_1118 = 1118; - value_1119 = 1119; - value_1120 = 1120; - value_1121 = 1121; - value_1122 = 1122; - value_1123 = 1123; - value_1124 = 1124; - value_1125 = 1125; - value_1126 = 1126; - value_1127 = 1127; - value_1128 = 1128; - value_1129 = 1129; - value_1130 = 1130; - value_1131 = 1131; - value_1132 = 1132; - value_1133 = 1133; - value_1134 = 1134; - value_1135 = 1135; - value_1136 = 1136; - value_1137 = 1137; - value_1138 = 1138; - value_1139 = 1139; - value_1140 = 1140; - value_1141 = 1141; - value_1142 = 1142; - value_1143 = 1143; - value_1144 = 1144; - value_1145 = 1145; - value_1146 = 1146; - value_1147 = 1147; - value_1148 = 1148; - value_1149 = 1149; - value_1150 = 1150; - value_1151 = 1151; - value_1152 = 1152; - value_1153 = 1153; - value_1154 = 1154; - value_1155 = 1155; - value_1156 = 1156; - value_1157 = 1157; - value_1158 = 1158; - value_1159 = 1159; - value_1160 = 1160; - value_1161 = 1161; - value_1162 = 1162; - value_1163 = 1163; - value_1164 = 1164; - value_1165 = 1165; - value_1166 = 1166; - value_1167 = 1167; - value_1168 = 1168; - value_1169 = 1169; - value_1170 = 1170; - value_1171 = 1171; - value_1172 = 1172; - value_1173 = 1173; - value_1174 = 1174; - value_1175 = 1175; - value_1176 = 1176; - value_1177 = 1177; - value_1178 = 1178; - value_1179 = 1179; - value_1180 = 1180; - value_1181 = 1181; - value_1182 = 1182; - value_1183 = 1183; - value_1184 = 1184; - value_1185 = 1185; - value_1186 = 1186; - value_1187 = 1187; - value_1188 = 1188; - value_1189 = 1189; - value_1190 = 1190; - value_1191 = 1191; - value_1192 = 1192; - value_1193 = 1193; - value_1194 = 1194; - value_1195 = 1195; - value_1196 = 1196; - value_1197 = 1197; - value_1198 = 1198; - value_1199 = 1199; - value_1200 = 1200; - value_1201 = 1201; - value_1202 = 1202; - value_1203 = 1203; - value_1204 = 1204; - value_1205 = 1205; - value_1206 = 1206; - value_1207 = 1207; - value_1208 = 1208; - value_1209 = 1209; - value_1210 = 1210; - value_1211 = 1211; - value_1212 = 1212; - value_1213 = 1213; - value_1214 = 1214; - value_1215 = 1215; 
- value_1216 = 1216; - value_1217 = 1217; - value_1218 = 1218; - value_1219 = 1219; - value_1220 = 1220; - value_1221 = 1221; - value_1222 = 1222; - value_1223 = 1223; - value_1224 = 1224; - value_1225 = 1225; - value_1226 = 1226; - value_1227 = 1227; - value_1228 = 1228; - value_1229 = 1229; - value_1230 = 1230; - value_1231 = 1231; - value_1232 = 1232; - value_1233 = 1233; - value_1234 = 1234; - value_1235 = 1235; - value_1236 = 1236; - value_1237 = 1237; - value_1238 = 1238; - value_1239 = 1239; - value_1240 = 1240; - value_1241 = 1241; - value_1242 = 1242; - value_1243 = 1243; - value_1244 = 1244; - value_1245 = 1245; - value_1246 = 1246; - value_1247 = 1247; - value_1248 = 1248; - value_1249 = 1249; - value_1250 = 1250; - value_1251 = 1251; - value_1252 = 1252; - value_1253 = 1253; - value_1254 = 1254; - value_1255 = 1255; - value_1256 = 1256; - value_1257 = 1257; - value_1258 = 1258; - value_1259 = 1259; - value_1260 = 1260; - value_1261 = 1261; - value_1262 = 1262; - value_1263 = 1263; - value_1264 = 1264; - value_1265 = 1265; - value_1266 = 1266; - value_1267 = 1267; - value_1268 = 1268; - value_1269 = 1269; - value_1270 = 1270; - value_1271 = 1271; - value_1272 = 1272; - value_1273 = 1273; - value_1274 = 1274; - value_1275 = 1275; - value_1276 = 1276; - value_1277 = 1277; - value_1278 = 1278; - value_1279 = 1279; - value_1280 = 1280; - value_1281 = 1281; - value_1282 = 1282; - value_1283 = 1283; - value_1284 = 1284; - value_1285 = 1285; - value_1286 = 1286; - value_1287 = 1287; - value_1288 = 1288; - value_1289 = 1289; - value_1290 = 1290; - value_1291 = 1291; - value_1292 = 1292; - value_1293 = 1293; - value_1294 = 1294; - value_1295 = 1295; - value_1296 = 1296; - value_1297 = 1297; - value_1298 = 1298; - value_1299 = 1299; - value_1300 = 1300; - value_1301 = 1301; - value_1302 = 1302; - value_1303 = 1303; - value_1304 = 1304; - value_1305 = 1305; - value_1306 = 1306; - value_1307 = 1307; - value_1308 = 1308; - value_1309 = 1309; - value_1310 = 1310; - value_1311 = 1311; - value_1312 = 1312; - value_1313 = 1313; - value_1314 = 1314; - value_1315 = 1315; - value_1316 = 1316; - value_1317 = 1317; - value_1318 = 1318; - value_1319 = 1319; - value_1320 = 1320; - value_1321 = 1321; - value_1322 = 1322; - value_1323 = 1323; - value_1324 = 1324; - value_1325 = 1325; - value_1326 = 1326; - value_1327 = 1327; - value_1328 = 1328; - value_1329 = 1329; - value_1330 = 1330; - value_1331 = 1331; - value_1332 = 1332; - value_1333 = 1333; - value_1334 = 1334; - value_1335 = 1335; - value_1336 = 1336; - value_1337 = 1337; - value_1338 = 1338; - value_1339 = 1339; - value_1340 = 1340; - value_1341 = 1341; - value_1342 = 1342; - value_1343 = 1343; - value_1344 = 1344; - value_1345 = 1345; - value_1346 = 1346; - value_1347 = 1347; - value_1348 = 1348; - value_1349 = 1349; - value_1350 = 1350; - value_1351 = 1351; - value_1352 = 1352; - value_1353 = 1353; - value_1354 = 1354; - value_1355 = 1355; - value_1356 = 1356; - value_1357 = 1357; - value_1358 = 1358; - value_1359 = 1359; - value_1360 = 1360; - value_1361 = 1361; - value_1362 = 1362; - value_1363 = 1363; - value_1364 = 1364; - value_1365 = 1365; - value_1366 = 1366; - value_1367 = 1367; - value_1368 = 1368; - value_1369 = 1369; - value_1370 = 1370; - value_1371 = 1371; - value_1372 = 1372; - value_1373 = 1373; - value_1374 = 1374; - value_1375 = 1375; - value_1376 = 1376; - value_1377 = 1377; - value_1378 = 1378; - value_1379 = 1379; - value_1380 = 1380; - value_1381 = 1381; - value_1382 = 1382; - value_1383 = 1383; - value_1384 = 1384; - 
value_1385 = 1385; - value_1386 = 1386; - value_1387 = 1387; - value_1388 = 1388; - value_1389 = 1389; - value_1390 = 1390; - value_1391 = 1391; - value_1392 = 1392; - value_1393 = 1393; - value_1394 = 1394; - value_1395 = 1395; - value_1396 = 1396; - value_1397 = 1397; - value_1398 = 1398; - value_1399 = 1399; - value_1400 = 1400; - value_1401 = 1401; - value_1402 = 1402; - value_1403 = 1403; - value_1404 = 1404; - value_1405 = 1405; - value_1406 = 1406; - value_1407 = 1407; - value_1408 = 1408; - value_1409 = 1409; - value_1410 = 1410; - value_1411 = 1411; - value_1412 = 1412; - value_1413 = 1413; - value_1414 = 1414; - value_1415 = 1415; - value_1416 = 1416; - value_1417 = 1417; - value_1418 = 1418; - value_1419 = 1419; - value_1420 = 1420; - value_1421 = 1421; - value_1422 = 1422; - value_1423 = 1423; - value_1424 = 1424; - value_1425 = 1425; - value_1426 = 1426; - value_1427 = 1427; - value_1428 = 1428; - value_1429 = 1429; - value_1430 = 1430; - value_1431 = 1431; - value_1432 = 1432; - value_1433 = 1433; - value_1434 = 1434; - value_1435 = 1435; - value_1436 = 1436; - value_1437 = 1437; - value_1438 = 1438; - value_1439 = 1439; - value_1440 = 1440; - value_1441 = 1441; - value_1442 = 1442; - value_1443 = 1443; - value_1444 = 1444; - value_1445 = 1445; - value_1446 = 1446; - value_1447 = 1447; - value_1448 = 1448; - value_1449 = 1449; - value_1450 = 1450; - value_1451 = 1451; - value_1452 = 1452; - value_1453 = 1453; - value_1454 = 1454; - value_1455 = 1455; - value_1456 = 1456; - value_1457 = 1457; - value_1458 = 1458; - value_1459 = 1459; - value_1460 = 1460; - value_1461 = 1461; - value_1462 = 1462; - value_1463 = 1463; - value_1464 = 1464; - value_1465 = 1465; - value_1466 = 1466; - value_1467 = 1467; - value_1468 = 1468; - value_1469 = 1469; - value_1470 = 1470; - value_1471 = 1471; - value_1472 = 1472; - value_1473 = 1473; - value_1474 = 1474; - value_1475 = 1475; - value_1476 = 1476; - value_1477 = 1477; - value_1478 = 1478; - value_1479 = 1479; - value_1480 = 1480; - value_1481 = 1481; - value_1482 = 1482; - value_1483 = 1483; - value_1484 = 1484; - value_1485 = 1485; - value_1486 = 1486; - value_1487 = 1487; - value_1488 = 1488; - value_1489 = 1489; - value_1490 = 1490; - value_1491 = 1491; - value_1492 = 1492; - value_1493 = 1493; - value_1494 = 1494; - value_1495 = 1495; - value_1496 = 1496; - value_1497 = 1497; - value_1498 = 1498; - value_1499 = 1499; - value_1500 = 1500; - value_1501 = 1501; - value_1502 = 1502; - value_1503 = 1503; - value_1504 = 1504; - value_1505 = 1505; - value_1506 = 1506; - value_1507 = 1507; - value_1508 = 1508; - value_1509 = 1509; - value_1510 = 1510; - value_1511 = 1511; - value_1512 = 1512; - value_1513 = 1513; - value_1514 = 1514; - value_1515 = 1515; - value_1516 = 1516; - value_1517 = 1517; - value_1518 = 1518; - value_1519 = 1519; - value_1520 = 1520; - value_1521 = 1521; - value_1522 = 1522; - value_1523 = 1523; - value_1524 = 1524; - value_1525 = 1525; - value_1526 = 1526; - value_1527 = 1527; - value_1528 = 1528; - value_1529 = 1529; - value_1530 = 1530; - value_1531 = 1531; - value_1532 = 1532; - value_1533 = 1533; - value_1534 = 1534; - value_1535 = 1535; - value_1536 = 1536; - value_1537 = 1537; - value_1538 = 1538; - value_1539 = 1539; - value_1540 = 1540; - value_1541 = 1541; - value_1542 = 1542; - value_1543 = 1543; - value_1544 = 1544; - value_1545 = 1545; - value_1546 = 1546; - value_1547 = 1547; - value_1548 = 1548; - value_1549 = 1549; - value_1550 = 1550; - value_1551 = 1551; - value_1552 = 1552; - value_1553 = 1553; - 
value_1554 = 1554; - value_1555 = 1555; - value_1556 = 1556; - value_1557 = 1557; - value_1558 = 1558; - value_1559 = 1559; - value_1560 = 1560; - value_1561 = 1561; - value_1562 = 1562; - value_1563 = 1563; - value_1564 = 1564; - value_1565 = 1565; - value_1566 = 1566; - value_1567 = 1567; - value_1568 = 1568; - value_1569 = 1569; - value_1570 = 1570; - value_1571 = 1571; - value_1572 = 1572; - value_1573 = 1573; - value_1574 = 1574; - value_1575 = 1575; - } - Value value = 1; - Sentence inner = 2; -} -message Sentence { - repeated Word words = 1; -} From 6c68269458a7d01a6fef4c6757e1e4c9419fec08 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 4 Jul 2023 00:25:51 +0200 Subject: [PATCH 199/522] Update Field.h --- src/Core/Field.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 97a32ab5bb1..686bfafc397 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -739,7 +739,7 @@ private: using StorageType = NearestFieldType; /// Incrementing the depth since we create a new Field. - auto depth = calculateAndCheckFieldDepth(x) + 1; + auto depth = calculateAndCheckFieldDepth(x); new (&storage) StorageType(std::forward(x)); if constexpr (std::is_same_v From 87a2c44778db4dce5a425c7f618009e5652a6bef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:43:25 +0200 Subject: [PATCH 200/522] Fix test 02789_object_type_invalid_num_of_rows --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 - .../0_stateless/02789_object_type_invalid_num_of_rows.sql | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 7dec35f7acb..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +0,0 @@ -0.02 diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql index a9c8a844aa0..d0fc6905593 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.sql @@ -1,2 +1,2 @@ set allow_experimental_object_type=1; -SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) settings max_threads=1; -- { serverError NOT_IMPLEMENTED } +SELECT '0.02' GROUP BY GROUPING SETS (('6553.6'), (CAST('{"x" : 1}', 'Object(\'json\')'))) FORMAT Null; -- { serverError NOT_IMPLEMENTED } From be1353fe334cf90d5534036e306dc424cbf26773 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:47:31 +0200 Subject: [PATCH 201/522] Revert "Merge pull request #51750 from ClickHouse/revert-51296-object_column_invalid_num_of_rows" This reverts commit 73d956249eaf5076691af80edac689d466a1f0fd, reversing changes made to 4d76b538c7838c55105746bfdd99db1e01e1fd91. 
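Stepping back to the Field.h one-liner earlier in this group (patch 199): it drops a "+ 1" at the call site of calculateAndCheckFieldDepth, and together with patch 203 below the depth bookkeeping settles on this scheme: each Array/Tuple/Map/Object wrapper caches the depth of its subtree, and the checker takes the maximum over the children and rejects anything at or past the cap, so that recursive operations on deeply nested Fields cannot overflow the stack. A minimal standalone sketch of that guard follows; Node, kMaxDepth and makeParent are invented names for illustration, not the real DB::Field machinery.

    #include <algorithm>
    #include <cstdint>
    #include <stdexcept>
    #include <utility>
    #include <vector>

    // Illustration only: each node caches the nesting depth of its subtree,
    // so the check is O(number of children), not a full re-traversal.
    struct Node
    {
        std::vector<Node> children;
        uint8_t depth = 0; // 0 for a leaf
    };

    constexpr uint8_t kMaxDepth = 255; // plays the role of DBMS_MAX_NESTED_FIELD_DEPTH

    Node makeParent(std::vector<Node> children)
    {
        uint8_t deepest_child = 0;
        for (const Node & c : children)
            deepest_child = std::max(deepest_child, c.depth);

        if (deepest_child >= kMaxDepth)
            throw std::runtime_error("Too deep nesting of values");

        Node parent;
        parent.children = std::move(children);
        parent.depth = static_cast<uint8_t>(deepest_child + 1); // one level added per wrapper
        return parent;
    }
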
--- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index e69de29bb2d..7dec35f7acb 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -0,0 +1 @@ +0.02 From c65ee49a37773a2034c4e9a439ba6ebaf1820955 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 00:49:39 +0200 Subject: [PATCH 202/522] Revert "Merge pull request #51750 from ClickHouse/revert-51296-object_column_invalid_num_of_rows" This reverts commit 73d956249eaf5076691af80edac689d466a1f0fd, reversing changes made to 4d76b538c7838c55105746bfdd99db1e01e1fd91. --- src/Processors/QueryPlan/AggregatingStep.cpp | 2 ++ .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 + 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 4ac972e2a79..eebbfc04304 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -319,6 +319,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { auto column_with_default = col.column->cloneEmpty(); col.type->insertDefaultInto(*column_with_default); + column_with_default->finalize(); + auto column = ColumnConst::create(std::move(column_with_default), 0); const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); node = &dag->materializeNode(*node); diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference new file mode 100644 index 00000000000..7dec35f7acb --- /dev/null +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -0,0 +1 @@ +0.02 From 9d11678f8486c81bc38638c7144e705ad46af304 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 4 Jul 2023 00:54:50 +0200 Subject: [PATCH 203/522] Polishing --- src/Core/Field.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index 686bfafc397..cc0083e02d8 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -48,7 +48,7 @@ using FieldVector = std::vector>; struct X : public FieldVector \ { \ using FieldVector::FieldVector; \ - size_t nested_field_depth = 0; \ + uint8_t nested_field_depth = 0; \ } DEFINE_FIELD_VECTOR(Array); @@ -65,7 +65,7 @@ using FieldMap = std::map, AllocatorWithMemoryTrackin struct X : public FieldMap \ { \ using FieldMap::FieldMap; \ - size_t nested_field_depth = 0; \ + uint8_t nested_field_depth = 0; \ } DEFINE_FIELD_MAP(Object); @@ -296,10 +296,11 @@ decltype(auto) castToNearestFieldType(T && x) */ #define DBMS_MIN_FIELD_SIZE 32 +/// Note: uint8_t is used for storing depth value. #if defined(SANITIZER) || !defined(NDEBUG) #define DBMS_MAX_NESTED_FIELD_DEPTH 64 #else - #define DBMS_MAX_NESTED_FIELD_DEPTH 256 + #define DBMS_MAX_NESTED_FIELD_DEPTH 255 #endif /** Discriminated union of several types. 
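On the hunk just above: nested_field_depth shrinks from size_t to uint8_t inside the Array/Tuple/Map/Object wrappers, so the release-build cap also drops from 256 to 255, the largest value an 8-bit counter can hold (the sanitizer/debug cap of 64 is unaffected). If one wanted that coupling enforced at build time, a guard along these lines would do; this is a hypothetical addition, not part of the patch:

    #include <cstdint>
    #include <limits>

    // Hypothetical, not in the patch: fails the build if the cap ever again
    // exceeds what the uint8_t nested_field_depth members can represent.
    static_assert(DBMS_MAX_NESTED_FIELD_DEPTH <= std::numeric_limits<uint8_t>::max(),
                  "nested_field_depth is stored in uint8_t");
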
@@ -683,9 +684,9 @@ private: /// StorageType and Original are the same for Array, Tuple, Map, Object template - size_t calculateAndCheckFieldDepth(Original && x) + uint8_t calculateAndCheckFieldDepth(Original && x) { - size_t result = 0; + uint8_t result = 0; if constexpr (std::is_same_v || std::is_same_v @@ -694,29 +695,29 @@ private: { result = x.nested_field_depth; - auto calculate_max = [](const Field & elem, size_t result) + auto get_depth = [](const Field & elem) { switch (elem.which) { case Types::Array: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; case Types::Tuple: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; case Types::Map: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; case Types::Object: - return std::max(result, elem.template get().nested_field_depth); + return elem.template get().nested_field_depth; default: - return result; + return static_cast(0); } }; if constexpr (std::is_same_v) for (auto & [_, value] : x) - result = calculate_max(value, result); + result = std::max(get_depth(value), result); else for (auto & value : x) - result = calculate_max(value, result); + result = std::max(get_depth(value), result); } if (result >= DBMS_MAX_NESTED_FIELD_DEPTH) From c25da7cbf4432a8b49155902dd8e5f23929fa844 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 01:03:49 +0200 Subject: [PATCH 204/522] Fix (benign) data rance in `transform` --- src/Columns/ColumnVector.h | 4 ++-- src/Functions/transform.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index bf790423d1d..b8ebff2a5d5 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -107,8 +107,8 @@ struct FloatCompareHelper } }; -template struct CompareHelper : public FloatCompareHelper {}; -template struct CompareHelper : public FloatCompareHelper {}; +template struct CompareHelper : public FloatCompareHelper {}; +template struct CompareHelper : public FloatCompareHelper {}; /** A template for columns that use a simple array to store. 
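The transform.cpp hunk that follows carries the actual race fix: the function's lazily built cache has an early-return branch for an all-NULL first argument that set cache.is_empty before the std::lock_guard on cache.mutex was taken, and the patch hoists the lock above that branch. The shape of the fix, reduced to a sketch in which Cache and initializeCache are stand-ins rather than the real types:

    #include <mutex>

    // Stand-in types: the point is only the lock placement.
    struct Cache
    {
        std::mutex mutex;
        bool is_empty = false;
        // ... parsed lookup tables, etc.
    };

    void initializeCache(Cache & cache, bool from_type_only_null)
    {
        std::lock_guard<std::mutex> lock(cache.mutex); // now taken first

        if (from_type_only_null)
        {
            cache.is_empty = true; // previously written before the lock was held
            return;
        }

        // ... validate arguments and fill the cache, still under the lock ...
    }
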
diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 8d6e53c491e..1fc0e3adf96 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -698,6 +698,8 @@ namespace const DataTypePtr & from_type = arguments[0].type; + std::lock_guard lock(cache.mutex); + if (from_type->onlyNull()) { cache.is_empty = true; @@ -711,8 +713,6 @@ namespace throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Second and third arguments of function {} must be constant arrays.", getName()); - std::lock_guard lock(cache.mutex); - const ColumnPtr & from_column_uncasted = array_from->getDataPtr(); cache.from_column = castColumn( From 49907818e10030e4129bcd65c52a865b0415abf0 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 4 Jul 2023 02:32:08 +0000 Subject: [PATCH 205/522] Added waiting before crash log store finish --- src/Common/SystemLogBase.cpp | 49 ++++++++++------- src/Common/SystemLogBase.h | 9 +++- src/Daemon/BaseDaemon.cpp | 3 ++ src/Interpreters/CrashLog.cpp | 3 ++ tests/integration/test_crash_log/__init__.py | 0 tests/integration/test_crash_log/test.py | 57 ++++++++++++++++++++ 6 files changed, 101 insertions(+), 20 deletions(-) create mode 100644 tests/integration/test_crash_log/__init__.py create mode 100644 tests/integration/test_crash_log/test.py diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index 86adcbbd31b..cb1d2c3b3e7 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -137,25 +137,9 @@ void SystemLogBase::add(const LogElement & element) template void SystemLogBase::flush(bool force) { - uint64_t this_thread_requested_offset; - - { - std::lock_guard lock(mutex); - - if (is_shutdown) - return; - - this_thread_requested_offset = queue_front_index + queue.size(); - - // Publish our flush request, taking care not to overwrite the requests - // made by other threads. - is_force_prepare_tables |= force; - requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); - - flush_event.notify_all(); - } - - LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); + uint64_t this_thread_requested_offset = notifyFlushImpl(force); + if (this_thread_requested_offset == uint64_t(-1)) + return; // Use an arbitrary timeout to avoid endless waiting. 60s proved to be // too fast for our parallel functional tests, probably because they @@ -174,6 +158,33 @@ void SystemLogBase::flush(bool force) } } +template +void SystemLogBase::notifyFlush(bool force) { notifyFlushImpl(force ); } + +template +uint64_t SystemLogBase::notifyFlushImpl(bool force) +{ + uint64_t this_thread_requested_offset; + + { + std::lock_guard lock(mutex); + if (is_shutdown) + return uint64_t(-1); + + this_thread_requested_offset = queue_front_index + queue.size(); + + // Publish our flush request, taking care not to overwrite the requests + // made by other threads. 
+ is_force_prepare_tables |= force; + requested_flush_up_to = std::max(requested_flush_up_to, this_thread_requested_offset); + + flush_event.notify_all(); + } + + LOG_DEBUG(log, "Requested flush up to offset {}", this_thread_requested_offset); + return this_thread_requested_offset; +} + #define INSTANTIATE_SYSTEM_LOG_BASE(ELEMENT) template class SystemLogBase; SYSTEM_LOG_ELEMENTS(INSTANTIATE_SYSTEM_LOG_BASE) diff --git a/src/Common/SystemLogBase.h b/src/Common/SystemLogBase.h index f8febd8b159..92409028c22 100644 --- a/src/Common/SystemLogBase.h +++ b/src/Common/SystemLogBase.h @@ -87,9 +87,12 @@ public: */ void add(const LogElement & element); - /// Flush data in the buffer to disk + /// Flush data in the buffer to disk. Block the thread until the data is stored on disk. void flush(bool force) override; + /// Non-blocking flush data in the buffer to disk. + void notifyFlush(bool force); + String getName() const override { return LogElement::name(); } static const char * getDefaultOrderBy() { return "event_date, event_time"; } @@ -112,6 +115,10 @@ protected: uint64_t flushed_up_to = 0; // Logged overflow message at this queue front index uint64_t logged_queue_full_at_index = -1; + +private: + uint64_t notifyFlushImpl(bool force); + }; } diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..2c4c740af30 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -173,6 +173,9 @@ static void signalHandler(int sig, siginfo_t * info, void * context) /// This coarse method of synchronization is perfectly ok for fatal signals. sleepForSeconds(1); } + + /// Wait for all logs flush operations + sleepForSeconds(3); call_default_signal_handler(sig); } diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index f1f0ffb6f60..3f5476535a6 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -84,5 +84,8 @@ void collectCrashLog(Int32 signal, UInt64 thread_id, const String & query_id, co CrashLogElement element{static_cast(time / 1000000000), time, signal, thread_id, query_id, trace, trace_full}; crash_log_owned->add(element); + /// Notify savingThreadFunction to start flushing crash log + /// Crash log is storing in parallel with the signal processing thread. 
+ crash_log_owned->notifyFlush(true); } } diff --git a/tests/integration/test_crash_log/__init__.py b/tests/integration/test_crash_log/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py new file mode 100644 index 00000000000..9f6eca794b1 --- /dev/null +++ b/tests/integration/test_crash_log/test.py @@ -0,0 +1,57 @@ +import os +import time +import pytest + +import helpers.cluster +import helpers.test_tools + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + + +@pytest.fixture(scope="module") +def started_node(): + cluster = helpers.cluster.ClickHouseCluster(__file__) + try: + node = cluster.add_instance("node", stay_alive=True) + + cluster.start() + yield node + finally: + cluster.shutdown() + + +def send_signal(started_node, signal): + started_node.exec_in_container( + ["bash", "-c", f"pkill -{signal} clickhouse"], user="root" + ) + + +def wait_for_clickhouse_stop(started_node): + result = None + for attempt in range(60): + time.sleep(1) + pid = started_node.get_process_pid("clickhouse") + if pid is None: + result = "OK" + break + assert result == "OK", "ClickHouse process is still running" + + +def test_pkill(started_node): + if ( + started_node.is_built_with_thread_sanitizer() + or started_node.is_built_with_address_sanitizer() + or started_node.is_built_with_memory_sanitizer() + ): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + + crashes_count = 0 + for signal in ["SEGV", "4"]: + send_signal(started_node, signal) + wait_for_clickhouse_stop(started_node) + started_node.restart_clickhouse() + crashes_count += 1 + assert ( + started_node.query("SELECT COUNT(*) FROM system.crash_log") + == f"{crashes_count}\n" + ) From 50d555a3987d5e653da64107254af3bcfd33af81 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 4 Jul 2023 02:45:15 +0000 Subject: [PATCH 206/522] style correction --- src/Common/SystemLogBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp index cb1d2c3b3e7..5e9ee9a1e04 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -159,7 +159,7 @@ void SystemLogBase::flush(bool force) } template -void SystemLogBase::notifyFlush(bool force) { notifyFlushImpl(force ); } +void SystemLogBase::notifyFlush(bool force) { notifyFlushImpl(force); } template uint64_t SystemLogBase::notifyFlushImpl(bool force) From f86c5edfc465717a5344a8b71e140f0ceaa9ba47 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 07:54:13 +0000 Subject: [PATCH 207/522] Remove debug tracing --- src/Interpreters/GraceHashJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index f5b2386fd1e..8acdb4e90dd 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -623,8 +623,6 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) void GraceHashJoin::addJoinedBlockImpl(Block block) { - LOG_ERROR(&Poco::Logger::get(__PRETTY_FUNCTION__), ""); - block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); size_t bucket_index = current_bucket->idx; From 1b71bbf1b119c937a176ff63a4ffaeb660d96038 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 08:27:48 +0000 Subject: [PATCH 208/522] Add test for ZK disconnect --- tests/integration/test_keeper_map/test.py | 18 ++++++++++++++---- 1 file changed, 14 
insertions(+), 4 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index c6ec7103056..fbae875d2e6 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,4 +1,5 @@ import pytest +import time from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager @@ -39,9 +40,18 @@ def remove_children(client, path): def test_keeper_map_without_zk(started_cluster): + def wait_disconnect_from_zk(): + for _ in range(20): + if len(node.query_and_get_answer_with_error("SELECT * FROM system.zookeeper WHERE path='/'")[1]) != 0: + break + time.sleep(1) + else: + assert False, "ClickHouse didn't disconnect from ZK after DROP rule was added" + def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) + wait_disconnect_from_zk() error = node.query_and_get_error(query) assert "Coordination::Exception" in error @@ -49,17 +59,17 @@ def test_keeper_map_without_zk(started_cluster): "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) - node.query( + node.query_with_retry( "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") + node.query_with_retry("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") - node.query("SELECT * FROM test_keeper_map_without_zk") + node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") with PartitionManager() as pm: pm.drop_instance_zk_connections(node) @@ -67,7 +77,7 @@ def test_keeper_map_without_zk(started_cluster): error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of connection issues" in error - node.query("SELECT * FROM test_keeper_map_without_zk") + node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() remove_children(client, "/test_keeper_map/test_without_zk") From 30be0ab4a8af7247a12c94076cd17834f712b9d9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 09:00:53 +0000 Subject: [PATCH 209/522] Fix: unexpected number of buckets Number of buckets could become inconsistent if exception was thrown during new buckets creation --- src/Interpreters/GraceHashJoin.cpp | 63 ++++++++++++++++++------------ src/Interpreters/GraceHashJoin.h | 5 ++- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 8acdb4e90dd..aa7091548d7 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -288,10 +288,7 @@ void GraceHashJoin::initBuckets() size_t initial_num_buckets = roundUpToPowerOfTwoOrZero(std::clamp(settings.grace_hash_join_initial_buckets, 1, settings.grace_hash_join_max_buckets)); - for (size_t i = 0; i < initial_num_buckets; ++i) - { - addBucket(buckets); - } + addBuckets(initial_num_buckets); if (buckets.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "No buckets created"); @@ -368,40 +365,54 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets() if (to_size > max_num_buckets) { - throw Exception(ErrorCodes::LIMIT_EXCEEDED, + throw 
Exception( + ErrorCodes::LIMIT_EXCEEDED, "Too many grace hash join buckets ({} > {}), " "consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", - to_size, max_num_buckets); + to_size, + max_num_buckets); } LOG_TRACE(log, "Rehashing from {} to {}", current_size, to_size); - buckets.reserve(to_size); - for (size_t i = current_size; i < to_size; ++i) - addBucket(buckets); + addBuckets(to_size - current_size); return buckets; } -void GraceHashJoin::addBucket(Buckets & destination) +void GraceHashJoin::addBuckets(const size_t bucket_count) { - // There could be exceptions from createStream, In ci tests - // there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability. - // It may terminate this thread and leave a broken hash_join, and another thread cores when it tries to - // use the broken hash_join. So we print an exception message here to help debug. - try - { - auto & left_file = tmp_data->createStream(left_sample_block); - auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + // Exception can be thrown in number of cases: + // - during creation of temporary files for buckets + // - in CI tests, there is a certain probability of failure in allocating memory, see memory_tracker_fault_probability + // Therefore, new buckets are added only after all of them created successfully, + // otherwise we can end up having unexpected number of buckets - BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log); - destination.emplace_back(std::move(new_bucket)); - } - catch (...) - { - LOG_ERROR(&Poco::Logger::get("GraceHashJoin"), "Can't create bucket. current buckets size: {}", destination.size()); - throw; - } + const size_t current_size = buckets.size(); + Buckets tmp_buckets; + tmp_buckets.reserve(bucket_count); + for (size_t i = 0; i < bucket_count; ++i) + try + { + auto & left_file = tmp_data->createStream(left_sample_block); + auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + + BucketPtr new_bucket = std::make_shared(current_size + i, left_file, right_file, log); + tmp_buckets.emplace_back(std::move(new_bucket)); + } + catch (...) + { + LOG_ERROR( + &Poco::Logger::get("GraceHashJoin"), + "Can't create bucket {} due to error: {}", + current_size + i, + getCurrentExceptionMessage(false)); + throw; + } + + buckets.reserve(buckets.size() + bucket_count); + for(auto & bucket : tmp_buckets) + buckets.emplace_back(std::move(bucket)); } void GraceHashJoin::checkTypesOfKeys(const Block & block) const diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index fd3397ba15e..78ba70bc764 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -101,8 +101,9 @@ private: bool hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const; bool hasMemoryOverflow(const BlocksList & blocks) const; - /// Create new bucket at the end of @destination. - void addBucket(Buckets & destination); + /// Add bucket_count new buckets + /// Throws if a bucket creation fails + void addBuckets(size_t bucket_count); /// Increase number of buckets to match desired_size. /// Called when HashJoin in-memory table for one bucket exceeds the limits. 
From 57ada39ccf1615910afb48152a30ba7410152a36 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 09:31:15 +0000 Subject: [PATCH 210/522] Add retries and iptables rules dump --- tests/integration/helpers/cluster.py | 7 +-- tests/integration/helpers/network.py | 11 +++- tests/integration/test_keeper_map/test.py | 66 ++++++++++++++--------- 3 files changed, 54 insertions(+), 30 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 21398790be3..3db0ad12295 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3416,13 +3416,14 @@ class ClickHouseInstance: database=database, ) time.sleep(sleep_time) + + if result is not None: + return result except QueryRuntimeException as ex: logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) - if result is not None: - return result - raise Exception("Query {sql} did not fail".format(sql)) + raise Exception("Query {} did not fail".format(sql)) # The same as query_and_get_error but ignores successful query. def query_and_get_answer_with_error( diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 2df560708e0..60b46926589 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -32,6 +32,9 @@ class PartitionManager: {"destination": instance.ip_address, "source_port": 2181, "action": action} ) + def dump_rules(self): + return _NetworkManager.get().dump_rules() + def restore_instance_zk_connections(self, instance, action="DROP"): self._check_instance(instance) @@ -157,6 +160,10 @@ class _NetworkManager: cmd.extend(self._iptables_cmd_suffix(**kwargs)) self._exec_run(cmd, privileged=True) + def dump_rules(self): + cmd = ["iptables", "-L", "DOCKER-USER"] + return self._exec_run(cmd, privileged=True) + @staticmethod def clean_all_user_iptables_rules(): for i in range(1000): @@ -212,8 +219,8 @@ class _NetworkManager: def __init__( self, - container_expire_timeout=50, - container_exit_timeout=60, + container_expire_timeout=120, + container_exit_timeout=120, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): self.container_expire_timeout = container_expire_timeout diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index fbae875d2e6..d7b4230d872 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -1,8 +1,7 @@ import pytest -import time from helpers.cluster import ClickHouseCluster -from helpers.network import PartitionManager +from helpers.network import PartitionManager, _NetworkManager test_recover_staled_replica_run = 1 @@ -39,50 +38,67 @@ def remove_children(client, path): client.delete(child_path) -def test_keeper_map_without_zk(started_cluster): - def wait_disconnect_from_zk(): - for _ in range(20): - if len(node.query_and_get_answer_with_error("SELECT * FROM system.zookeeper WHERE path='/'")[1]) != 0: - break - time.sleep(1) - else: - assert False, "ClickHouse didn't disconnect from ZK after DROP rule was added" +def print_iptables_rules(): + print(f"iptables rules: {_NetworkManager.get().dump_rules()}") - def assert_keeper_exception_after_partition(query): - with PartitionManager() as pm: - pm.drop_instance_zk_connections(node) - wait_disconnect_from_zk() - error = node.query_and_get_error(query) + +def assert_keeper_exception_after_partition(query): + with PartitionManager() as pm: + pm.drop_instance_zk_connections(node) + try: + 
error = node.query_and_get_error_with_retry(query, sleep_time=1) assert "Coordination::Exception" in error + except: + print_iptables_rules() + raise + +def run_query(query): + try: + result = node.query_with_retry(query, sleep_time=1) + return result + except: + print_iptables_rules() + raise + + +def test_keeper_map_without_zk(started_cluster): assert_keeper_exception_after_partition( - "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) - node.query_with_retry( - "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" + run_query( + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_keeper_map_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query_with_retry("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") + run_query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") - node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") + assert run_query("SELECT * FROM test_keeper_map_without_zk") == "1\t11\n" with PartitionManager() as pm: pm.drop_instance_zk_connections(node) node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") - assert "Failed to activate table because of connection issues" in error + try: + error = node.query_and_get_error_with_retry( + "SELECT * FROM test_keeper_map_without_zk", sleep_time=1 + ) + assert "Failed to activate table because of connection issues" in error + except: + print_iptables_rules() + raise - node.query_with_retry("SELECT * FROM test_keeper_map_without_zk") + run_query("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() - remove_children(client, "/test_keeper_map/test_without_zk") + remove_children(client, "/test_keeper_map/test_keeper_map_without_zk") node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") + error = node.query_and_get_error_with_retry( + "SELECT * FROM test_keeper_map_without_zk" + ) assert "Failed to activate table because of invalid metadata in ZooKeeper" in error node.query("DETACH TABLE test_keeper_map_without_zk") From 60c6cc17fca0cf98ace414f6c81e780859439a1c Mon Sep 17 00:00:00 2001 From: yariks5s <114298166+yariks5s@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:39:39 +0200 Subject: [PATCH 211/522] Update include brackets Co-authored-by: Sergei Trifonov --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 87c3567f7d5..1cf86f1ae4d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,6 +1,6 @@ #include "IMergeTreeDataPart.h" -#include "Storages/MergeTree/IDataPartStorage.h" -#include "base/types.h" +#include +#include #include #include From 0094919c436dacc4f52c97efedf2b6a166cf198b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 4 Jul 2023 12:53:03 +0300 Subject: [PATCH 212/522] Add line --- tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql | 2 +- 
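The `query_with_retry` fix in the cluster helper above moves the `result` check inside the retry loop, so a successful attempt returns immediately instead of being observed only after the final iteration. A stand-alone C++ sketch of that retry shape — the names `retry` and `flaky` are invented for illustration; the actual ClickHouse test helpers are the Python functions shown above:

```cpp
#include <chrono>
#include <functional>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <thread>

// Retry `action` up to `attempts` times; return the first successful result.
// The success check sits *inside* the loop, mirroring the cluster.py fix.
std::optional<std::string> retry(const std::function<std::optional<std::string>()> & action,
                                 int attempts, std::chrono::milliseconds pause)
{
    for (int i = 0; i < attempts; ++i)
    {
        try
        {
            if (auto result = action())   // check immediately after each attempt
                return result;
        }
        catch (const std::exception & e)
        {
            std::cout << "attempt " << i + 1 << " failed: " << e.what() << '\n';
        }
        std::this_thread::sleep_for(pause);
    }
    return std::nullopt;                  // caller decides whether this is fatal
}

int main()
{
    int calls = 0;
    auto flaky = [&]() -> std::optional<std::string>
    {
        if (++calls < 3) throw std::runtime_error("connection refused");
        return "1\t11";
    };
    if (auto r = retry(flaky, 5, std::chrono::milliseconds(10)))
        std::cout << "got: " << *r << '\n';
}
```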
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql index 5350ef99ed3..155596dd1d5 100644 --- a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql @@ -5,4 +5,4 @@ SELECT toUInt64(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverErro SELECT toInt128(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } SELECT cast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); select accurateCast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); -select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; \ No newline at end of file +select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; From 0e4cd16582c5427cebb52fece006d16ec5e337e2 Mon Sep 17 00:00:00 2001 From: yariks5s <114298166+yariks5s@users.noreply.github.com> Date: Tue, 4 Jul 2023 11:53:23 +0200 Subject: [PATCH 213/522] Update include brackets --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 1cf86f1ae4d..021f624e783 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,4 +1,4 @@ -#include "IMergeTreeDataPart.h" +#include #include #include From 0d0e59abc8672348f9a3d8386fa20454271ff618 Mon Sep 17 00:00:00 2001 From: yariks5s <114298166+yariks5s@users.noreply.github.com> Date: Tue, 4 Jul 2023 12:08:58 +0200 Subject: [PATCH 214/522] Update IMergeTreeDataPart.cpp --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 021f624e783..1cf86f1ae4d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1,4 +1,4 @@ -#include +#include "IMergeTreeDataPart.h" #include #include From 9c8cb9f77ee3f087e3799b2a14a4d0db6f75f367 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 4 Jul 2023 13:20:48 +0300 Subject: [PATCH 215/522] Add comment to docs --- docs/en/sql-reference/functions/array-functions.md | 3 ++- docs/ru/sql-reference/functions/array-functions.md | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7f2b8f3c605..763a61d43f2 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -142,6 +142,7 @@ range([start, ] end [, step]) - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type). **Examples** @@ -878,7 +879,7 @@ A special function. 
See the section [“ArrayJoin function”](../../sql-referen ## arrayDifference -Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). +Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). **Syntax** diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index c43323d68fd..439eddfd752 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -145,6 +145,8 @@ range([start, ] end [, step]) - Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение. +- Возвращает Null если любой аргумент Nullable(Nothing) типа. Генерируется исключение если любой аргумент Null (Nullable(T) тип). + **Примеры** Запрос: From 6bca452924bf4ceecbd4106acbfc99bc49276012 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 4 Jul 2023 12:42:27 +0200 Subject: [PATCH 216/522] Fix tests --- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- .../test_s3_table_functions/configs/users.d/users.xml | 9 +++++++++ tests/integration/test_s3_table_functions/test.py | 9 ++++++--- 3 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 tests/integration/test_s3_table_functions/configs/users.d/users.xml diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 5f1f13ca5a2..c2e9501f58c 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -53,7 +53,7 @@ TEST(AccessRights, Union) "SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, " "SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, " "SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, " - "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION CONTROL ON db1"); + "SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, GRANT NAMED COLLECTION ADMIN ON db1"); } diff --git a/tests/integration/test_s3_table_functions/configs/users.d/users.xml b/tests/integration/test_s3_table_functions/configs/users.d/users.xml new file mode 100644 index 00000000000..4b6ba057ecb --- /dev/null +++ b/tests/integration/test_s3_table_functions/configs/users.d/users.xml @@ -0,0 +1,9 @@ + + + + + default + 1 + + + diff --git a/tests/integration/test_s3_table_functions/test.py b/tests/integration/test_s3_table_functions/test.py index 516d6582990..a6def175136 100644 --- a/tests/integration/test_s3_table_functions/test.py +++ b/tests/integration/test_s3_table_functions/test.py @@ -11,6 +11,9 @@ node = cluster.add_instance( main_configs=[ "configs/config.d/minio.xml", ], + user_configs=[ + "configs/users.d/users.xml", + ], with_minio=True, ) @@ -44,7 +47,7 @@ def 
test_s3_table_functions(started_cluster): """ INSERT INTO FUNCTION s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', @@ -60,7 +63,7 @@ def test_s3_table_functions(started_cluster): """ SELECT count(*) FROM s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', @@ -85,7 +88,7 @@ def test_s3_table_functions_timeouts(started_cluster): """ INSERT INTO FUNCTION s3 ( - nc_s3, + nc_s3, filename = 'test_file.tsv.gz', format = 'TSV', structure = 'number UInt64', From c9a26d43c5b053c3e5a0898e382c0405a4ecf4a6 Mon Sep 17 00:00:00 2001 From: Rory Crispin Date: Tue, 4 Jul 2023 12:06:15 +0100 Subject: [PATCH 217/522] (docs) Remove async_metric_log event_time_microseconds event_time_microseconds was removed from the system.asynchronous_metric_log in https://github.com/ClickHouse/ClickHouse/pull/36360 --- .../system-tables/asynchronous_metric_log.md | 25 +++++++++---------- .../system-tables/asynchronous_metric_log.md | 1 - .../system-tables/asynchronous_metric_log.md | 25 +++++++++---------- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 4290799b6bc..efe57a202d8 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -9,7 +9,6 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. 
@@ -20,18 +19,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **See Also** diff --git a/docs/ru/operations/system-tables/asynchronous_metric_log.md b/docs/ru/operations/system-tables/asynchronous_metric_log.md index 886fbb6cab0..5145889c95f 100644 --- a/docs/ru/operations/system-tables/asynchronous_metric_log.md +++ b/docs/ru/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /ru/operations/system-tables/asynchronous_metric_log Столбцы: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время события в микросекундах. - `name` ([String](../../sql-reference/data-types/string.md)) — название метрики. - `value` ([Float64](../../sql-reference/data-types/float.md)) — значение метрики. 
diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 419ad2a7ed6..9fa399f1aed 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /zh/operations/system-tables/asynchronous_metric_log 列: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件日期。 - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间。 -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件时间(微秒)。 - `name` ([String](../../sql-reference/data-types/string.md)) — 指标名。 - `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 @@ -17,18 +16,18 @@ slug: /zh/operations/system-tables/asynchronous_metric_log SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **另请参阅** From fd6115f0e3ef53fb6b0a7c411de13148e8cdc10e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 4 Jul 2023 11:42:27 +0000 Subject: [PATCH 218/522] Fix flaky tests with timeout --- ...20_create_sync_race_condition_zookeeper.sh | 17 ++++++++----- .../0_stateless/01632_tinylog_read_write.sh | 24 ++++++++++-------- .../02481_async_insert_race_long.sh | 25 +++++++++++-------- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git 
a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh
index aee69e64b1b..57409d782ae 100755
--- a/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh
+++ b/tests/queries/0_stateless/01320_create_sync_race_condition_zookeeper.sh
@@ -12,22 +12,27 @@ $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 --query "CREATE DATABA
 
 function thread1()
 {
-    while true; do
-        $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x;
-            DROP TABLE test_01320.r;" 2>&1 | grep -F "Code:" | grep -v "UNKNOWN_DATABASE"
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
+        $CLICKHOUSE_CLIENT -n --query "CREATE TABLE test_01320.r (x UInt64) ENGINE = ReplicatedMergeTree('/test/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/table', 'r') ORDER BY x; DROP TABLE test_01320.r;"
     done
 }
 
 function thread2()
 {
-    while true; do $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_01320.r" 2>/dev/null; done
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
+        $CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA test_01320.r" 2>/dev/null;
+    done
 }
 
 export -f thread1
 export -f thread2
 
-timeout 10 bash -c thread1 &
-timeout 10 bash -c thread2 &
+TIMEOUT=10
+
+thread1 $TIMEOUT &
+thread2 $TIMEOUT &
 
 wait
diff --git a/tests/queries/0_stateless/01632_tinylog_read_write.sh b/tests/queries/0_stateless/01632_tinylog_read_write.sh
index 69f985a9d0d..10625ec5d27 100755
--- a/tests/queries/0_stateless/01632_tinylog_read_write.sh
+++ b/tests/queries/0_stateless/01632_tinylog_read_write.sh
@@ -11,14 +11,16 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 $CLICKHOUSE_CLIENT --multiquery --query "DROP TABLE IF EXISTS test; CREATE TABLE IF NOT EXISTS test (x UInt64, s Array(Nullable(String))) ENGINE = TinyLog;"
 
 function thread_select {
-    while true; do
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
         $CLICKHOUSE_CLIENT --local_filesystem_read_method pread --query "SELECT * FROM test FORMAT Null"
         sleep 0.0$RANDOM
     done
 }
 
 function thread_insert {
-    while true; do
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
         $CLICKHOUSE_CLIENT --query "INSERT INTO test VALUES (1, ['Hello'])"
         sleep 0.0$RANDOM
     done
@@ -30,15 +32,17 @@ export -f thread_insert
 
 # Do randomized queries and expect nothing extraordinary happens.
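The hunks around here replace the external `timeout 10 bash -c thread` wrapper with a `$SECONDS`-based deadline inside each worker, so every iteration runs to completion instead of the whole shell being killed mid-query. A hedged C++ sketch of the same deadline-loop pattern — `doOneQuery` is an invented stand-in for one unit of test work:

```cpp
#include <chrono>
#include <iostream>
#include <thread>

// Stand-in for one unit of test work (a single query, insert, etc.).
void doOneQuery()
{
    std::this_thread::sleep_for(std::chrono::milliseconds(50));
}

// Run complete iterations until the budget is exhausted, mirroring
//   local TIMELIMIT=$((SECONDS+$1)); while [ $SECONDS -lt "$TIMELIMIT" ]; do ... done
// No iteration is interrupted halfway, unlike `timeout 10 bash -c ...`.
void runFor(std::chrono::seconds budget)
{
    const auto deadline = std::chrono::steady_clock::now() + budget;
    size_t iterations = 0;
    while (std::chrono::steady_clock::now() < deadline)
    {
        doOneQuery();   // always allowed to finish
        ++iterations;
    }
    std::cout << "finished " << iterations << " complete iterations\n";
}

int main()
{
    runFor(std::chrono::seconds(1));
}
```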
-timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & -timeout 10 bash -c 'thread_select' & +TIMEOUT=10 -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & -timeout 10 bash -c 'thread_insert' & +thread_select $TIMEOUT & +thread_select $TIMEOUT & +thread_select $TIMEOUT & +thread_select $TIMEOUT & + +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & +thread_insert $TIMEOUT & wait echo "Done" diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index cec9278c127..c4b026c6aba 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -11,21 +11,24 @@ export MY_CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --async_insert_busy_timeout_ms 1 function insert1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT CSV 1,"a"' done } function insert2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 0 -q 'INSERT INTO async_inserts_race FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } function insert3() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done @@ -33,29 +36,29 @@ function insert3() function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${MY_CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts_race FORMAT Null" done - } ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts_race" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts_race (id UInt32, s String) ENGINE = MergeTree ORDER BY id" -TIMEOUT=10 - export -f insert1 export -f insert2 export -f insert3 export -f select1 +TIMEOUT=10 + for _ in {1..3}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash -c select1 & +select1 $TIMEOUT & wait echo "OK" From ca6930eb110903709fc4c2e1cbec19a95e55ee18 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Tue, 4 Jul 2023 17:38:53 +0200 Subject: [PATCH 219/522] Revert "Revert "Merge pull request #50951 from ZhiguoZh/20230607-toyear-fix"" --- src/Functions/DateTimeTransforms.h | 72 +++++++ .../FunctionDateOrDateTimeToSomething.h | 13 ++ src/Functions/IFunction.h | 29 ++- src/Functions/IFunctionAdaptors.h | 7 + ...OrDateTimeConverterWithPreimageVisitor.cpp | 199 ++++++++++++++++++ ...teOrDateTimeConverterWithPreimageVisitor.h | 37 ++++ src/Interpreters/TreeOptimizer.cpp | 19 ++ ...783_date_predicate_optimizations.reference | 52 +++++ .../02783_date_predicate_optimizations.sql | 76 +++++++ ...dicate_optimizations_ast_rewrite.reference | 87 ++++++++ ...te_predicate_optimizations_ast_rewrite.sql | 47 +++++ 11 files changed, 632 insertions(+), 6 deletions(-) create mode 100644 src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp create mode 100644 src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h create mode 100644 
tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference create mode 100644 tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 019e0c42cde..84c71c89b11 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -322,6 +322,7 @@ struct ToTimeImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -393,6 +394,7 @@ struct ToStartOfSecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -440,6 +442,7 @@ struct ToStartOfMillisecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -483,6 +486,7 @@ struct ToStartOfMicrosecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -520,6 +524,7 @@ struct ToStartOfNanosecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -718,6 +723,28 @@ struct ToYearImpl return time_zone.toYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return true; } + + static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) + { + if (point.getType() != Field::Types::UInt64) return std::nullopt; + + auto year = point.get(); + if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; + + const DateLUTImpl & date_lut = DateLUT::instance(); + + auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); + auto end_time = date_lut.addYears(start_time, 1); + + if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) + return {std::make_pair(Field(start_time), Field(end_time))}; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}. 
Should be Date, Date32, DateTime or DateTime64", + type.getName(), name); + } + using FactorTransform = ZeroTransform; }; @@ -791,6 +818,7 @@ struct ToQuarterImpl { return time_zone.toQuarter(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -815,6 +843,7 @@ struct ToMonthImpl { return time_zone.toMonth(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -840,6 +869,7 @@ struct ToDayOfMonthImpl return time_zone.toDayOfMonth(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMonthImpl; }; @@ -887,6 +917,7 @@ struct ToDayOfYearImpl { return time_zone.toDayOfYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfYearImpl; }; @@ -911,6 +942,7 @@ struct ToHourImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToDateImpl; }; @@ -939,6 +971,7 @@ struct TimezoneOffsetImpl throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToTimeImpl; }; @@ -962,6 +995,7 @@ struct ToMinuteImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfHourImpl; }; @@ -986,6 +1020,7 @@ struct ToSecondImpl { throwDateTimeIsNotSupported(name); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToStartOfMinuteImpl; }; @@ -1010,6 +1045,7 @@ struct ToISOYearImpl { return time_zone.toISOYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1066,6 +1102,7 @@ struct ToISOWeekImpl { return time_zone.toISOWeek(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ToISOYearImpl; }; @@ -1108,6 +1145,7 @@ struct ToRelativeYearNumImpl { return time_zone.toYear(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1139,6 +1177,7 @@ struct ToRelativeQuarterNumImpl { return time_zone.toRelativeQuarterNum(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1170,6 +1209,7 @@ struct ToRelativeMonthNumImpl { return time_zone.toRelativeMonthNum(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1201,6 +1241,7 @@ struct ToRelativeWeekNumImpl { return time_zone.toRelativeWeekNum(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1232,6 +1273,7 @@ struct ToRelativeDayNumImpl { return static_cast(d); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1269,6 +1311,7 @@ struct ToRelativeHourNumImpl else return static_cast(time_zone.toRelativeHourNum(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1300,6 +1343,7 @@ struct ToRelativeMinuteNumImpl { return static_cast(time_zone.toRelativeMinuteNum(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1328,6 +1372,7 @@ struct ToRelativeSecondNumImpl { return 
static_cast(time_zone.fromDayNum(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1352,6 +1397,31 @@ struct ToYYYYMMImpl { return time_zone.toNumYYYYMM(DayNum(d)); } + static inline constexpr bool hasPreimage() { return true; } + + static inline RangeOrNull getPreimage(const IDataType & type, const Field & point) + { + if (point.getType() != Field::Types::UInt64) return std::nullopt; + + auto year_month = point.get(); + auto year = year_month / 100; + auto month = year_month % 100; + + if (year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || (year == DATE_LUT_MAX_YEAR && month == 12)) + return std::nullopt; + + const DateLUTImpl & date_lut = DateLUT::instance(); + + auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); + auto end_time = date_lut.addMonths(start_time, 1); + + if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) + return {std::make_pair(Field(start_time), Field(end_time))}; + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", + type.getName(), name); + } using FactorTransform = ZeroTransform; }; @@ -1376,6 +1446,7 @@ struct ToYYYYMMDDImpl { return time_zone.toNumYYYYMMDD(DayNum(d)); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; @@ -1400,6 +1471,7 @@ struct ToYYYYMMDDhhmmssImpl { return time_zone.toNumYYYYMMDDhhmmss(time_zone.toDate(DayNum(d))); } + static inline constexpr bool hasPreimage() { return false; } using FactorTransform = ZeroTransform; }; diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 82818cc3d2b..d98b788c7d7 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; } /// See DateTimeTransforms.h @@ -83,6 +84,18 @@ public: arguments[0].type->getName(), this->getName()); } + bool hasInformationAboutPreimage() const override { return Transform::hasPreimage(); } + + RangeOrNull getPreimage(const IDataType & type, const Field & point) const override + { + if constexpr (Transform::hasPreimage()) + return Transform::getPreimage(type, point); + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Function {} has no information about its preimage", + Transform::name); + } + }; } diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index c5b9a78015d..433cb61d04e 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include #include @@ -11,11 +13,6 @@ #include -#if USE_EMBEDDED_COMPILER -# include -#endif - - /// This file contains user interface for functions. namespace llvm @@ -35,7 +32,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -class Field; +/// A left-closed and right-open interval representing the preimage of a function. +using RangeOrNull = std::optional>; /// The simplest executable object. /// Motivation: @@ -233,6 +231,12 @@ public: */ virtual bool hasInformationAboutMonotonicity() const { return false; } + /** Lets you know if the function has its definition of preimage. 
+ * This is used to work with predicate optimizations, where the comparison between + * f(x) and a constant c could be converted to the comparison between x and f's preimage [b, e). + */ + virtual bool hasInformationAboutPreimage() const { return false; } + struct ShortCircuitSettings { /// Should we enable lazy execution for the first argument of short-circuit function? @@ -286,6 +290,14 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName()); } + /** Get the preimage of a function in the form of a left-closed and right-open interval. Call only if hasInformationAboutPreimage. + * std::nullopt might be returned if the point (a single value) is invalid for this function. + */ + virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName()); + } + }; using FunctionBasePtr = std::shared_ptr; @@ -475,12 +487,17 @@ public: virtual bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const = 0; virtual bool hasInformationAboutMonotonicity() const { return false; } + virtual bool hasInformationAboutPreimage() const { return false; } using Monotonicity = IFunctionBase::Monotonicity; virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its monotonicity", getName()); } + virtual RangeOrNull getPreimage(const IDataType & /*type*/, const Field & /*point*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} has no information about its preimage", getName()); + } /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). virtual size_t getNumberOfArguments() const = 0; diff --git a/src/Functions/IFunctionAdaptors.h b/src/Functions/IFunctionAdaptors.h index 23725b1a8b1..123fdbc2f50 100644 --- a/src/Functions/IFunctionAdaptors.h +++ b/src/Functions/IFunctionAdaptors.h @@ -90,10 +90,17 @@ public: bool hasInformationAboutMonotonicity() const override { return function->hasInformationAboutMonotonicity(); } + bool hasInformationAboutPreimage() const override { return function->hasInformationAboutPreimage(); } + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override { return function->getMonotonicityForRange(type, left, right); } + + RangeOrNull getPreimage(const IDataType & type, const Field & point) const override + { + return function->getPreimage(type, point); + } private: std::shared_ptr function; DataTypes arguments; diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp new file mode 100644 index 00000000000..a377bb4bba6 --- /dev/null +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -0,0 +1,199 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** Given a monotonic non-decreasing function f(x), which satisfies f(x) = c for any value x within [b, e). + * We could convert it into its equivalent form, x >= b AND x < e, which is free from the invocation of the function. 
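+ * For instance, with a Date column x and f = toYear, the predicate
+ * toYear(x) = 2023 has b = '2023-01-01' and e = '2024-01-01', so it can be
+ * evaluated as a plain range check on x.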
+ * And we could apply the similar transformation to other comparisons. The suggested transformations list: + * + * f(x) == c -> x >= b AND x < e + * f(x) != c -> x < b OR x >= e + * f(x) > c -> x >= e + * f(x) >= c -> x >= b + * f(x) < c -> x < b + * f(x) <= c -> x < e + * + * This function generates a new AST with the transformed relation. + */ +ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTypePair & column, const std::pair& range) +{ + const DateLUTImpl & date_lut = DateLUT::instance(); + + const String & column_name = column.name; + String start_date_or_date_time; + String end_date_or_date_time; + + if (isDateOrDate32(column.type.get())) + { + start_date_or_date_time = date_lut.dateToString(range.first.get()); + end_date_or_date_time = date_lut.dateToString(range.second.get()); + } + else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) + { + start_date_or_date_time = date_lut.timeToString(range.first.get()); + end_date_or_date_time = date_lut.timeToString(range.second.get()); + } + else [[unlikely]] return {}; + + if (comparator == "equals") + { + return makeASTFunction("and", + makeASTFunction("greaterOrEquals", + std::make_shared(column_name), + std::make_shared(start_date_or_date_time) + ), + makeASTFunction("less", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ) + ); + } + else if (comparator == "notEquals") + { + return makeASTFunction("or", + makeASTFunction("less", + std::make_shared(column_name), + std::make_shared(start_date_or_date_time) + ), + makeASTFunction("greaterOrEquals", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ) + ); + } + else if (comparator == "greater") + { + return makeASTFunction("greaterOrEquals", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ); + } + else if (comparator == "lessOrEquals") + { + return makeASTFunction("less", + std::make_shared(column_name), + std::make_shared(end_date_or_date_time) + ); + } + else if (comparator == "less" || comparator == "greaterOrEquals") + { + return makeASTFunction(comparator, + std::make_shared(column_name), + std::make_shared(start_date_or_date_time) + ); + } + else [[unlikely]] + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Expected equals, notEquals, less, lessOrEquals, greater, greaterOrEquals. Actual {}", + comparator); + } +} + +void OptimizeDateOrDateTimeConverterWithPreimageMatcher::visit(const ASTFunction & function, ASTPtr & ast, const Data & data) +{ + const static std::unordered_map swap_relations = { + {"equals", "equals"}, + {"notEquals", "notEquals"}, + {"less", "greater"}, + {"greater", "less"}, + {"lessOrEquals", "greaterOrEquals"}, + {"greaterOrEquals", "lessOrEquals"}, + }; + + if (!swap_relations.contains(function.name)) return; + + if (!function.arguments || function.arguments->children.size() != 2) return; + + size_t func_id = function.arguments->children.size(); + + for (size_t i = 0; i < function.arguments->children.size(); i++) + { + if (const auto * func = function.arguments->children[i]->as()) + { + func_id = i; + } + } + + if (func_id == function.arguments->children.size()) return; + + size_t literal_id = 1 - func_id; + const auto * literal = function.arguments->children[literal_id]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64) return; + + String comparator = literal_id > func_id ? 
function.name : swap_relations.at(function.name); + + const auto * ast_func = function.arguments->children[func_id]->as(); + /// Currently we only handle single-argument functions. + if (!ast_func || !ast_func->arguments || ast_func->arguments->children.size() != 1) return; + + const auto * column_id = ast_func->arguments->children.at(0)->as(); + if (!column_id) return; + + auto pos = IdentifierSemantic::getMembership(*column_id); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*column_id, data.tables, true); + if (!pos) + return; + + if (*pos >= data.tables.size()) + return; + + auto data_type_and_name = data.tables[*pos].columns.tryGetByName(column_id->shortName()); + if (!data_type_and_name) return; + + const auto & converter = FunctionFactory::instance().tryGet(ast_func->name, data.context); + if (!converter) return; + + ColumnsWithTypeAndName args; + args.emplace_back(data_type_and_name->type, "tmp"); + auto converter_base = converter->build(args); + if (!converter_base || !converter_base->hasInformationAboutPreimage()) return; + + auto preimage_range = converter_base->getPreimage(*(data_type_and_name->type), literal->value); + if (!preimage_range) return; + + const auto new_ast = generateOptimizedDateFilterAST(comparator, *data_type_and_name, *preimage_range); + if (!new_ast) return; + + ast = new_ast; +} + +bool OptimizeDateOrDateTimeConverterWithPreimageMatcher::needChildVisit(ASTPtr & ast, ASTPtr & /*child*/) +{ + const static std::unordered_set relations = { + "equals", + "notEquals", + "less", + "greater", + "lessOrEquals", + "greaterOrEquals", + }; + + if (const auto * ast_function = ast->as()) + { + return !relations.contains(ast_function->name); + } + + return true; +} + +} diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h new file mode 100644 index 00000000000..778fa462364 --- /dev/null +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTFunction; + +/** Replace predicate having Date/DateTime converters with their preimages to improve performance. + * Given a Date column c, toYear(c) = 2023 -> c >= '2023-01-01' AND c < '2024-01-01' + * Or if c is a DateTime column, toYear(c) = 2023 -> c >= '2023-01-01 00:00:00' AND c < '2024-01-01 00:00:00'. + * The similar optimization also applies to other converters. 
+ */ +class OptimizeDateOrDateTimeConverterWithPreimageMatcher +{ +public: + struct Data + { + const TablesWithColumns & tables; + ContextPtr context; + }; + + static void visit(ASTPtr & ast, Data & data) + { + if (const auto * ast_function = ast->as()) + visit(*ast_function, ast, data); + } + + static void visit(const ASTFunction & function, ASTPtr & ast, const Data & data); + + static bool needChildVisit(ASTPtr & ast, ASTPtr & child); +}; + +using OptimizeDateOrDateTimeConverterWithPreimageVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c38b3c79026..fd4d2c9d846 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,21 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context) RemoveInjectiveFunctionsVisitor(data).visit(query); } +void optimizeDateFilters(ASTSelectQuery * select_query, const std::vector & tables_with_columns, ContextPtr context) +{ + /// Predicates in HAVING clause has been moved to WHERE clause. + if (select_query->where()) + { + OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context}; + OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refWhere()); + } + if (select_query->prewhere()) + { + OptimizeDateOrDateTimeConverterWithPreimageVisitor::Data data{tables_with_columns, context}; + OptimizeDateOrDateTimeConverterWithPreimageVisitor(data).visit(select_query->refPrewhere()); + } +} + void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -780,6 +796,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } + /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, etc. + optimizeDateFilters(select_query, tables_with_columns, context); + /// GROUP BY injective function elimination. 
optimizeGroupBy(select_query, context); diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference index cd689b93034..872a5dd1d7d 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.reference +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.reference @@ -1,2 +1,54 @@ 2021-12-31 23:00:00 0 2021-12-31 23:00:00 0 +Date +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 +DateTime +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 +Date32 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 +DateTime64 +2 +3 +2 +4 +1 +3 +3 +2 +1 +4 +1 +4 diff --git a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql index abb13f1005e..0a2fa6cc93b 100644 --- a/tests/queries/0_stateless/02783_date_predicate_optimizations.sql +++ b/tests/queries/0_stateless/02783_date_predicate_optimizations.sql @@ -11,3 +11,79 @@ INSERT INTO source values ('2021-12-31 23:00:00', 0); SELECT * FROM source WHERE toYYYYMM(ts) = 202112; SELECT * FROM source WHERE toYear(ts) = 2021; + +DROP TABLE IF EXISTS source; +CREATE TABLE source +( + `dt` Date, + `ts` DateTime, + `dt_32` Date32, + `ts_64` DateTime64(3), + `n` Int32 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(ts) +ORDER BY tuple(); + +INSERT INTO source values ('2022-12-31', '2022-12-31 23:59:59', '2022-12-31', '2022-12-31 23:59:59.123', 0); +INSERT INTO source values ('2023-01-01', '2023-01-01 00:00:00', '2023-01-01', '2023-01-01 00:00:00.000', 1); +INSERT INTO source values ('2023-12-01', '2023-12-01 00:00:00', '2023-12-01', '2023-12-01 00:00:00.000', 2); +INSERT INTO source values ('2023-12-31', '2023-12-31 23:59:59', '2023-12-31', '2023-12-31 23:59:59.123', 3); +INSERT INTO source values ('2024-01-01', '2024-01-01 00:00:00', '2024-01-01', '2024-01-01 00:00:00.000', 4); + +SELECT 'Date'; +SELECT count(*) FROM source WHERE toYYYYMM(dt) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt) >= 202312; +SELECT count(*) FROM source WHERE toYear(dt) = 2023; +SELECT count(*) FROM source WHERE toYear(dt) <> 2023; +SELECT count(*) FROM source WHERE toYear(dt) < 2023; +SELECT count(*) FROM source WHERE toYear(dt) <= 2023; +SELECT count(*) FROM source WHERE toYear(dt) > 2023; +SELECT count(*) FROM source WHERE toYear(dt) >= 2023; + +SELECT 'DateTime'; +SELECT count(*) FROM source WHERE toYYYYMM(ts) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts) >= 202312; +SELECT count(*) FROM source WHERE toYear(ts) = 2023; +SELECT count(*) FROM source WHERE toYear(ts) <> 2023; +SELECT count(*) FROM source WHERE toYear(ts) < 2023; +SELECT count(*) FROM source WHERE toYear(ts) <= 2023; +SELECT count(*) FROM source WHERE toYear(ts) > 2023; +SELECT count(*) FROM source WHERE toYear(ts) >= 2023; + +SELECT 'Date32'; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) <= 202312; 
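These assertions all depend on the rewrite producing the right half-open preimage interval: per the EXPLAIN SYNTAX reference output later in this patch, `toYear(date1) = 1993` becomes `date1 >= '1993-01-01' AND date1 < '1994-01-01'`, and `toYYYYMM(date1) = 199312` becomes `date1 >= '1993-12-01' AND date1 < '1994-01-01'`. A self-contained C++20 sketch of the same interval arithmetic, independent of ClickHouse's DateLUT (assumes only the standard `<chrono>` calendar types):

```cpp
#include <chrono>
#include <iostream>

int main()
{
    using namespace std::chrono;

    // Preimage of toYear(d) == 2023: the half-open interval [2023-01-01, 2024-01-01).
    constexpr year_month_day year_begin{year{2023}, January, day{1}};
    constexpr year_month_day year_end{year{2024}, January, day{1}};

    // Preimage of toYYYYMM(d) == 202312: [2023-12-01, 2024-01-01).
    constexpr int yyyymm = 202312;
    constexpr year_month_day month_begin{year{yyyymm / 100}, month{yyyymm % 100}, day{1}};
    constexpr year_month_day month_end = month_begin + months{1};

    // A date satisfies the original predicate iff it falls inside the interval.
    constexpr year_month_day d{year{2023}, December, day{31}};
    static_assert(sys_days{d} >= sys_days{year_begin} && sys_days{d} < sys_days{year_end});
    static_assert(sys_days{d} >= sys_days{month_begin} && sys_days{d} < sys_days{month_end});

    std::cout << "toYYYYMM(d) == " << yyyymm << "  <=>  d in ["
              << int(month_begin.year()) << '-' << unsigned(month_begin.month()) << "-01, "
              << int(month_end.year()) << '-' << unsigned(month_end.month()) << "-01)\n";
}
```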
+SELECT count(*) FROM source WHERE toYYYYMM(dt_32) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(dt_32) >= 202312; +SELECT count(*) FROM source WHERE toYear(dt_32) = 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) <> 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) < 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) <= 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) > 2023; +SELECT count(*) FROM source WHERE toYear(dt_32) >= 2023; + +SELECT 'DateTime64'; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) = 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <> 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) < 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) <= 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) > 202312; +SELECT count(*) FROM source WHERE toYYYYMM(ts_64) >= 202312; +SELECT count(*) FROM source WHERE toYear(ts_64) = 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) <> 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) < 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) <= 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) > 2023; +SELECT count(*) FROM source WHERE toYear(ts_64) >= 2023; +DROP TABLE source; diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference new file mode 100644 index 00000000000..9235e7e106a --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.reference @@ -0,0 +1,87 @@ +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 < \'1993-01-01\') OR (date1 >= \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1994-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1998-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) OR ((date1 >= \'1994-01-01\') AND (date1 < \'1995-01-01\'))) AND ((id >= 1) AND (id <= 3)) +SELECT + value1, + toYear(date1) AS year1 +FROM date_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199300) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (toYYYYMM(date1) = 199313) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') AND (date1 < \'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 < \'1992-03-01\') OR (date1 >= 
\'1992-04-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 < \'1992-04-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE (date1 >= \'1992-03-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date_t +WHERE ((date1 >= \'1992-03-01\') OR ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\'))) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-01-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM date32_t +WHERE ((date1 >= \'1993-12-01\') AND (date1 < \'1994-01-01\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-01-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM datetime64_t +WHERE ((date1 >= \'1993-12-01 00:00:00\') AND (date1 < \'1994-01-01 00:00:00\')) AND ((id >= 1) AND (id <= 3)) diff --git a/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql new file mode 100644 index 00000000000..266be59b0a3 --- /dev/null +++ b/tests/queries/0_stateless/02785_date_predicate_optimizations_ast_rewrite.sql @@ -0,0 +1,47 @@ +DROP TABLE IF EXISTS date_t; +CREATE TABLE date_t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM date_t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199300 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199313 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) = 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <> 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT 
value1 FROM date_t WHERE toYYYYMM(date1) < 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) > 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) <= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE toYYYYMM(date1) >= 199203 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date_t WHERE (toYYYYMM(date1) >= 199203 OR toYear(date1) = 1993) AND id BETWEEN 1 AND 3; +DROP TABLE date_t; + +DROP TABLE IF EXISTS datetime_t; +CREATE TABLE datetime_t (id UInt32, value1 String, date1 Datetime) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM datetime_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +DROP TABLE datetime_t; + +DROP TABLE IF EXISTS date32_t; +CREATE TABLE date32_t (id UInt32, value1 String, date1 Date32) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM date32_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +DROP TABLE date32_t; + +DROP TABLE IF EXISTS datetime64_t; +CREATE TABLE datetime64_t (id UInt32, value1 String, date1 Datetime64) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM datetime64_t WHERE toYYYYMM(date1) = 199312 AND id BETWEEN 1 AND 3; +DROP TABLE datetime64_t; From 1c2233b693077bbc5ce042c46a56aadaa49aab98 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 4 Jul 2023 15:46:40 +0000 Subject: [PATCH 220/522] Fix style check --- src/Interpreters/GraceHashJoin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index aa7091548d7..66dc1aa7bde 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -411,7 +411,7 @@ void GraceHashJoin::addBuckets(const size_t bucket_count) } buckets.reserve(buckets.size() + bucket_count); - for(auto & bucket : tmp_buckets) + for (auto & bucket : tmp_buckets) buckets.emplace_back(std::move(bucket)); } From e84769cb23b1447dce57eb957480f7c5d7cdced8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 20:19:17 +0300 Subject: [PATCH 221/522] Update 02789_object_type_invalid_num_of_rows.reference --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 7dec35f7acb..8b137891791 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +1 @@ -0.02 + From 31ced70ced6cf8a82aac60b7cd7e9d2740aae2bf Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 4 Jul 2023 20:19:20 +0200 Subject: [PATCH 222/522] remove wrong commit, fix the exceptions in tests --- .../00429_long_http_bufferization.sh | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/00429_long_http_bufferization.sh b/tests/queries/0_stateless/00429_long_http_bufferization.sh index 55192422389..98dd300e6ab 100755 --- 
a/tests/queries/0_stateless/00429_long_http_bufferization.sh +++ b/tests/queries/0_stateless/00429_long_http_bufferization.sh @@ -15,7 +15,9 @@ function query { } function ch_url() { - ${CLICKHOUSE_CURL_COMMAND} -q -sS "${CLICKHOUSE_URL}&max_block_size=$max_block_size&$1" -d "$(query "$2")" + ${CLICKHOUSE_CURL_COMMAND} -q -sS \ + "${CLICKHOUSE_URL}${max_block_size:+"&max_block_size=$max_block_size"}&$1" \ + -d "$(query "$2")" } @@ -26,9 +28,9 @@ exception_pattern="DB::Exception:[[:print:]]*" function check_only_exception() { local res res=$(ch_url "$1" "$2") - #(echo "$res") - #(echo "$res" | wc -l) - #(echo "$res" | grep -c "$exception_pattern") + # echo "$res" + # echo "$res" | wc -l + # echo "$res" | grep -c "$exception_pattern" [[ $(echo "$res" | wc -l) -eq 1 ]] || echo FAIL 1 "$@" [[ $(echo "$res" | grep -c "$exception_pattern") -eq 1 ]] || echo FAIL 2 "$@" } @@ -36,27 +38,23 @@ function check_only_exception() { function check_last_line_exception() { local res res=$(ch_url "$1" "$2") - #echo "$res" > res - #echo "$res" | wc -c - #echo "$res" | tail -n -2 + # echo "$res" > res + # echo "$res" | wc -c + # echo "$res" | tail -n -2 [[ $(echo "$res" | tail -n -1 | grep -c "$exception_pattern") -eq 1 ]] || echo FAIL 3 "$@" [[ $(echo "$res" | head -n -1 | grep -c "$exception_pattern") -eq 0 ]] || echo FAIL 4 "$@" } function check_exception_handling() { - # it is impossible to override max_block_size, details here https://github.com/ClickHouse/ClickHouse/issues/51694 - # rebuild CLICKHOUSE_URL for one call in order to avoid using random parameters from CLICKHOUSE_URL_PARAMS - CLICKHOUSE_URL="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/?wait_end_of_query=0" \ - max_block_size=30000 \ format=TSV \ check_last_line_exception \ - "max_result_rows=400000&buffer_size=1048577&wait_end_of_query=0" 111222333444 + "max_block_size=30000&max_result_rows=400000&buffer_size=1048577&wait_end_of_query=0" 111222333444 check_only_exception "max_result_bytes=1000" 1001 check_only_exception "max_result_bytes=1000&wait_end_of_query=1" 1001 - check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=0" 1048577 - check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=1" 1048577 + check_last_line_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=0" 1048577 + check_only_exception "max_result_bytes=1048576&buffer_size=1048576&wait_end_of_query=1" 1048577 check_only_exception "max_result_bytes=1500000&buffer_size=2500000&wait_end_of_query=0" 1500001 check_only_exception "max_result_bytes=1500000&buffer_size=1500000&wait_end_of_query=1" 1500001 @@ -70,7 +68,6 @@ check_exception_handling # Tune setting to speed up combinatorial test -# max_block_size has no effect here, that value has been set inside CLICKHOUSE_URL max_block_size=500000 corner_sizes="1048576 $(seq 500000 1000000 3500000)" From d987b94ed48594541bf91bb42fb4f5a8ced52e1f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 20:51:15 +0200 Subject: [PATCH 223/522] fix the way how broken parts are detached --- src/Storages/MergeTree/IMergeTreeDataPart.h | 8 +- src/Storages/MergeTree/MergeTreeData.cpp | 23 +- src/Storages/MergeTree/MergeTreeData.h | 10 +- .../ReplicatedMergeTreePartCheckThread.cpp | 402 ++++++++++-------- .../ReplicatedMergeTreePartCheckThread.h | 44 +- src/Storages/StorageReplicatedMergeTree.cpp | 68 ++- 6 files changed, 317 insertions(+), 238 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h 
b/src/Storages/MergeTree/IMergeTreeDataPart.h index fd73d802579..1fdcbd7309c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -242,9 +242,11 @@ public: /// Frozen by ALTER TABLE ... FREEZE ... It is used for information purposes in system.parts table. mutable std::atomic is_frozen {false}; - /// Indicated that the part was marked Outdated because it's broken, not because it's actually outdated - /// See outdateBrokenPartAndCloneToDetached(...) - mutable bool outdated_because_broken = false; + /// Indicates that the part was marked Outdated by PartCheckThread because the part was not committed to ZooKeeper + mutable bool is_unexpected_local_part = false; + + /// Indicates that the part was detached and marked Outdated because it's broken + mutable std::atomic_bool was_removed_as_broken = false; /// Flag for keep S3 data when zero-copy replication over S3 turned on. mutable bool force_keep_shared_data = false; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9c3a7f66ae..e37d4273629 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4023,22 +4023,15 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo } -void MergeTreeData::outdateBrokenPartAndCloneToDetached(const DataPartPtr & part_to_detach, const String & prefix) +void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { - auto metadata_snapshot = getInMemoryMetadataPtr(); - if (prefix.empty()) - LOG_INFO(log, "Cloning part {} to {} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - else - LOG_INFO(log, "Cloning part {} to {}_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), prefix, part_to_detach->name); - - part_to_detach->makeCloneInDetached(prefix, metadata_snapshot); + LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); DataPartsLock lock = lockParts(); + part_to_detach->is_unexpected_local_part = true; if (part_to_detach->getState() == DataPartState::Active) - { - part_to_detach->outdated_because_broken = true; removePartsFromWorkingSet(NO_TRANSACTION_RAW, {part_to_detach}, true, &lock); - } } void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeTreeData::DataPartPtr & part_to_detach, const String & prefix, bool restore_covered) @@ -4677,24 +4670,24 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) const { auto lock = lockParts(); return getPartIfExistsUnlocked(part_info, valid_states, lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) const { auto lock = lockParts(); return getPartIfExistsUnlocked(part_name, valid_states, lock); } 
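// [Editor's illustration, not part of the patch.] A minimal standalone sketch of
// the idiom the hunk above relies on: the lookup methods can become const even
// though they take the parts lock, because a mutex locked from const members is
// conventionally declared mutable. All names below are hypothetical.
#include <map>
#include <mutex>
#include <optional>
#include <string>

class PartSet
{
public:
    std::optional<int> getPartIfExists(const std::string & name) const
    {
        std::lock_guard lock(mutex); /// locking is not a logical mutation
        auto it = parts.find(name);
        return it == parts.end() ? std::nullopt : std::optional<int>(it->second);
    }

private:
    mutable std::mutex mutex; /// mutable, so const methods can lock it
    std::map<std::string, int> parts;
};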
-MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const { return getPartIfExistsUnlocked(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & /* acquired_lock */) const { auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b27392b355b..d5991aaea71 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -521,10 +521,10 @@ public: DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartStates & affordable_states, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; /// Returns the part with the given name and state or nullptr if no such part. - DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock); - DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock); - DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); - DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); + DataPartPtr getPartIfExistsUnlocked(const String & part_name, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const; + DataPartPtr getPartIfExistsUnlocked(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock & acquired_lock) const; + DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states) const; + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states) const; /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -654,7 +654,7 @@ public: virtual void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & /*part_name*/) {} /// Outdate broken part, set remove time to zero (remove as fast as possible) and make clone in detached directory. - void outdateBrokenPartAndCloneToDetached(const DataPartPtr & part, const String & prefix); + void outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part); /// If the part is Obsolete and not used by anybody else, immediately delete it from filesystem and remove from memory. 
void tryRemovePartImmediately(DataPartPtr && part); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index c495fdaf5e2..d6f8dbac883 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -131,7 +131,7 @@ size_t ReplicatedMergeTreePartCheckThread::size() const } -ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name) +bool ReplicatedMergeTreePartCheckThread::searchForMissingPartOnOtherReplicas(const String & part_name) const { auto zookeeper = storage.getZooKeeper(); @@ -198,13 +198,13 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP continue; LOG_INFO(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica); - return MissingPartSearchResult::FoundAndNeedFetch; + return true; } if (part_on_replica_info.contains(part_info)) { LOG_INFO(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name); - return MissingPartSearchResult::FoundAndDontNeedFetch; + return true; } if (part_info.contains(part_on_replica_info)) @@ -227,11 +227,10 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP if (found_part_with_the_same_min_block && found_part_with_the_same_max_block) { - /// FIXME It may never appear LOG_INFO(log, "Found parts with the same min block and with the same max block as the missing part {} on replica {}. " "Hoping that it will eventually appear as a result of a merge. Parts: {}", part_name, replica, fmt::join(parts_found, ", ")); - return MissingPartSearchResult::FoundAndDontNeedFetch; + return true; } } } @@ -247,70 +246,9 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP not_found_msg = "smaller parts with either the same min block or the same max block."; LOG_ERROR(log, "No replica has part covering {} and a merge is impossible: we didn't find {}", part_name, not_found_msg); - return MissingPartSearchResult::LostForever; + return false; } -void ReplicatedMergeTreePartCheckThread::searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper) -{ - auto zookeeper = storage.getZooKeeper(); - auto missing_part_search_result = searchForMissingPartOnOtherReplicas(part_name); - - /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue. - if (exists_in_zookeeper) - { - if (missing_part_search_result == MissingPartSearchResult::FoundAndNeedFetch) - { - LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and found on other replica. Removing from ZooKeeper and queueing a fetch.", part_name); - } - else - { - LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally and not found on other replica. Removing it from ZooKeeper.", part_name); - } - - /// We cannot simply remove part from ZooKeeper, because it may be removed from virtual_part, - /// so we have to create some entry in the queue. Maybe we will execute it (by fetching part or covering part from somewhere), - /// maybe will simply replace with empty part. 
- storage.removePartAndEnqueueFetch(part_name, /* storage_init = */false); - } - - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - - if (missing_part_search_result == MissingPartSearchResult::LostForever) - { - auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); - if (lost_part_info.level != 0 || lost_part_info.mutation != 0) - { - Strings source_parts; - bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); - - /// If it's MERGE/MUTATION etc. we shouldn't replace result part with empty part - /// because some source parts can be lost, but some of them can exist. - if (part_in_queue && !source_parts.empty()) - { - LOG_ERROR(log, "Part {} found in queue and some source parts for it was lost. Will check all source parts.", part_name); - for (const String & source_part_name : source_parts) - enqueuePart(source_part_name); - - return; - } - } - - ThreadFuzzer::maybeInjectSleep(); - - if (storage.createEmptyPartInsteadOfLost(zookeeper, part_name)) - { - /** This situation is possible if on all the replicas where the part was, it deteriorated. - * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk. - */ - LOG_ERROR(log, "Part {} is lost forever.", part_name); - ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); - } - else - { - LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); - } - } -} std::pair ReplicatedMergeTreePartCheckThread::findLocalPart(const String & part_name) { @@ -335,12 +273,12 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo return std::make_pair(exists_in_zookeeper, part); } -CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) +ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const String & part_name) { - LOG_INFO(log, "Checking part {}", part_name); - ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); - + ReplicatedCheckResult result; auto [exists_in_zookeeper, part] = findLocalPart(part_name); + result.exists_in_zookeeper = exists_in_zookeeper; + result.part = part; LOG_TRACE(log, "Part {} in zookeeper: {}, locally: {}", part_name, exists_in_zookeeper, part != nullptr); @@ -351,130 +289,236 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na { /// We cannot rely on exists_in_zookeeper, because the cleanup thread is probably going to remove it from ZooKeeper /// Also, it will avoid "Cannot commit empty part: Part ... (state Outdated) already exists, but it will be deleted soon" - LOG_WARNING(log, "Part {} is Outdated, will wait for cleanup thread to handle it and check again later", part_name); time_t lifetime = time(nullptr) - outdated->remove_time; time_t max_lifetime = storage.getSettings()->old_parts_lifetime.totalSeconds(); time_t delay = lifetime >= max_lifetime ? 0 : max_lifetime - lifetime; - enqueuePart(part_name, delay + 30); - return {part_name, true, "Part is Outdated, will recheck later"}; + result.recheck_after = delay + 30; + + auto message = PreformattedMessage::create("Part {} is Outdated, will wait for cleanup thread to handle it " + "and check again after {}s", part_name, result.recheck_after); + LOG_WARNING(log, message); + result.status = {part_name, true, message.text}; + result.action = ReplicatedCheckResult::RecheckLater; + return result; } } /// We do not have this or a covering part. 
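// [Editor's illustration, not part of the patch.] The recheck delay computed a few
// lines above, restated as a runnable sketch: a recently removed Outdated part is
// rechecked only once it would have outlived old_parts_lifetime, plus a 30-second
// safety margin. The constants in main() assume the default old_parts_lifetime of
// 480 seconds.
#include <cassert>
#include <ctime>

time_t recheckAfter(time_t lifetime, time_t max_lifetime)
{
    time_t delay = lifetime >= max_lifetime ? 0 : max_lifetime - lifetime;
    return delay + 30;
}

int main()
{
    assert(recheckAfter(10, 480) == 500); /// removed 10 s ago: wait the remaining 470 s + 30 s
    assert(recheckAfter(600, 480) == 30); /// already past old_parts_lifetime: only the margin
}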
if (!part) { - searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper); - return {part_name, false, "Part is missing, will search for it"}; + result.status = {part_name, false, "Part is missing, will search for it"}; + result.action = ReplicatedCheckResult::TryFetchMissing; + return result; } /// We have this part, and it's active. We will check whether we need this part and whether it has the right data. - if (part->name == part_name) - { - auto zookeeper = storage.getZooKeeper(); - auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); - - auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( - part->getColumns(), part->checksums); - - /// The double get scheme is needed to retain compatibility with very old parts that were created - /// before the ReplicatedMergeTreePartHeader was introduced. - - String part_path = storage.replica_path + "/parts/" + part_name; - String part_znode; - /// If the part is in ZooKeeper, check its data with its checksums, and them with ZooKeeper. - if (zookeeper->tryGet(part_path, part_znode)) - { - LOG_INFO(log, "Checking data of part {}.", part_name); - - try - { - ReplicatedMergeTreePartHeader zk_part_header; - if (!part_znode.empty()) - zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode); - else - { - String columns_znode = zookeeper->get(part_path + "/columns"); - String checksums_znode = zookeeper->get(part_path + "/checksums"); - zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( - columns_znode, checksums_znode); - } - - if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) - throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); - - zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); - - checkDataPart( - part, - true, - [this] { return need_stop.load(); }); - - if (need_stop) - { - LOG_INFO(log, "Checking part was cancelled."); - return {part_name, false, "Checking part was cancelled"}; - } - - LOG_INFO(log, "Part {} looks good.", part_name); - } - catch (const Exception & e) - { - /// Don't count the part as broken if we got known retryable exception. - /// In fact, there can be other similar situations because not all - /// of the exceptions are classified as retryable/non-retryable. But it is OK, - /// because there is a safety guard against deleting too many parts. - if (isRetryableException(e)) - throw; - - tryLogCurrentException(log, __PRETTY_FUNCTION__); - constexpr auto fmt_string = "Part {} looks broken. Removing it and will try to fetch."; - String message = fmt::format(fmt_string, part_name); - LOG_ERROR(log, fmt_string, part_name); - - /// Delete part locally. - storage.outdateBrokenPartAndCloneToDetached(part, "broken"); - - ThreadFuzzer::maybeInjectMemoryLimitException(); - ThreadFuzzer::maybeInjectSleep(); - - /// Part is broken, let's try to find it and fetch. - searchForMissingPartAndFetchIfPossible(part_name, exists_in_zookeeper); - - return {part_name, false, message}; - } - } - else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < time(nullptr)) - { - /// If the part is not in ZooKeeper, delete it locally. - /// Probably, someone just wrote down the part, and has not yet added to ZK. - /// Therefore, delete only if the part is old (not very reliable). 
- ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); - constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing."; - String message = fmt::format(fmt_string, part_name); - LOG_ERROR(log, fmt_string, part_name); - storage.outdateBrokenPartAndCloneToDetached(part, "unexpected"); - ThreadFuzzer::maybeInjectSleep(); - return {part_name, false, message}; - } - else - { - /// TODO You need to make sure that the part is still checked after a while. - /// Otherwise, it's possible that the part was not added to ZK, - /// but remained in the filesystem and in a number of active parts. - /// And then for a long time (before restarting), the data on the replicas will be different. - - LOG_TRACE(log, "Young part {} with age {} seconds hasn't been added to ZooKeeper yet. It's ok.", part_name, (time(nullptr) - part->modification_time)); - } - } - else + if (part->name != part_name) { /// If we have a covering part, ignore all the problems with this part. /// In the worst case, errors will still appear `old_parts_lifetime` seconds in error log until the part is removed as the old one. - LOG_WARNING(log, "We have part {} covering part {}", part->name, part_name); + auto message = PreformattedMessage::create("We have part {} covering part {}, will not check", part->name, part_name); + LOG_WARNING(log, message); + result.status = {part_name, true, message.text}; + result.action = ReplicatedCheckResult::DoNothing; + return result; } - part->checkMetadata(); - return {part_name, true, ""}; + time_t current_time = time(nullptr); + auto zookeeper = storage.getZooKeeper(); + auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); + + auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( + part->getColumns(), part->checksums); + + + /// If the part is in ZooKeeper, check its data with its checksums, and them with ZooKeeper. + if (exists_in_zookeeper) + { + LOG_INFO(log, "Checking data of part {}.", part_name); + + /// The double get scheme is needed to retain compatibility with very old parts that were created + /// before the ReplicatedMergeTreePartHeader was introduced. 
+ String part_path = storage.replica_path + "/parts/" + part_name; + String part_znode = zookeeper->get(part_path); + + try + { + ReplicatedMergeTreePartHeader zk_part_header; + if (!part_znode.empty()) + zk_part_header = ReplicatedMergeTreePartHeader::fromString(part_znode); + else + { + String columns_znode = zookeeper->get(part_path + "/columns"); + String checksums_znode = zookeeper->get(part_path + "/checksums"); + zk_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksumsZNodes( + columns_znode, checksums_znode); + } + + if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) + throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); + + zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + + checkDataPart( + part, + true, + [this] { return need_stop.load(); }); + + if (need_stop) + { + result.status = {part_name, false, "Checking part was cancelled"}; + result.action = ReplicatedCheckResult::Cancelled; + return result; + } + + part->checkMetadata(); + + LOG_INFO(log, "Part {} looks good.", part_name); + result.status = {part_name, true, ""}; + result.action = ReplicatedCheckResult::DoNothing; + return result; + } + catch (const Exception & e) + { + /// Don't count the part as broken if we got known retryable exception. + /// In fact, there can be other similar situations because not all + /// of the exceptions are classified as retryable/non-retryable. But it is OK, + /// because there is a safety guard against deleting too many parts. + if (isRetryableException(e)) + throw; + + tryLogCurrentException(log, __PRETTY_FUNCTION__); + + auto message = PreformattedMessage::create("Part {} looks broken. Removing it and will try to fetch.", part_name); + LOG_ERROR(log, message); + + /// Part is broken, let's try to find it and fetch. + result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::TryFetchMissing; + return result; + } + } + else if (part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER < current_time) + { + /// If the part is not in ZooKeeper, delete it locally. + /// Probably, someone just wrote down the part, and has not yet added to ZK. + /// Therefore, delete only if the part is old (not very reliable). + constexpr auto fmt_string = "Unexpected part {} in filesystem. Removing."; + String message = fmt::format(fmt_string, part_name); + LOG_ERROR(log, fmt_string, part_name); + result.status = {part_name, false, message}; + result.action = ReplicatedCheckResult::DetachUnexpected; + return result; + } + else + { + auto message = PreformattedMessage::create("Young part {} with age {} seconds hasn't been added to ZooKeeper yet. 
It's ok.", + part_name, (current_time - part->modification_time)); + LOG_INFO(log, message); + result.recheck_after = part->modification_time + MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER - current_time; + result.status = {part_name, true, message}; + result.action = ReplicatedCheckResult::RecheckLater; + return result; + } +} + + +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name) +{ + LOG_INFO(log, "Checking part {}", part_name); + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); + + ReplicatedCheckResult result = checkPartImpl(part_name); + switch (result.action) + { + case ReplicatedCheckResult::None: UNREACHABLE(); + case ReplicatedCheckResult::DoNothing: break; + case ReplicatedCheckResult::Cancelled: + LOG_INFO(log, "Checking part was cancelled."); + break; + + case ReplicatedCheckResult::RecheckLater: + enqueuePart(part_name, result.recheck_after); + break; + + case ReplicatedCheckResult::DetachUnexpected: + chassert(!result.exists_in_zookeeper); + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); + + storage.outdateUnexpectedPartAndCloneToDetached(result.part); + break; + + case ReplicatedCheckResult::TryFetchMissing: + { + ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); + + /// If the part is in ZooKeeper, remove it from there and add the task to download it to the queue (atomically). + if (result.exists_in_zookeeper) + { + /// We cannot simply remove part from ZooKeeper, because it may be removed from virtual_part, + /// so we have to create some entry in the queue. Maybe we will execute it (by fetching part or covering part from somewhere), + /// maybe will simply replace with empty part. + if (result.part) + LOG_WARNING(log, "Part {} exists in ZooKeeper and the local part was broken. Detaching it, removing from ZooKeeper and queueing a fetch.", part_name); + else + LOG_WARNING(log, "Part {} exists in ZooKeeper but not locally. Removing from ZooKeeper and queueing a fetch.", part_name); + + storage.removePartAndEnqueueFetch(part_name, /* storage_init = */ false); + break; + } + + chassert(!result.part); + + /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). + /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). + /// Either all replicas having the part are not active, or the part is lost forever. + bool is_lost = searchForMissingPartOnOtherReplicas(part_name); + if (is_lost) + onPartIsLostForever(part_name); + + break; + } + } + + return result.status; +} + +void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) +{ + auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); + if (lost_part_info.level != 0 || lost_part_info.mutation != 0) + { + Strings source_parts; + bool part_in_queue = storage.queue.checkPartInQueueAndGetSourceParts(part_name, source_parts); + + /// If it's MERGE/MUTATION etc. we shouldn't replace result part with empty part + /// because some source parts can be lost, but some of them can exist. + if (part_in_queue && !source_parts.empty()) + { + LOG_ERROR(log, "Part {} found in queue and some source parts for it was lost. 
Will check all source parts.", part_name);
+            for (const String & source_part_name : source_parts)
+                enqueuePart(source_part_name);
+
+            return;
+        }
+    }
+
+    ThreadFuzzer::maybeInjectSleep();
+
+    if (storage.createEmptyPartInsteadOfLost(storage.getZooKeeper(), part_name))
+    {
+        /** This situation is possible if on all the replicas where the part was, it deteriorated.
+          * For example, a replica that has just written it has power turned off and the data has not been written from cache to disk.
+          */
+        LOG_ERROR(log, "Part {} is lost forever.", part_name);
+        ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss);
+    }
+    else
+    {
+        LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name);
+        constexpr time_t retry_after_seconds = 30;
+        enqueuePart(part_name, retry_after_seconds);
+    }
 }

@@ -524,7 +568,7 @@ void ReplicatedMergeTreePartCheckThread::run()
         if (selected == parts_queue.end())
             return;

-        checkPart(selected->first);
+        checkPartAndFix(selected->first);

         if (need_stop)
             return;
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
index b86191dbf50..0a8fbc75c05 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h
@@ -18,6 +18,27 @@ namespace DB

 class StorageReplicatedMergeTree;

+struct ReplicatedCheckResult
+{
+    enum Action
+    {
+        None,
+
+        Cancelled,
+        DoNothing,
+        RecheckLater,
+
+        DetachUnexpected,
+        TryFetchMissing,
+    };
+
+    CheckResult status;
+    Action action = None;
+
+    bool exists_in_zookeeper;
+    MergeTreeDataPartPtr part;
+    time_t recheck_after = 0;
+};

 /** Checks the integrity of the parts requested for validation.
   *
@@ -44,7 +65,9 @@ public:
     size_t size() const;

     /// Check part by name
-    CheckResult checkPart(const String & part_name);
+    CheckResult checkPartAndFix(const String & part_name);
+
+    ReplicatedCheckResult checkPartImpl(const String & part_name);

     std::unique_lock<std::mutex> pausePartsCheck();

@@ -54,26 +77,13 @@ public:
 private:
     void run();

-    /// Search for missing part and queue fetch if possible. Otherwise
-    /// remove part from zookeeper and queue.
-    void searchForMissingPartAndFetchIfPossible(const String & part_name, bool exists_in_zookeeper);
+    void onPartIsLostForever(const String & part_name);

     std::pair<bool, MergeTreeDataPartPtr> findLocalPart(const String & part_name);

-    enum MissingPartSearchResult
-    {
-        /// We found this part on other replica, let's fetch it.
-        FoundAndNeedFetch,
-        /// We found covering part or source part with same min and max block number
-        /// don't need to fetch because we should do it during normal queue processing.
-        FoundAndDontNeedFetch,
-        /// Covering part not found anywhere and exact part_name doesn't found on other
-        /// replicas.
-        LostForever,
-    };
-    /// Search for missing part on other replicas or covering part on all replicas (including our replica).
-    MissingPartSearchResult searchForMissingPartOnOtherReplicas(const String & part_name);
+    /// Returns false if the part is lost forever.
+    bool searchForMissingPartOnOtherReplicas(const String & part_name) const;

     StorageReplicatedMergeTree & storage;
     String log_name;
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index b1ba06c77f9..56b8d431588 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3351,6 +3351,17 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt
         return false;
     }

+    if (entry.source_replica.empty())
+    {
+        auto part = getPartIfExists(entry.new_part_name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated, MergeTreeDataPartState::Deleting});
+        if (part && part->was_removed_as_broken)
+        {
+            disable_reason = fmt::format("Not executing fetch of part {} because we still have broken part with that name. "
+                                         "Waiting for the broken part to be removed first.", entry.new_part_name);
+            return false;
+        }
+    }
+
     return true;
 }

@@ -3731,23 +3742,44 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
 {
     auto zookeeper = getZooKeeper();

+    DataPartPtr broken_part;
+    auto outdate_broken_part = [this, &broken_part]()
+    {
+        if (!broken_part)
+            return;
+        DataPartsLock lock = lockParts();
+        if (broken_part->getState() == DataPartState::Active)
+            removePartsFromWorkingSet(NO_TRANSACTION_RAW, {broken_part}, true, &lock);
+    };
+
     /// We don't know exactly what happened to broken part
     /// and we are going to remove all covered log entries.
     /// It's quite dangerous, so clone covered parts to detached.
     auto broken_part_info = MergeTreePartInfo::fromPartName(part_name, format_version);

-    auto partition_range = getVisibleDataPartsVectorInPartition(getContext(), broken_part_info.partition_id);
+    auto partition_range = getDataPartsVectorInPartitionForInternalUsage({MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated},
+                                                                         broken_part_info.partition_id);
     for (const auto & part : partition_range)
     {
         if (!broken_part_info.contains(part->info))
             continue;

-        /// Broken part itself either already moved to detached or does not exist.
-        assert(broken_part_info != part->info);
-        part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr());
+        if (broken_part_info == part->info)
+        {
+            chassert(!broken_part);
+            chassert(!storage_init);
+            part->was_removed_as_broken = true;
+            part->makeCloneInDetached("broken", getInMemoryMetadataPtr());
+            broken_part = part;
+        }
+        else
+        {
+            part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr());
+        }
     }

     ThreadFuzzer::maybeInjectSleep();
+    ThreadFuzzer::maybeInjectMemoryLimitException();

     /// It's possible that queue contains entries covered by part_name.
     /// For example, we had GET_PART all_1_42_5 and MUTATE_PART all_1_42_5_63,
@@ -3762,6 +3794,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
     queue.removePartProducingOpsInRange(zookeeper, broken_part_info, /* covering_entry= */ {});

     ThreadFuzzer::maybeInjectSleep();
+    ThreadFuzzer::maybeInjectMemoryLimitException();

     String part_path = fs::path(replica_path) / "parts" / part_name;

@@ -3780,7 +3813,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
         /// but we are going to remove it from /parts and add to queue again.
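// [Editor's illustration, not part of the patch.] A standalone sketch of the
// deferred `outdate_broken_part` pattern used above: the state change is captured
// in a closure that does nothing when no broken part was found, and it is invoked
// only after the commit point (the log entry is created in ZooKeeper first), so a
// failure in between leaves the local part untouched. Names are hypothetical.
#include <iostream>
#include <memory>

struct Part { bool outdated = false; };

int main()
{
    std::shared_ptr<Part> broken_part = std::make_shared<Part>();

    auto outdate_broken_part = [&broken_part]()
    {
        if (!broken_part)
            return; /// nothing was found to outdate
        broken_part->outdated = true;
    };

    bool log_entry_created = true; /// stand-in for the ZooKeeper multi-op succeeding
    if (log_entry_created)
        outdate_broken_part(); /// only now remove the part from the working set

    std::cout << broken_part->outdated << '\n'; /// prints 1
}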
Coordination::Stat is_lost_stat; String is_lost_value = zookeeper->get(replica_path + "/is_lost", &is_lost_stat); - assert(is_lost_value == "0"); + chassert(is_lost_value == "0"); ops.emplace_back(zkutil::makeSetRequest(replica_path + "/is_lost", is_lost_value, is_lost_stat.version)); part_create_time = stat.ctime / 1000; @@ -3802,12 +3835,8 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n ReplicatedMergeTreeMergePredicate merge_pred = queue.getMergePredicate(zookeeper, PartitionIdsHint{broken_part_info.partition_id}); if (merge_pred.isGoingToBeDropped(broken_part_info)) { - LOG_INFO(log, "Broken part {} is covered by drop range, don't need to fetch it, removing it from ZooKeeper", part_name); - - /// But we have to remove it from ZooKeeper because broken parts are not removed from ZK during Outdated parts cleanup - /// There's a chance that DROP_RANGE will remove it, but only if it was not already removed by cleanup thread - if (exists_in_zookeeper) - removePartsFromZooKeeperWithRetries({part_name}); + LOG_INFO(log, "Broken part {} is covered by drop range, don't need to fetch it", part_name); + outdate_broken_part(); return; } @@ -3836,10 +3865,11 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n zkutil::KeeperMultiException::check(rc, ops, results); - String path_created = dynamic_cast(*results.back()).path_created; - log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1); - queue.insert(zookeeper, log_entry); - break; + /// Make the part outdated after creating the log entry. + /// Otherwise, if we failed to create the entry, cleanup thread could remove the part from ZooKeeper (leading to diverged replicas) + outdate_broken_part(); + queue_updating_task->schedule(); + return; } } @@ -6841,10 +6871,10 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZKImpl(zkutil::ZooKee { /// Broken part can be removed from zk by removePartAndEnqueueFetch(...) only. /// Removal without enqueueing a fetch leads to intersecting parts. 
- if (part->is_duplicate || part->outdated_because_broken) + if (part->is_duplicate || part->is_unexpected_local_part) { - LOG_WARNING(log, "Will not remove part {} from ZooKeeper (is_duplicate: {}, outdated_because_broken: {})", - part->name, part->is_duplicate, part->outdated_because_broken); + LOG_WARNING(log, "Will not remove part {} from ZooKeeper (is_duplicate: {}, is_unexpected_local_part: {})", + part->name, part->is_duplicate, part->is_unexpected_local_part); parts_to_delete_only_from_filesystem.emplace_back(part); } else @@ -8189,7 +8219,7 @@ CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, Context { try { - results.push_back(part_check_thread.checkPart(part->name)); + results.push_back(part_check_thread.checkPartAndFix(part->name)); } catch (const Exception & ex) { From 99f02e0f6bfb4682e972b8258fea7be02c0f2691 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Tue, 4 Jul 2023 22:06:17 +0300 Subject: [PATCH 224/522] Add const to trying to fix build --- src/Functions/array/range.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 47e90de2e2b..b14f2baca15 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -414,7 +414,7 @@ private: if (arguments.size() == 1) { throwIfNullValue(arguments[0]); - auto * col = arguments[0].column.get(); + const auto * col = arguments[0].column.get(); if (arguments[0].type->isNullable()) { const auto * nullable = checkAndGetColumn(*arguments[0].column); From bb5b47cacf30c84f51e3c8a70040bf5707a5e742 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Tue, 4 Jul 2023 19:07:11 +0000 Subject: [PATCH 225/522] do not access Exception::thread_frame_pointers if not initialized --- src/Daemon/BaseDaemon.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 6d29523a354..a75aac7a08e 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -154,7 +154,10 @@ static void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - writeVectorBinary(Exception::thread_frame_pointers, out); + if (Exception::enable_job_stack_trace) + writeVectorBinary(Exception::thread_frame_pointers, out); + else + writeVarUInt(0, out); writeBinary(static_cast(getThreadId()), out); writePODBinary(current_thread, out); From 3021180e0133c8904a29cfc1d4254a0504f9a5fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 21:33:12 +0200 Subject: [PATCH 226/522] Update --- .github/workflows/master.yml | 23 +++++++++-------------- .github/workflows/pull_request.yml | 24 ++++++++++-------------- tests/ci/ci_config.py | 2 +- 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index c6270af0efa..6996221e1aa 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -851,8 +851,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [DockerHubPush] - runs-on: [self-hosted, builder] + needs: [ DockerHubPush ] + runs-on: [ self-hosted, builder ] steps: - name: Set envs run: | @@ -864,38 +864,33 @@ jobs: BUILD_NAME=binary_riscv64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ 
env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 with: + clear-repository: true + submodules: true fetch-depth: 0 # otherwise we will have no info about contributors - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 0ec4d997a4d..fe7c3bba410 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -912,8 +912,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [DockerHubPush, FastTest, StyleCheck] - runs-on: [self-hosted, builder] + needs: [ DockerHubPush, FastTest, StyleCheck ] + runs-on: [ self-hosted, builder ] steps: - name: Set envs run: | @@ -925,36 +925,32 @@ jobs: BUILD_NAME=binary_riscv64 EOF - name: Download changed images - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 with: name: changed_images path: ${{ env.IMAGES_PATH }} - - name: Clear repository - run: | - sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" - name: Check out repository code - uses: actions/checkout@v2 + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true - name: Build run: | - git -C "$GITHUB_WORKSPACE" submodule sync --recursive - git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10 sudo rm -fr "$TEMP_PATH" mkdir -p "$TEMP_PATH" cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - name: Upload build URLs to artifacts if: ${{ success() || failure() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: ${{ env.BUILD_URLS }} path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json - name: Cleanup if: always() run: | - # shellcheck disable=SC2046 - docker kill $(docker ps -q) ||: - # shellcheck disable=SC2046 - docker rm -f $(docker ps -a -q) ||: + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 
6f86c24184b..1777180a76e 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -174,7 +174,7 @@ CI_CONFIG = { "comment": "SSE2-only build", }, "binary_riscv64": { - "compiler": "clang-14-riscv64", + "compiler": "clang-16-riscv64", "build_type": "", "sanitizer": "", "package_type": "binary", From 6345879cdf4ba9c33f121a17a16e389761791de5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:56:58 +0300 Subject: [PATCH 227/522] Update src/Disks/VolumeJBOD.cpp Co-authored-by: Sergei Trifonov --- src/Disks/VolumeJBOD.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/VolumeJBOD.cpp b/src/Disks/VolumeJBOD.cpp index 885b1d56b0d..519f3378c4c 100644 --- a/src/Disks/VolumeJBOD.cpp +++ b/src/Disks/VolumeJBOD.cpp @@ -46,11 +46,11 @@ VolumeJBOD::VolumeJBOD( for (const auto & disk : disks) { auto size = disk->getTotalSpace(); - sizes.push_back(*size); if (size) sum_size += *size; else break; + sizes.push_back(*size); } if (sizes.size() == disks.size()) { From 5a3299572626c5ce5fcd53759b134de49287a4e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:57:39 +0300 Subject: [PATCH 228/522] Update src/Disks/IVolume.cpp Co-authored-by: Sergei Trifonov --- src/Disks/IVolume.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 15b52acb422..43caf07d70a 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -51,7 +51,7 @@ IVolume::IVolume( std::optional IVolume::getMaxUnreservedFreeSpace() const { - std::optional res = 0; + std::optional res; for (const auto & disk : disks) res = std::max(res, disk->getUnreservedSpace()); return res; From c76cf53391426471d2a374b63c302e2a383258a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:14:37 +0200 Subject: [PATCH 229/522] Address review comments --- src/Disks/IVolume.cpp | 9 ++++++++- src/Disks/StoragePolicy.cpp | 13 ++++++++----- src/Disks/loadLocalDiskConfig.cpp | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/Disks/IVolume.cpp b/src/Disks/IVolume.cpp index 43caf07d70a..0b072e6ba8b 100644 --- a/src/Disks/IVolume.cpp +++ b/src/Disks/IVolume.cpp @@ -53,7 +53,14 @@ std::optional IVolume::getMaxUnreservedFreeSpace() const { std::optional res; for (const auto & disk : disks) - res = std::max(res, disk->getUnreservedSpace()); + { + auto disk_unreserved_space = disk->getUnreservedSpace(); + if (!disk_unreserved_space) + return std::nullopt; /// There is at least one unlimited disk. + + if (!res || *disk_unreserved_space > *res) + res = disk_unreserved_space; + } return res; } diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index 92cca23ca76..6b8d7186a15 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -209,14 +209,17 @@ DiskPtr StoragePolicy::tryGetDiskByName(const String & disk_name) const UInt64 StoragePolicy::getMaxUnreservedFreeSpace() const { - UInt64 res = 0; + std::optional res; for (const auto & volume : volumes) { - auto max_unreserved_for_volume = volume->getMaxUnreservedFreeSpace(); - if (max_unreserved_for_volume) - res = std::max(res, *max_unreserved_for_volume); + auto volume_unreserved_space = volume->getMaxUnreservedFreeSpace(); + if (!volume_unreserved_space) + return -1ULL; /// There is at least one unlimited disk. 
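// [Editor's illustration, not part of the patch.] The optional-maximum pattern used
// in the surrounding hunks, as a runnable sketch: an empty optional means
// "unlimited" space, so it must dominate every finite size rather than being
// treated as zero. All names below are hypothetical.
#include <cstdint>
#include <optional>
#include <vector>

std::optional<uint64_t> maxUnreservedSpace(const std::vector<std::optional<uint64_t>> & per_disk)
{
    std::optional<uint64_t> res;
    for (const auto & space : per_disk)
    {
        if (!space)
            return std::nullopt; /// one unlimited disk makes the result unlimited
        if (!res || *space > *res)
            res = space;
    }
    return res;
}

int main()
{
    std::vector<std::optional<uint64_t>> per_disk{100, 300, 200};
    auto finite = maxUnreservedSpace(per_disk);    /// 300
    per_disk.push_back(std::nullopt);              /// add an "unlimited" disk
    auto unlimited = maxUnreservedSpace(per_disk); /// nullopt
    return (finite && *finite == 300 && !unlimited) ? 0 : 1;
}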
+ + if (!res || *volume_unreserved_space > *res) + res = volume_unreserved_space; } - return res; + return res.value_or(-1ULL); } diff --git a/src/Disks/loadLocalDiskConfig.cpp b/src/Disks/loadLocalDiskConfig.cpp index 0e5eca17ca7..0c4a9e7af32 100644 --- a/src/Disks/loadLocalDiskConfig.cpp +++ b/src/Disks/loadLocalDiskConfig.cpp @@ -56,7 +56,7 @@ void loadDiskLocalConfig(const String & name, tmp_path = context->getPath(); // Create tmp disk for getting total disk space. - keep_free_space_bytes = static_cast(DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); + keep_free_space_bytes = static_cast(*DiskLocal("tmp", tmp_path, 0).getTotalSpace() * ratio); } } From ead43836f7b9f1eb04e8cd4e9c293f39ddf1ec1a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 4 Jul 2023 22:35:01 +0200 Subject: [PATCH 230/522] Fix the test --- .../02796_calculate_text_stack_trace.sql | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql index 601bd16fb39..52d55bdbe11 100644 --- a/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql +++ b/tests/queries/0_stateless/02796_calculate_text_stack_trace.sql @@ -1,16 +1,20 @@ -- Tags: no-parallel -TRUNCATE TABLE system.text_log; - SELECT 'Hello', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; -SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; -SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; -TRUNCATE TABLE system.text_log; +SELECT length(stack_trace) > 1000 FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; + +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' + AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'Hello\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1) + ORDER BY event_time_microseconds DESC LIMIT 10; SET calculate_text_stack_trace = 0; SELECT 'World', throwIf(1); -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } SYSTEM FLUSH LOGS; + SELECT length(stack_trace) FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1; -SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' ORDER BY event_time_microseconds DESC LIMIT 10; + +SELECT message LIKE '%Stack trace%' FROM system.text_log WHERE level = 'Error' AND message LIKE '%Exception%throwIf%' + AND query_id = (SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND query LIKE '%SELECT \'World\', throwIf(1)%' AND query NOT LIKE '%system%' ORDER BY event_time_microseconds DESC LIMIT 1) + ORDER BY event_time_microseconds DESC LIMIT 10; From 607a8a1c465baf85818ec41b8229f7afda8d6fb8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jul 2023 22:52:59 +0200 Subject: 
[PATCH 231/522] fix

---
 src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
index d6f8dbac883..1cc3736bd2e 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp
@@ -472,8 +472,8 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p
             /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper).
             /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one).
             /// Either all replicas having the part are not active, or the part is lost forever.
-            bool is_lost = searchForMissingPartOnOtherReplicas(part_name);
-            if (is_lost)
+            bool found_something = searchForMissingPartOnOtherReplicas(part_name);
+            if (!found_something)
                 onPartIsLostForever(part_name);

             break;

From da105d491661d4a7a564263d11499c74126f0453 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Tue, 4 Jul 2023 23:01:06 +0200
Subject: [PATCH 232/522] impl

---
 src/Functions/FunctionsHashing.h                         | 5 ++++-
 tests/queries/0_stateless/02790_keyed_hash_bug.reference | 1 +
 tests/queries/0_stateless/02790_keyed_hash_bug.sql       | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02790_keyed_hash_bug.reference
 create mode 100644 tests/queries/0_stateless/02790_keyed_hash_bug.sql

diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index a4d4fbd085d..f20cf4a5ff4 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -81,7 +81,7 @@ namespace impl

     static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key)
     {
-        SipHashKey ret;
+        SipHashKey ret{};

         const auto * tuple = checkAndGetColumn<ColumnTuple>(key.column.get());
         if (!tuple)
@@ -90,6 +90,9 @@ namespace impl
         if (tuple->tupleSize() != 2)
             throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64");

+        if (tuple->empty())
+            return ret;
+
         if (const auto * key0col = checkAndGetColumn<ColumnUInt64>(&(tuple->getColumn(0))))
             ret.key0 = key0col->get64(0);
         else
diff --git a/tests/queries/0_stateless/02790_keyed_hash_bug.reference b/tests/queries/0_stateless/02790_keyed_hash_bug.reference
new file mode 100644
index 00000000000..a321a9052d0
--- /dev/null
+++ b/tests/queries/0_stateless/02790_keyed_hash_bug.reference
@@ -0,0 +1 @@
+16324913028386710556
diff --git a/tests/queries/0_stateless/02790_keyed_hash_bug.sql b/tests/queries/0_stateless/02790_keyed_hash_bug.sql
new file mode 100644
index 00000000000..409e284d0d5
--- /dev/null
+++ b/tests/queries/0_stateless/02790_keyed_hash_bug.sql
@@ -0,0 +1,2 @@
+--- previously caused MemorySanitizer: use-of-uninitialized-value, because we tried to read hash key from empty tuple column during interpretation
+SELECT sipHash64Keyed((1111111111111111111, toUInt64(222222222222223))) group by toUInt64(222222222222223);

From 7f1ee68c87160089d70f4cef04c975c38b01218e Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Tue, 4 Jul 2023 23:08:54 +0200
Subject: [PATCH 233/522] refine

---
 src/Functions/DateTimeTransforms.h | 8 +++----
 src/Functions/IFunction.h | 4 ++++
 ...OrDateTimeConverterWithPreimageVisitor.cpp | 21 +++++++++----------
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/src/Functions/DateTimeTransforms.h
b/src/Functions/DateTimeTransforms.h index 84c71c89b11..e59a9046277 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -734,11 +734,11 @@ struct ToYearImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); + auto start_time = date_lut.makeDayNum(year, 1, 1); auto end_time = date_lut.addYears(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(start_time), Field(end_time))}; + return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", @@ -1412,11 +1412,11 @@ struct ToYYYYMMImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); + auto start_time = date_lut.makeDayNum(year, month, 1); auto end_time = date_lut.addMonths(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(start_time), Field(end_time))}; + return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 433cb61d04e..928475652f4 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -11,6 +11,10 @@ #include "config.h" +#if USE_EMBEDDED_COMPILER +# include +#endif + #include /// This file contains user interface for functions. diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index a377bb4bba6..9c2fdf6dee9 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -4,6 +4,7 @@ #include #include #include +#include "base/DayNum.h" #include #include #include @@ -37,20 +38,18 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy const DateLUTImpl & date_lut = DateLUT::instance(); const String & column_name = column.name; - String start_date_or_date_time; - String end_date_or_date_time; - if (isDateOrDate32(column.type.get())) + auto start_date = range.first.get(); + auto end_date = range.second.get(); + String start_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(start_date))); + String end_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(end_date))); + + if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) { - start_date_or_date_time = date_lut.dateToString(range.first.get()); - end_date_or_date_time = date_lut.dateToString(range.second.get()); + start_date_or_date_time += " 00:00:00"; + end_date_or_date_time += " 00:00:00"; } - else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) - { - start_date_or_date_time = date_lut.timeToString(range.first.get()); - end_date_or_date_time = date_lut.timeToString(range.second.get()); - } - else [[unlikely]] return {}; + else if (!isDateOrDate32(column.type.get())) return {}; if (comparator == "equals") { From 39199fd1168816c0e46da0011e21ad20573517e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 00:49:10 +0200 Subject: [PATCH 234/522] Update 
test --- .../0_stateless/02789_object_type_invalid_num_of_rows.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference index 8b137891791..e69de29bb2d 100644 --- a/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference +++ b/tests/queries/0_stateless/02789_object_type_invalid_num_of_rows.reference @@ -1 +0,0 @@ - From 94f0bd6b84dbaa3961ac689ecc1354a9385ca339 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 02:12:38 +0300 Subject: [PATCH 235/522] Update 00474_readonly_settings.sh --- tests/queries/0_stateless/00474_readonly_settings.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 9432579f9e6..3a857d81a74 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -10,12 +11,8 @@ $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" -- $CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value -#${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' - -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode.' 
&& echo "OK" || echo "FAIL" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" - From ede63a0f4e8239c56999a72bfe3af3f59e63dfb2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jul 2023 00:30:28 +0000 Subject: [PATCH 236/522] fix drop column with enabled sparse columns --- src/Storages/MergeTree/MutateTask.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..f23ef82fca8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -67,7 +67,9 @@ static void splitAndModifyMutationCommands( if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { - NameSet mutated_columns, dropped_columns; + NameSet mutated_columns; + NameSet dropped_columns; + for (const auto & command : commands) { if (command.type == MutationCommand::Type::MATERIALIZE_INDEX @@ -258,6 +260,10 @@ getColumnsForNewDataPart( storage_columns.emplace_back(column); } + NameSet storage_columns_set; + for (const auto & [name, _] : storage_columns) + storage_columns_set.insert(name); + for (const auto & command : all_commands) { if (command.type == MutationCommand::UPDATE) @@ -292,13 +298,15 @@ getColumnsForNewDataPart( SerializationInfoByName new_serialization_infos; for (const auto & [name, old_info] : serialization_infos) { - if (removed_columns.contains(name)) - continue; - auto it = renamed_columns_from_to.find(name); auto new_name = it == renamed_columns_from_to.end() ? name : it->second; - if (!updated_header.has(new_name)) + if (!storage_columns_set.contains(new_name)) + continue; + + /// In compact part we read all columns and all of them are in @updated_header. + /// But in wide part we must keep serialization infos for columns that are not touched by mutation. 
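An aside on the serialization-info bookkeeping in this hunk: serialization infos record, per column, whether the data is stored in the default or the sparse format. The rule being fixed is easier to see in isolation. A minimal sketch with simplified stand-ins (the map type and parameter names below are assumptions for illustration, not the real ClickHouse interfaces):

```cpp
#include <map>
#include <set>
#include <string>

/// Simplified stand-in: the real code maps column names to serialization
/// state objects, not strings.
using SerializationInfoMap = std::map<std::string, std::string>;

/// Keep an old serialization info only for columns that still exist in the
/// table and, in a wide part, were not rewritten by the mutation. Columns the
/// mutation touches get fresh infos computed from the mutated data instead.
SerializationInfoMap filterSerializationInfos(
    const SerializationInfoMap & old_infos,
    const std::set<std::string> & storage_columns,
    const std::set<std::string> & mutated_columns,
    bool is_wide_part)
{
    SerializationInfoMap kept;
    for (const auto & [name, info] : old_infos)
    {
        if (!storage_columns.contains(name))
            continue; /// dropped from the table, nothing to keep
        if (is_wide_part && !mutated_columns.contains(name))
            kept.emplace(name, info); /// untouched file keeps its on-disk format
    }
    return kept;
}
```

Compact parts are fully rewritten by every mutation, so their infos can always be rebuilt from the mutated columns; only wide parts carry untouched column files whose old format description must survive, which is what the condition that follows below expresses.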
+ if (!updated_header.has(new_name) && isWidePart(source_part)) { new_serialization_infos.emplace(new_name, old_info); continue; From 759b8b9a7685f566a88e86f5db5ebccb0db34869 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 07:17:52 +0000 Subject: [PATCH 237/522] Fix more tests --- .../00941_system_columns_race_condition.sh | 46 +++++++++++-------- .../0_stateless/02470_mutation_sync_race.sh | 8 ++-- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/00941_system_columns_race_condition.sh b/tests/queries/0_stateless/00941_system_columns_race_condition.sh index 69dfb30cd2c..4f2cd6ee91b 100755 --- a/tests/queries/0_stateless/00941_system_columns_race_condition.sh +++ b/tests/queries/0_stateless/00941_system_columns_race_condition.sh @@ -14,35 +14,43 @@ $CLICKHOUSE_CLIENT -q "CREATE TABLE alter_table (a UInt8, b Int16, c Float32, d function thread1() { - # NOTE: database = $CLICKHOUSE_DATABASE is unwanted - while true; do $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + $CLICKHOUSE_CLIENT --query "SELECT name FROM system.columns UNION ALL SELECT name FROM system.columns FORMAT Null"; + done } function thread2() { - while true; do $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; done + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do + $CLICKHOUSE_CLIENT -n --query "ALTER TABLE alter_table ADD COLUMN h String; ALTER TABLE alter_table MODIFY COLUMN h UInt64; ALTER TABLE alter_table DROP COLUMN h;"; + done } # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout export -f thread1; export -f thread2; -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread1 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & -timeout 15 bash -c thread2 2> /dev/null & +TIMEOUT=15 + +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread1 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & +thread2 $TIMEOUT 2> /dev/null & wait diff --git a/tests/queries/0_stateless/02470_mutation_sync_race.sh b/tests/queries/0_stateless/02470_mutation_sync_race.sh index 6c259e46cb1..37e99663ab5 100755 --- a/tests/queries/0_stateless/02470_mutation_sync_race.sh +++ b/tests/queries/0_stateless/02470_mutation_sync_race.sh @@ -12,7 +12,11 @@ $CLICKHOUSE_CLIENT -q "insert into src values (0)" function 
thread() { + local TIMELIMIT=$((SECONDS+$1)) for i in $(seq 1000); do + if [ $SECONDS -ge "$TIMELIMIT" ]; then + return + fi $CLICKHOUSE_CLIENT -q "alter table src detach partition tuple()" $CLICKHOUSE_CLIENT -q "alter table src attach partition tuple()" $CLICKHOUSE_CLIENT -q "alter table src update A = ${i} where 1 settings mutations_sync=2" @@ -20,8 +24,6 @@ function thread() done } -export -f thread; - TIMEOUT=30 -timeout $TIMEOUT bash -c thread || true +thread $TIMEOUT || true \ No newline at end of file From 9544c035b9d8b4646defd770b829715043b145d7 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 5 Jul 2023 11:15:31 +0200 Subject: [PATCH 238/522] Revert "refine" This reverts commit 7f1ee68c87160089d70f4cef04c975c38b01218e. --- src/Functions/DateTimeTransforms.h | 8 +++---- src/Functions/IFunction.h | 4 ---- ...OrDateTimeConverterWithPreimageVisitor.cpp | 21 ++++++++++--------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index e59a9046277..84c71c89b11 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -734,11 +734,11 @@ struct ToYearImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDayNum(year, 1, 1); + auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); auto end_time = date_lut.addYears(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; + return {std::make_pair(Field(start_time), Field(end_time))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", @@ -1412,11 +1412,11 @@ struct ToYYYYMMImpl const DateLUTImpl & date_lut = DateLUT::instance(); - auto start_time = date_lut.makeDayNum(year, month, 1); + auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); auto end_time = date_lut.addMonths(start_time, 1); if (isDateOrDate32(type) || isDateTime(type) || isDateTime64(type)) - return {std::make_pair(Field(Int32(start_time)), Field(Int32(end_time)))}; + return {std::make_pair(Field(start_time), Field(end_time))}; else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. Should be Date, Date32, DateTime or DateTime64", diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 928475652f4..433cb61d04e 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -11,10 +11,6 @@ #include "config.h" -#if USE_EMBEDDED_COMPILER -# include -#endif - #include /// This file contains user interface for functions. 
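To keep the refine/revert churn around these hunks readable: getPreimage answers, for a monotonic date function and a constant, which half-open input range produces that constant. That lets the optimizer rewrite a predicate such as toYear(d) = 2023 into d >= lo AND d < hi, a form that primary-key analysis can prune by. A minimal standalone illustration using std::chrono; the validity window and function name below are assumptions for the sketch, not the DateLUT API:

```cpp
#include <chrono>
#include <cstdint>
#include <optional>
#include <utility>

/// Half-open [start, end) range of day numbers (days since 1970-01-01) whose
/// toYear() would equal `y`; the shape mirrors ToYearImpl::getPreimage.
std::optional<std::pair<std::int64_t, std::int64_t>> yearPreimageDays(int y)
{
    using namespace std::chrono;
    if (y < 1900 || y > 2299) /// stand-in for the DATE_LUT_MIN/MAX_YEAR guard
        return std::nullopt;

    const sys_days start = year_month_day{year{y}, January, day{1}};
    const sys_days end = year_month_day{year{y + 1}, January, day{1}};
    return std::make_pair(start.time_since_epoch().count(),
                          end.time_since_epoch().count());
}
/// toYear(d) = 2023 then becomes: first <= d AND d < second.
```

Patch 233 above switched these boundaries to day numbers, this patch (238) reverts that, and patch 239 below settles the remaining ambiguity by computing second-resolution boundaries from a fixed UTC LUT.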
diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index 9c2fdf6dee9..a377bb4bba6 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -4,7 +4,6 @@ #include #include #include -#include "base/DayNum.h" #include #include #include @@ -38,18 +37,20 @@ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTy const DateLUTImpl & date_lut = DateLUT::instance(); const String & column_name = column.name; + String start_date_or_date_time; + String end_date_or_date_time; - auto start_date = range.first.get(); - auto end_date = range.second.get(); - String start_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(start_date))); - String end_date_or_date_time = date_lut.dateToString(ExtendedDayNum(static_cast(end_date))); - - if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) + if (isDateOrDate32(column.type.get())) { - start_date_or_date_time += " 00:00:00"; - end_date_or_date_time += " 00:00:00"; + start_date_or_date_time = date_lut.dateToString(range.first.get()); + end_date_or_date_time = date_lut.dateToString(range.second.get()); } - else if (!isDateOrDate32(column.type.get())) return {}; + else if (isDateTime(column.type.get()) || isDateTime64(column.type.get())) + { + start_date_or_date_time = date_lut.timeToString(range.first.get()); + end_date_or_date_time = date_lut.timeToString(range.second.get()); + } + else [[unlikely]] return {}; if (comparator == "equals") { From 2e5643cc4133f207b46534a4cf8a7875d7c18a8e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 5 Jul 2023 11:57:18 +0200 Subject: [PATCH 239/522] use UTC LUT --- src/Functions/DateTimeTransforms.h | 4 ++-- src/Functions/IFunction.h | 4 ++++ .../OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 84c71c89b11..510a88db2b6 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -732,7 +732,7 @@ struct ToYearImpl auto year = point.get(); if (year < DATE_LUT_MIN_YEAR || year >= DATE_LUT_MAX_YEAR) return std::nullopt; - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::instance("UTC"); auto start_time = date_lut.makeDateTime(year, 1, 1, 0, 0, 0); auto end_time = date_lut.addYears(start_time, 1); @@ -1410,7 +1410,7 @@ struct ToYYYYMMImpl if (year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || (year == DATE_LUT_MAX_YEAR && month == 12)) return std::nullopt; - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::instance("UTC"); auto start_time = date_lut.makeDateTime(year, month, 1, 0, 0, 0); auto end_time = date_lut.addMonths(start_time, 1); diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 433cb61d04e..09758d59e4a 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -13,6 +13,10 @@ #include +#if USE_EMBEDDED_COMPILER +# include +#endif + /// This file contains user interface for functions. 
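Why patch 239 pins DateLUT::instance("UTC") in these spots: the Unix timestamp at which a calendar year begins depends on the time zone, so preimage boundaries computed from a server-local LUT would be shifted by the local UTC offset, and two servers in different zones would rewrite the same predicate differently. A small self-contained demonstration; the constants are plain calendar arithmetic, not values read from tzdata:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    /// Days from 1970-01-01 to 2023-01-01: 53 years, 13 of them leap years.
    const std::int64_t days = 53 * 365 + 13; /// 19358
    const std::int64_t utc_boundary = days * 86400; /// 1672531200

    /// Midnight 2023-01-01 in CET (UTC+1) falls one hour earlier in UTC:
    const std::int64_t berlin_boundary = utc_boundary - 3600;

    std::cout << "year 2023 starts at " << utc_boundary << " (UTC LUT)\n"
              << "year 2023 starts at " << berlin_boundary << " (UTC+1 LUT)\n";
    /// Two different cut-offs for the same toYear(x) = 2023 predicate;
    /// the rewrite has to commit to one of them, and this patch picks UTC.
}
```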
namespace llvm diff --git a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp index a377bb4bba6..6a9251cec49 100644 --- a/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp +++ b/src/Interpreters/OptimizeDateOrDateTimeConverterWithPreimageVisitor.cpp @@ -34,7 +34,7 @@ namespace ErrorCodes */ ASTPtr generateOptimizedDateFilterAST(const String & comparator, const NameAndTypePair & column, const std::pair& range) { - const DateLUTImpl & date_lut = DateLUT::instance(); + const DateLUTImpl & date_lut = DateLUT::instance("UTC"); const String & column_name = column.name; String start_date_or_date_time; From 47cffa6f1ed6832e38d30a95f2c63e26506b0a10 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 09:40:53 +0000 Subject: [PATCH 240/522] Properly check the first part disk --- tests/integration/test_multiple_disks/test.py | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index b5606ee8bc2..c0fbe39196d 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -49,6 +49,18 @@ def start_cluster(): cluster.shutdown() +def get_oldest_part(node, table_name): + return node.query( + f"SELECT name FROM system.parts WHERE table = '{table_name}' and active = 1 ORDER BY modification_time LIMIT 1" + ).strip() + + +def get_disk_for_part(node, table_name, part): + return node.query( + f"SELECT disk_name FROM system.parts WHERE table == '{table_name}' and active = 1 and name = '{part}' ORDER BY modification_time" + ).strip() + + def test_system_tables(start_cluster): expected_disks_data = [ { @@ -694,15 +706,13 @@ def test_jbod_overflow(start_cluster, name, engine): def test_background_move(start_cluster, name, engine): try: node1.query_with_retry( - """ + f""" CREATE TABLE IF NOT EXISTS {name} ( s1 String ) ENGINE = {engine} ORDER BY tuple() SETTINGS storage_policy='moving_jbod_with_external' - """.format( - name=name, engine=engine - ) + """ ) node1.query(f"SYSTEM STOP MERGES {name}") @@ -718,25 +728,27 @@ def test_background_move(start_cluster, name, engine): ) ) + first_part = get_oldest_part(node1, name) + used_disks = get_used_disks_for_table(node1, name) retry = 20 i = 0 - while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: + # multiple moves can be assigned in parallel so we can move later parts before the oldest + # we need to wait explicitly until the oldest part is moved + while get_disk_for_part(node1, name, first_part) != "external" and i < retry: time.sleep(0.5) - used_disks = get_used_disks_for_table(node1, name) i += 1 + used_disks = get_used_disks_for_table(node1, name) assert sum(1 for x in used_disks if x == "jbod1") <= 2 # first (oldest) part was moved to external - assert used_disks[0] == "external" + assert get_disk_for_part(node1, name, first_part) == "external" node1.query("SYSTEM FLUSH LOGS") path = node1.query( - "SELECT path_on_disk FROM system.part_log WHERE table = '{}' AND event_type='MovePart' AND part_name = 'all_1_1_0'".format( - name - ) + f"SELECT path_on_disk FROM system.part_log WHERE table = '{name}' AND event_type='MovePart' AND part_name = '{first_part}'" ) # first (oldest) part was moved to external @@ -762,36 +774,28 @@ def test_background_move(start_cluster, name, engine): def test_start_stop_moves(start_cluster, name, engine): try: 
node1.query_with_retry( - """ + f""" CREATE TABLE IF NOT EXISTS {name} ( s1 String ) ENGINE = {engine} ORDER BY tuple() SETTINGS storage_policy='moving_jbod_with_external' - """.format( - name=name, engine=engine - ) + """ ) - node1.query_with_retry("INSERT INTO {} VALUES ('HELLO')".format(name)) - node1.query_with_retry("INSERT INTO {} VALUES ('WORLD')".format(name)) + node1.query_with_retry(f"INSERT INTO {name} VALUES ('HELLO')") + node1.query_with_retry(f"INSERT INTO {name} VALUES ('WORLD')") used_disks = get_used_disks_for_table(node1, name) assert all(d == "jbod1" for d in used_disks), "All writes shoud go to jbods" - first_part = node1.query( - "SELECT name FROM system.parts WHERE table = '{}' and active = 1 ORDER BY modification_time LIMIT 1".format( - name - ) - ).strip() + first_part = get_oldest_part(node1, name) node1.query("SYSTEM STOP MOVES") with pytest.raises(QueryRuntimeException): node1.query( - "ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format( - name, first_part - ) + f"ALTER TABLE {name} MOVE PART '{first_part}' TO VOLUME 'external'" ) used_disks = get_used_disks_for_table(node1, name) @@ -801,24 +805,18 @@ def test_start_stop_moves(start_cluster, name, engine): node1.query("SYSTEM START MOVES") - node1.query( - "ALTER TABLE {} MOVE PART '{}' TO VOLUME 'external'".format( - name, first_part - ) - ) + node1.query(f"ALTER TABLE {name} MOVE PART '{first_part}' TO VOLUME 'external'") disk = node1.query( - "SELECT disk_name FROM system.parts WHERE table = '{}' and name = '{}' and active = 1".format( - name, first_part - ) + f"SELECT disk_name FROM system.parts WHERE table = '{name}' and name = '{first_part}' and active = 1" ).strip() assert disk == "external" - node1.query_with_retry("TRUNCATE TABLE {}".format(name)) + node1.query_with_retry(f"TRUNCATE TABLE {name}") - node1.query("SYSTEM STOP MOVES {}".format(name)) - node1.query("SYSTEM STOP MERGES {}".format(name)) + node1.query(f"SYSTEM STOP MOVES {name}") + node1.query(f"SYSTEM STOP MERGES {name}") for i in range(5): data = [] # 5MB in total @@ -831,6 +829,8 @@ def test_start_stop_moves(start_cluster, name, engine): ) ) + first_part = get_oldest_part(node1, name) + used_disks = get_used_disks_for_table(node1, name) retry = 5 @@ -843,23 +843,23 @@ def test_start_stop_moves(start_cluster, name, engine): # first (oldest) part doesn't move anywhere assert used_disks[0] == "jbod1" - node1.query("SYSTEM START MOVES {}".format(name)) + node1.query(f"SYSTEM START MOVES {name}") - # wait sometime until background backoff finishes + # multiple moves can be assigned in parallel so we can move later parts before the oldest + # we need to wait explicitly until the oldest part is moved retry = 60 i = 0 - while not sum(1 for x in used_disks if x == "jbod1") <= 2 and i < retry: + while get_disk_for_part(node1, name, first_part) != "external" and i < retry: time.sleep(1) - used_disks = get_used_disks_for_table(node1, name) i += 1 - node1.query("SYSTEM START MERGES {}".format(name)) + # first (oldest) part moved to external + assert get_disk_for_part(node1, name, first_part) == "external" + used_disks = get_used_disks_for_table(node1, name) assert sum(1 for x in used_disks if x == "jbod1") <= 2 - # first (oldest) part moved to external - assert used_disks[0] == "external" - + node1.query(f"SYSTEM START MERGES {name}") finally: node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") From e6422f814418fce9e020e5f32029192e8f6a5dd7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 5 Jul 2023 11:52:46 +0200 Subject: [PATCH 
241/522] Delete comment, rename variable --- src/Interpreters/FilesystemCacheLog.h | 11 +---------- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.h | 4 ++-- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index d6dd00e5463..0d088a922e0 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,16 +11,7 @@ namespace DB { -/// -/// -------- Column --------- Type ------ -/// | event_date | DateTime | -/// | event_time | UInt64 | -/// | query_id | String | -/// | remote_file_path | String | -/// | segment_range | Tuple | -/// | read_type | String | -/// ------------------------------------- -/// + struct FilesystemCacheLogElement { enum class CacheType diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..1c2eb66923e 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,15 +370,15 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.delete_key.empty()) + if (query.key_to_drop.empty()) { cache->removeAllReleasable(); } else { - auto key = FileCacheKey::fromKeyString(query.delete_key); - if (query.delete_offset.has_value()) - cache->removeFileSegment(key, query.delete_offset.value()); + auto key = FileCacheKey::fromKeyString(query.key_to_drop); + if (query.offset_to_drop.has_value()) + cache->removeFileSegment(key, query.offset_to_drop.value()); else cache->removeKey(key); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9c5e7bff61e..22244a7075c 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -212,11 +212,11 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, if (!filesystem_cache_name.empty()) { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!delete_key.empty()) + if (!key_to_drop.empty()) { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; - if (delete_offset.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; + if (offset_to_drop.has_value()) + settings.ostr << (settings.hilite ? 
hilite_none : "") << " OFFSET " << offset_to_drop.value(); } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..6c81162f103 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string delete_key; - std::optional delete_offset; + std::string key_to_drop; + std::optional offset_to_drop; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ef71e994d56..09c86876b48 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -409,9 +409,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->filesystem_cache_name = ast->as()->value.safeGet(); if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) { - res->delete_key = ast->as()->name(); + res->key_to_drop = ast->as()->name(); if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->delete_offset = ast->as()->value.safeGet(); + res->offset_to_drop = ast->as()->value.safeGet(); } } if (!parseQueryWithOnCluster(res, pos, expected)) From 7c3c48c8c44a1c53902dd24d540e25e2634a986b Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 5 Jul 2023 11:20:03 +0000 Subject: [PATCH 242/522] Fix use_structure_from_insertion_table_in_table_functions does not work for materialized and alias columns --- src/Interpreters/Context.cpp | 6 +++++- .../0_stateless/02811_insert_schema_inference.reference | 0 .../0_stateless/02811_insert_schema_inference.sql | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02811_insert_schema_inference.reference create mode 100644 tests/queries/0_stateless/02811_insert_schema_inference.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7482450d529..a0abab349b3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1519,7 +1519,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions; if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable()) { - const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns(); + const auto & insert_structure = DatabaseCatalog::instance() + .getTable(getInsertionTable(), shared_from_this()) + ->getInMemoryMetadataPtr() + ->getColumns() + .getInsertable(); DB::ColumnsDescription structure_hint; bool use_columns_from_insert_query = true; diff --git a/tests/queries/0_stateless/02811_insert_schema_inference.reference b/tests/queries/0_stateless/02811_insert_schema_inference.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_insert_schema_inference.sql b/tests/queries/0_stateless/02811_insert_schema_inference.sql new file mode 100644 index 00000000000..9de710047f7 --- /dev/null +++ b/tests/queries/0_stateless/02811_insert_schema_inference.sql @@ -0,0 +1,9 @@ +drop table if exists test; +create table test +( + n1 UInt32, + n2 UInt32 alias murmurHash3_32(n1), + n3 UInt32 materialized n2 + 1 +)engine=MergeTree order by n1; +insert into test select * from generateRandom() limit 10; +drop 
table test; From cf809c25cd0052b1a7d51aea8d5179a1c9b741d2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 5 Jul 2023 11:24:29 +0000 Subject: [PATCH 243/522] fix CLEAR COLUMN query --- src/Storages/MergeTree/MutateTask.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f23ef82fca8..1346d5937f7 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -301,14 +301,16 @@ getColumnsForNewDataPart( auto it = renamed_columns_from_to.find(name); auto new_name = it == renamed_columns_from_to.end() ? name : it->second; - if (!storage_columns_set.contains(new_name)) + /// Column can be removed only in this data part by CLEAR COLUMN query. + if (!storage_columns_set.contains(new_name) || removed_columns.contains(new_name)) continue; /// In compact part we read all columns and all of them are in @updated_header. /// But in wide part we must keep serialization infos for columns that are not touched by mutation. - if (!updated_header.has(new_name) && isWidePart(source_part)) + if (!updated_header.has(new_name)) { - new_serialization_infos.emplace(new_name, old_info); + if (isWidePart(source_part)) + new_serialization_infos.emplace(new_name, old_info); continue; } From 86014a60a308ec41c7416bdbbfe6b360dcf1617b Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Wed, 5 Jul 2023 11:42:02 +0000 Subject: [PATCH 244/522] Fixed case with spaces before delimiter --- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 1 + tests/queries/0_stateless/00301_csv.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index edbc33fb3c3..9731b4ba465 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -341,6 +341,7 @@ bool CSVFormatReader::readField( if (is_last_file_column && format_settings.csv.ignore_extra_columns) { // Skip all fields to next line. 
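The one-line fix that follows is easier to see with the whole skip loop in view. A standalone sketch over a string view; the real reader works on ClickHouse buffer types and honors the full CSV dialect, so treat this as the shape of the logic, not the implementation:

```cpp
#include <string_view>

/// Consume ", col, col" tails left over after the last mapped column when
/// ignore_extra_columns is enabled. Without the leading whitespace skip, an
/// input such as `"String6" , "extra"` (a space before the comma) failed to
/// match the delimiter, so the extra columns were not skipped.
void skipExtraColumns(std::string_view & buf, char delimiter)
{
    auto skip_ws = [&buf]
    {
        while (!buf.empty() && (buf.front() == ' ' || buf.front() == '\t'))
            buf.remove_prefix(1);
    };

    skip_ws(); /// the added call: tolerate spaces before the next delimiter
    while (!buf.empty() && buf.front() == delimiter)
    {
        buf.remove_prefix(1); /// eat the delimiter
        /// skipField() in the real reader; here, drop everything up to the
        /// next delimiter or the end of the row.
        while (!buf.empty() && buf.front() != delimiter && buf.front() != '\n')
            buf.remove_prefix(1);
    }
}
```

The test case added further down exercises exactly this: a trailing `"String6" ,` with a space before the delimiter.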
+ skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); while (checkChar(format_settings.csv.delimiter, *buf)) { skipField(); diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index dc354433af9..7657745e9f7 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -49,7 +49,7 @@ echo '"Hello", 1, "String1" "Hello", 3, "String3", "2016-01-13" "Hello", 4, , "2016-01-14" "Hello", 5, "String5", "2016-01-15", "2016-01-16" -"Hello", 6, "String6", "line with a +"Hello", 6, "String6" , "line with a break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 7cb7e138c13406f05d733323141649ae13a7f615 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 14:16:46 +0200 Subject: [PATCH 245/522] Update --- .github/workflows/master.yml | 4 ++-- .github/workflows/pull_request.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 6996221e1aa..0fbcb95fc12 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -851,8 +851,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [ DockerHubPush ] - runs-on: [ self-hosted, builder ] + needs: [DockerHubPush] + runs-on: [self-hosted, builder] steps: - name: Set envs run: | diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index fe7c3bba410..f898e764915 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -912,8 +912,8 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" BuilderBinRISCV64: - needs: [ DockerHubPush, FastTest, StyleCheck ] - runs-on: [ self-hosted, builder ] + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] steps: - name: Set envs run: | From e957600d5c287c52f93d0f631587852ad0869035 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 5 Jul 2023 12:28:27 +0000 Subject: [PATCH 246/522] wip --- src/Parsers/ASTColumnDeclaration.cpp | 5 ++ src/Parsers/ASTColumnDeclaration.h | 1 + src/Parsers/ASTCreateQuery.h | 3 +- src/Parsers/ParserCreateQuery.cpp | 26 +++++- src/Parsers/ParserCreateQuery.h | 11 ++- .../02811_primary_key_in_columns.sql | 83 +++++++++++++++++++ 6 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02811_primary_key_in_columns.sql diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c2396708a73..12d000d5e9f 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -44,6 +44,7 @@ ASTPtr ASTColumnDeclaration::clone() const res->ttl = ttl->clone(); res->children.push_back(res->ttl); } + if (collation) { res->collation = collation->clone(); @@ -76,6 +77,10 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta << (*null_modifier ? "" : "NOT ") << "NULL" << (settings.hilite ? hilite_none : ""); } + if (primary_key_specifier) + settings.ostr << ' ' << (settings.hilite ? 
hilite_keyword : "") + << "PRIMARY KEY" << (settings.hilite ? hilite_none : ""); + if (default_expression) { settings.ostr << ' ' << (settings.hilite ? hilite_keyword : "") << default_specifier << (settings.hilite ? hilite_none : ""); diff --git a/src/Parsers/ASTColumnDeclaration.h b/src/Parsers/ASTColumnDeclaration.h index 45814551db8..9d486667911 100644 --- a/src/Parsers/ASTColumnDeclaration.h +++ b/src/Parsers/ASTColumnDeclaration.h @@ -21,6 +21,7 @@ public: ASTPtr codec; ASTPtr ttl; ASTPtr collation; + bool primary_key_specifier = false; String getID(char delim) const override { return "ColumnDeclaration" + (delim + name); } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 230996f610e..ae45a244a03 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -56,6 +56,7 @@ public: ASTExpressionList * constraints = nullptr; ASTExpressionList * projections = nullptr; IAST * primary_key = nullptr; + IAST * primary_key_from_columns = nullptr; String getID(char) const override { return "Columns definition"; } @@ -76,7 +77,7 @@ public: f(reinterpret_cast(&primary_key)); f(reinterpret_cast(&constraints)); f(reinterpret_cast(&projections)); - f(reinterpret_cast(&primary_key)); + f(reinterpret_cast(&primary_key_from_columns)); } }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index adf3513ba40..1941bafab0d 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -300,11 +300,21 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr constraints = std::make_shared(); ASTPtr projections = std::make_shared(); ASTPtr primary_key; + ASTPtr primary_key_from_columns; for (const auto & elem : list->children) { - if (elem->as()) + if (auto *cd = elem->as()) + { + if(cd->primary_key_specifier) + { + if(!primary_key_from_columns) + primary_key_from_columns = makeASTFunction("tuple"); + auto column_identifier = std::make_shared(cd->name); + primary_key_from_columns->children.push_back(column_identifier); + } columns->children.push_back(elem); + } else if (elem->as()) indices->children.push_back(elem); else if (elem->as()) @@ -336,6 +346,8 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E res->set(res->projections, projections); if (primary_key) res->set(res->primary_key, primary_key); + if (primary_key_from_columns) + res->set(res->primary_key_from_columns, primary_key_from_columns); node = res; @@ -599,6 +611,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe /// List of columns. 
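Before the parser plumbing continues: the net effect of the changes above is that each column may carry its own PRIMARY KEY marker, and all markers are folded, in declaration order, into a single tuple(...) expression that later becomes the storage's primary key (declaring a table-level primary key as well is rejected below with "Multiple primary keys are not allowed"). A toy model of the folding step, with deliberately simplified stand-ins for the AST types:

```cpp
#include <string>
#include <vector>

struct ColumnDeclaration
{
    std::string name;
    bool primary_key_specifier = false; /// set when `<name> <type> PRIMARY KEY` was parsed
};

/// Fold the per-column markers into the ordered key expression, i.e. the
/// moral equivalent of building makeASTFunction("tuple", ...) above.
std::vector<std::string> foldColumnPrimaryKeys(const std::vector<ColumnDeclaration> & columns)
{
    std::vector<std::string> key;
    for (const auto & col : columns)
        if (col.primary_key_specifier)
            key.push_back(col.name);
    return key; /// empty result: no column-level PRIMARY KEY was given
}
```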
if (s_lparen.ignore(pos, expected)) { + /// Columns and all table properties (indices, constraints, projections, primary_key) if (!table_properties_p.parse(pos, columns_list, expected)) return false; @@ -699,6 +712,17 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe query->storage->primary_key = query->columns_list->primary_key; } + if (query->columns_list && (query->columns_list->primary_key_from_columns)) + { + /// If engine is not set will use default one + if (!query->storage) + query->set(query->storage, std::make_shared()); + else if (query->storage->primary_key) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); + + query->storage->primary_key = query->columns_list->primary_key_from_columns; + } + tryGetIdentifierNameInto(as_database, query->as_database); tryGetIdentifierNameInto(as_table, query->as_table); query->set(query->select, select); diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 5f79a4b68f6..09935e2b608 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -135,6 +135,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserKeyword s_remove{"REMOVE"}; ParserKeyword s_type{"TYPE"}; ParserKeyword s_collate{"COLLATE"}; + ParserKeyword s_primary_key{"PRIMARY KEY"}; ParserExpression expr_parser; ParserStringLiteral string_literal_parser; ParserLiteral literal_parser; @@ -177,6 +178,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr codec_expression; ASTPtr ttl_expression; ASTPtr collation_expression; + bool primary_key_specifier = false; auto null_check_without_moving = [&]() -> bool { @@ -198,6 +200,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E && !s_ephemeral.checkWithoutMoving(pos, expected) && !s_alias.checkWithoutMoving(pos, expected) && !s_auto_increment.checkWithoutMoving(pos, expected) + && !s_primary_key.checkWithoutMoving(pos, expected) && (require_type || (!s_comment.checkWithoutMoving(pos, expected) && !s_codec.checkWithoutMoving(pos, expected)))) @@ -266,7 +269,6 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ParserDataType().parse(tmp_pos, type, tmp_expected); } } - /// This will rule out unusual expressions like *, t.* that cannot appear in DEFAULT if (default_expression && !dynamic_cast(default_expression.get())) return false; @@ -305,6 +307,11 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E return false; } + if (s_primary_key.ignore(pos, expected)) + { + primary_key_specifier = true; + } + node = column_declaration; if (type) @@ -346,6 +353,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E column_declaration->children.push_back(std::move(collation_expression)); } + column_declaration->primary_key_specifier = primary_key_specifier; + return true; } diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.sql b/tests/queries/0_stateless/02811_primary_key_in_columns.sql new file mode 100644 index 00000000000..df25fdd14ab --- /dev/null +++ b/tests/queries/0_stateless/02811_primary_key_in_columns.sql @@ -0,0 +1,83 @@ +DROP TABLE IF EXISTS pk_test1; +DROP TABLE IF EXISTS pk_test2; +DROP TABLE IF EXISTS pk_test3; +DROP TABLE IF EXISTS pk_test4; +DROP TABLE IF EXISTS pk_test5; +DROP TABLE IF EXISTS pk_test6; +DROP TABLE IF EXISTS pk_test7; +DROP TABLE IF EXISTS pk_test8; +DROP TABLE IF EXISTS pk_test9; +DROP TABLE IF EXISTS pk_test10; +DROP TABLE IF EXISTS pk_test11; 
+DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test13; +DROP TABLE IF EXISTS pk_test14; +DROP TABLE IF EXISTS pk_test15; +DROP TABLE IF EXISTS pk_test16; +DROP TABLE IF EXISTS pk_test17; +DROP TABLE IF EXISTS pk_test18; +DROP TABLE IF EXISTS pk_test19; +DROP TABLE IF EXISTS pk_test20; +DROP TABLE IF EXISTS pk_test21; +DROP TABLE IF EXISTS pk_test22; +DROP TABLE IF EXISTS pk_test23; + +SET default_table_engine=MergeTree; + +CREATE TABLE pk_test1 (String a PRIMARY KEY, String b, String c); +CREATE TABLE pk_test2 (String a PRIMARY KEY, String b PRIMARY KEY, String c); +CREATE TABLE pk_test3 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY); + +CREATE TABLE pk_test4 (String a, String b PRIMARY KEY, String c PRIMARY KEY); +CREATE TABLE pk_test5 (String a, String b PRIMARY KEY, String c); +CREATE TABLE pk_test6 (String a, String b, String c PRIMARY KEY); + +CREATE TABLE pk_test7 (String a PRIMARY KEY, String b, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test8 (String a PRIMARY KEY, String b PRIMARY KEY, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test9 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); + +CREATE TABLE pk_test10 (String a, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test11 (String a, String b PRIMARY KEY, String c, PRIMARY KEY (a)); +CREATE TABLE pk_test12 (String a, String b, String c PRIMARY KEY, PRIMARY KEY (a)); + +CREATE TABLE pk_test12 (String a PRIMARY KEY, String b, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test13 (String a PRIMARY KEY, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test14 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); + +CREATE TABLE pk_test15 (String a, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test16 (String a, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test17 (String a, String b, String c PRIMARY KEY) PRIMARY KEY (a,b,c); + +CREATE TABLE pk_test18 (String a PRIMARY KEY, String b, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test19 (String a PRIMARY KEY, String b PRIMARY KEY, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test20 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); + +CREATE TABLE pk_test21 (String a, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test22 (String a, String b PRIMARY KEY, String c) ORDER BY (a,b,c); +CREATE TABLE pk_test23 (String a, String b, String c PRIMARY KEY) ORDER BY (a,b,c); + +DROP TABLE IF EXISTS pk_test1; +DROP TABLE IF EXISTS pk_test2; +DROP TABLE IF EXISTS pk_test3; +DROP TABLE IF EXISTS pk_test4; +DROP TABLE IF EXISTS pk_test5; +DROP TABLE IF EXISTS pk_test6; +DROP TABLE IF EXISTS pk_test7; +DROP TABLE IF EXISTS pk_test8; +DROP TABLE IF EXISTS pk_test9; +DROP TABLE IF EXISTS pk_test10; +DROP TABLE IF EXISTS pk_test11; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test12; +DROP TABLE IF EXISTS pk_test13; +DROP TABLE IF EXISTS pk_test14; +DROP TABLE IF EXISTS pk_test15; +DROP TABLE IF EXISTS pk_test16; +DROP TABLE IF EXISTS pk_test17; +DROP TABLE IF EXISTS pk_test18; +DROP TABLE IF EXISTS pk_test19; +DROP TABLE IF EXISTS pk_test20; +DROP TABLE IF EXISTS pk_test21; +DROP TABLE IF EXISTS pk_test22; +DROP TABLE IF EXISTS pk_test23; \ No newline at end of file From 5512c307db1d43b5902e00ec13fd007e0882a82c Mon Sep 17 00:00:00 2001 From: Alexander Gololobov 
<440544+davenger@users.noreply.github.com> Date: Fri, 30 Jun 2023 13:23:23 +0200 Subject: [PATCH 247/522] system.jemalloc_bins table --- src/CMakeLists.txt | 5 +- src/Storages/System/StorageSystemJemalloc.cpp | 125 ++++++++++++++++++ src/Storages/System/StorageSystemJemalloc.h | 34 +++++ src/Storages/System/attachSystemTables.cpp | 3 + 4 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 src/Storages/System/StorageSystemJemalloc.cpp create mode 100644 src/Storages/System/StorageSystemJemalloc.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ca428fbff3a..f870993f080 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -206,11 +206,10 @@ add_library (clickhouse_new_delete STATIC Common/new_delete.cpp) target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io) if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::jemalloc) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) + target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) endif() -if (TARGET ch_contrib::jemalloc) - target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc) -endif() target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) add_subdirectory(Access/Common) diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp new file mode 100644 index 00000000000..2cb666eb5c3 --- /dev/null +++ b/src/Storages/System/StorageSystemJemalloc.cpp @@ -0,0 +1,125 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" + +#if USE_JEMALLOC +# include +#endif + + +namespace DB +{ + +#if USE_JEMALLOC + +UInt64 getJeMallocValue(const char * name) +{ + UInt64 value{}; + size_t size = sizeof(value); + mallctl(name, &value, &size, nullptr, 0); + return value; +} + +void fillJemallocBins(MutableColumns & res_columns) +{ + /// Bins for small allocations + auto small_bins_count = getJeMallocValue("arenas.nbins"); + UInt16 bin_index = 0; + for (UInt64 bin = 0; bin < small_bins_count; ++bin, ++bin_index) + { + auto size = getJeMallocValue(fmt::format("arenas.bin.{}.size", bin).c_str()); + auto ndalloc = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.ndalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + auto nmalloc = getJeMallocValue(fmt::format("stats.arenas.{}.bins.{}.nmalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + + size_t col_num = 0; + res_columns.at(col_num++)->insert(bin_index); + res_columns.at(col_num++)->insert(0); + res_columns.at(col_num++)->insert(size); + res_columns.at(col_num++)->insert(nmalloc); + res_columns.at(col_num++)->insert(ndalloc); + } + + /// Bins for large allocations + auto large_bins_count = getJeMallocValue("arenas.nlextents"); + for (UInt64 bin = 0; bin < large_bins_count; ++bin, ++bin_index) + { + auto size = getJeMallocValue(fmt::format("arenas.lextent.{}.size", bin).c_str()); + auto ndalloc = getJeMallocValue(fmt::format("stats.arenas.{}.lextents.{}.ndalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + auto nmalloc = getJeMallocValue(fmt::format("stats.arenas.{}.lextents.{}.nmalloc", MALLCTL_ARENAS_ALL, bin).c_str()); + + size_t col_num = 0; + res_columns.at(col_num++)->insert(bin_index); + res_columns.at(col_num++)->insert(1); + res_columns.at(col_num++)->insert(size); + res_columns.at(col_num++)->insert(nmalloc); + res_columns.at(col_num++)->insert(ndalloc); + } +} + +#else + +void fillJemallocBins(MutableColumns &) +{ + 
LOG_INFO(&Poco::Logger::get("StorageSystemJemallocBins"), "jemalloc is not enabled"); +} + +#endif // USE_JEMALLOC + + +StorageSystemJemallocBins::StorageSystemJemallocBins(const StorageID & table_id_) + : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + ColumnsDescription desc; + auto columns = getNamesAndTypes(); + for (const auto & col : columns) + { + ColumnDescription col_desc(col.name, col.type); + desc.add(col_desc); + } + storage_metadata.setColumns(desc); + setInMemoryMetadata(storage_metadata); +} + +NamesAndTypesList StorageSystemJemallocBins::getNamesAndTypes() +{ + return { + { "index", std::make_shared() }, + { "large", std::make_shared() }, + { "size", std::make_shared() }, + { "nmalloc", std::make_shared() }, + { "ndalloc", std::make_shared() }, + }; +} + +Pipe StorageSystemJemallocBins::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo &, + ContextPtr /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const size_t /*num_streams*/) +{ + storage_snapshot->check(column_names); + + auto header = storage_snapshot->metadata->getSampleBlockWithVirtuals(getVirtuals()); + MutableColumns res_columns = header.cloneEmptyColumns(); + + fillJemallocBins(res_columns); + + UInt64 num_rows = res_columns.at(0)->size(); + Chunk chunk(std::move(res_columns), num_rows); + + return Pipe(std::make_shared(std::move(header), std::move(chunk))); +} + +} diff --git a/src/Storages/System/StorageSystemJemalloc.h b/src/Storages/System/StorageSystemJemalloc.h new file mode 100644 index 00000000000..a4ac2fbcdcb --- /dev/null +++ b/src/Storages/System/StorageSystemJemalloc.h @@ -0,0 +1,34 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +class StorageSystemJemallocBins final : public IStorage +{ +public: + explicit StorageSystemJemallocBins(const StorageID & table_id_); + + std::string getName() const override { return "SystemJemallocBins"; } + + static NamesAndTypesList getNamesAndTypes(); + + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool isSystemStorage() const override { return true; } + + bool supportsTransactions() const override { return true; } +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 7d21d9e39d2..a9873c821ce 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -1,3 +1,4 @@ +#include "Storages/System/StorageSystemJemalloc.h" #include "config.h" #include @@ -82,6 +83,7 @@ #include #include #include +#include #ifdef OS_LINUX #include @@ -187,6 +189,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "certificates"); attach(context, system_database, "named_collections"); attach(context, system_database, "user_processes"); + attach(context, system_database, "jemalloc_bins"); if (has_zookeeper) { From ff0197543e568125c7f3c75f4930d750d741ff6d Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Mon, 3 Jul 2023 22:39:42 +0200 Subject: [PATCH 248/522] Basic test that stats are non-zero --- .../02810_system_jemalloc_bins.reference | 1 + .../0_stateless/02810_system_jemalloc_bins.sql | 13 +++++++++++++ 2 files changed, 14 
insertions(+)
 create mode 100644 tests/queries/0_stateless/02810_system_jemalloc_bins.reference
 create mode 100644 tests/queries/0_stateless/02810_system_jemalloc_bins.sql

diff --git a/tests/queries/0_stateless/02810_system_jemalloc_bins.reference b/tests/queries/0_stateless/02810_system_jemalloc_bins.reference
new file mode 100644
index 00000000000..50d4d226b46
--- /dev/null
+++ b/tests/queries/0_stateless/02810_system_jemalloc_bins.reference
@@ -0,0 +1 @@
+1 1 1 1 1
diff --git a/tests/queries/0_stateless/02810_system_jemalloc_bins.sql b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql
new file mode 100644
index 00000000000..8ecf47e51b5
--- /dev/null
+++ b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql
@@ -0,0 +1,13 @@
+WITH
+    (SELECT value IN ('ON', '1') FROM system.build_options WHERE name = 'USE_JEMALLOC') AS jemalloc_enabled,
+    (SELECT count() FROM system.jemalloc_bins) AS total_bins,
+    (SELECT count() FROM system.jemalloc_bins WHERE large) AS large_bins,
+    (SELECT count() FROM system.jemalloc_bins WHERE NOT large) AS small_bins,
+    (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE large) AS large_allocated_bytes,
+    (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE NOT large) AS small_allocated_bytes
+SELECT
+    (total_bins > 0) = jemalloc_enabled,
+    (large_bins > 0) = jemalloc_enabled,
+    (small_bins > 0) = jemalloc_enabled,
+    (large_allocated_bytes > 0) = jemalloc_enabled,
+    (small_allocated_bytes > 0) = jemalloc_enabled;

From bb422b816894769860a60579aea04454f8f1c496 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Mon, 3 Jul 2023 23:23:06 +0200
Subject: [PATCH 249/522] Added doc

---
 .../operations/system-tables/jemalloc_bins.md | 45 +++++++++++++++++++
 1 file changed, 45 insertions(+)
 create mode 100644 docs/en/operations/system-tables/jemalloc_bins.md

diff --git a/docs/en/operations/system-tables/jemalloc_bins.md b/docs/en/operations/system-tables/jemalloc_bins.md
new file mode 100644
index 00000000000..dfe2ddb01e2
--- /dev/null
+++ b/docs/en/operations/system-tables/jemalloc_bins.md
@@ -0,0 +1,45 @@
+---
+slug: /en/operations/system-tables/jemalloc_bins
+---
+# jemalloc_bins
+
+Contains information about memory allocations done via the jemalloc allocator in different size classes (bins) aggregated from all arenas.
+These statistics might not be absolutely accurate because of thread-local caching in jemalloc.
+
+Columns:
+
+- `index` (UInt64) — Index of the bin ordered by size
+- `large` (Bool) — True for large allocations and False for small
+- `size` (UInt64) — Size of allocations in this bin
+- `nmalloc` (UInt64) — Number of allocations
+- `ndalloc` (UInt64) — Number of deallocations
+
+**Example**
+
+Find the sizes of allocations that contributed the most to the current overall memory usage.
+ +``` sql +SELECT + *, + nmalloc - ndalloc AS active_allocations, + size * active_allocations AS allocated_bytes +FROM system.jemalloc_bins +WHERE allocated_bytes > 0 +ORDER BY allocated_bytes DESC +LIMIT 10 +``` + +``` text +┌─index─┬─large─┬─────size─┬──nmalloc─┬──ndalloc─┬─active_allocations─┬─allocated_bytes─┐ +│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │ +│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │ +│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │ +│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │ +│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │ +│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │ +│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │ +│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │ +│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │ +│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │ +└───────┴───────┴──────────┴──────────┴──────────┴────────────────────┴─────────────────┘ +``` From 8d5ddcbd3094182b44b3641f11acf6ba788faaf7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 5 Jul 2023 15:40:06 +0200 Subject: [PATCH 250/522] Remove coverity --- .github/workflows/nightly.yml | 45 ----------------------------------- 1 file changed, 45 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index cf61012f2bc..9de0444bd83 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -75,51 +75,6 @@ jobs: Codebrowser: needs: [DockerHubPush] uses: ./.github/workflows/woboq.yml - BuilderCoverity: - needs: DockerHubPush - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - BUILD_NAME=coverity - CACHES_PATH=${{runner.temp}}/../ccaches - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - TEMP_PATH=${{runner.temp}}/build_check - EOF - echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - - name: Download changed images - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - submodules: true - - name: Build - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload Coverity Analysis - if: ${{ success() || failure() }} - run: | - curl --form token="${COVERITY_TOKEN}" \ - --form email='security+coverity@clickhouse.com' \ - --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.gz" \ - --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \ - --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \ - https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: runs-on: [self-hosted, builder] env: From 1da413e64eaa092b2ab685253f4cb32a93dcc53e Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Wed, 5 Jul 2023 14:56:11 +0000 Subject: [PATCH 251/522] fix segfault when create invalid EmbeddedRocksdb table --- src/Storages/checkAndGetLiteralArgument.cpp | 10 +++++++++- .../02811_invalid_embedded_rocksdb_create.reference | 0 .../02811_invalid_embedded_rocksdb_create.sql | 1 + 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 
tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference
 create mode 100644 tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql

diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp
index 1aa942548a7..78ec1e55b64 100644
--- a/src/Storages/checkAndGetLiteralArgument.cpp
+++ b/src/Storages/checkAndGetLiteralArgument.cpp
@@ -12,7 +12,15 @@ namespace ErrorCodes
 template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name)
 {
- return checkAndGetLiteralArgument(*arg->as(), arg_name);
+ if (arg->as())
+ return checkAndGetLiteralArgument(*arg->as(), arg_name);
+
+ throw Exception(
+ ErrorCodes::BAD_ARGUMENTS,
+ "Argument '{}' must be a literal, got {} (value: {})",
+ arg_name,
+ arg->getID(),
+ arg->formatForErrorMessage());
 }

 template
diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql
new file mode 100644
index 00000000000..aac2652fbfa
--- /dev/null
+++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql
@@ -0,0 +1 @@
+CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36}
\ No newline at end of file

From 8f1ed5c90de4ada3764ea6384220459359eb7950 Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Wed, 5 Jul 2023 15:04:38 +0000
Subject: [PATCH 252/522] add more checks + line break

---
 src/Storages/checkAndGetLiteralArgument.cpp | 6 +++---
 .../0_stateless/02811_invalid_embedded_rocksdb_create.sql | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp
index 78ec1e55b64..5baf47fe91a 100644
--- a/src/Storages/checkAndGetLiteralArgument.cpp
+++ b/src/Storages/checkAndGetLiteralArgument.cpp
@@ -12,15 +12,15 @@ namespace ErrorCodes
 template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name)
 {
- if (arg->as())
+ if (arg && arg->as())
 return checkAndGetLiteralArgument(*arg->as(), arg_name);

 throw Exception(
 ErrorCodes::BAD_ARGUMENTS,
 "Argument '{}' must be a literal, got {} (value: {})",
 arg_name,
- arg->getID(),
- arg->formatForErrorMessage());
+ arg ? arg->getID() : "NULL",
+ arg ? arg->formatForErrorMessage() : "NULL");
 }

 template
diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql
index aac2652fbfa..bfe4ee0622e 100644
--- a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql
+++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql
@@ -1 +1 @@
-CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36}
\ No newline at end of file
+CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36}

From c7a85d565cb17c068528bdbf38a74d0ab29a1450 Mon Sep 17 00:00:00 2001
From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com>
Date: Wed, 5 Jul 2023 17:12:56 +0200
Subject: [PATCH 253/522] Revert "Publish changes"

This reverts commit ed6bfd66fee9aa4c2e06eaf25ca81d02e09f075e.
---
 docker/packager/binary/build.sh | 4 ----
 docker/packager/packager | 1 -
 2 files changed, 5 deletions(-)

diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh
index 08a9b07f3ce..c0803c74147 100755
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@@ -15,10 +15,6 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then
 mkdir -p /build/cmake/toolchain/darwin-x86_64
 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1
 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64
-
- if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then
- tar -c /build --exclude-vcs-ignores --exclude-vcs --exclude '/build/build' --exclude '/build/build_docker' --exclude '/build/debian' --exclude '/build/.cache' --exclude '/build/docs' --exclude '/build/tests/integration' | pigz -9 > /output/source_sub.tar.gz
- fi
 fi

 # Uncomment to debug ccache. Don't put ccache log in /output right away, or it
diff --git a/docker/packager/packager b/docker/packager/packager
index 42dc52aa37f..1b3df858cd2 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -168,7 +168,6 @@ def parse_env_variables(
 "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake"
 )
 result.append("EXTRACT_TOOLCHAIN_DARWIN=1")
- result.append("EXPORT_SOURCES_WITH_SUBMODULES=1")
 elif is_cross_darwin_arm:
 cc = compiler[: -len(DARWIN_ARM_SUFFIX)]
 cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar")

From bf190381f5b6fa068948330f54ae9ee583c1ea80 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Wed, 5 Jul 2023 17:03:18 +0000
Subject: [PATCH 254/522] addJoinedBlock -> addBlockToJoin

---
 src/Interpreters/ConcurrentHashJoin.cpp | 4 ++--
 src/Interpreters/ConcurrentHashJoin.h | 6 +++---
 src/Interpreters/DirectJoin.cpp | 2 +-
 src/Interpreters/DirectJoin.h | 4 ++--
 src/Interpreters/FullSortingMergeJoin.h | 4 ++--
 src/Interpreters/GraceHashJoin.cpp | 12 ++++++------
 src/Interpreters/GraceHashJoin.h | 8 ++++----
 src/Interpreters/HashJoin.cpp | 8 ++++----
 src/Interpreters/HashJoin.h | 6 +++---
 src/Interpreters/IJoin.h | 6 +++---
 src/Interpreters/JoinSwitcher.cpp | 8 ++++----
 src/Interpreters/JoinSwitcher.h | 2 +-
 src/Interpreters/MergeJoin.cpp | 2 +-
 src/Interpreters/MergeJoin.h | 2 +-
 src/Processors/Transforms/JoiningTransform.cpp | 2 +-
 src/Storages/StorageJoin.cpp | 4 ++--
 16 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp
index fc24f0ae029..1a8e0ad96fa 100644
--- a/src/Interpreters/ConcurrentHashJoin.cpp
+++ b/src/Interpreters/ConcurrentHashJoin.cpp
@@ -49,7 +49,7 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr
-bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_limits)
+bool ConcurrentHashJoin::addBlockToJoin(const Block & right_block, bool check_limits)
 {
 Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, right_block);
@@ -77,7 +77,7 @@ bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_li
 if (!lock.owns_lock())
 continue;

- bool limit_exceeded = !hash_join->data->addJoinedBlock(dispatched_block, check_limits);
+ bool limit_exceeded = !hash_join->data->addBlockToJoin(dispatched_block, check_limits);
 dispatched_block = {};
 blocks_left--;

diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h
index 5e53f9845aa..1283879971d 100644
--- a/src/Interpreters/ConcurrentHashJoin.h
+++ b/src/Interpreters/ConcurrentHashJoin.h
@@ -16,13 +16,13 @@ namespace DB
 {
 /**
- * Can run addJoinedBlock() parallelly to speedup the join process. On test, it almose linear speedup by
+ * Can run addBlockToJoin() in parallel to speed up the join process.
In tests, it gives an almost linear speedup with
+ * the degree of parallelism.
 *
 * The default HashJoin is not thread safe for inserting right table's rows and run it in a single thread. When
 * the right table is large, the join process is too slow.
 *
- * We create multiple HashJoin instances here. In addJoinedBlock(), one input block is split into multiple blocks
+ * We create multiple HashJoin instances here. In addBlockToJoin(), one input block is split into multiple blocks
 * corresponding to the HashJoin instances by hashing every row on the join keys. And make a guarantee that every HashJoin
 * instance is written by only one thread.
 *
@@ -37,7 +37,7 @@ public:
 ~ConcurrentHashJoin() override = default;

 const TableJoin & getTableJoin() const override { return *table_join; }
- bool addJoinedBlock(const Block & block, bool check_limits) override;
+ bool addBlockToJoin(const Block & block, bool check_limits) override;
 void checkTypesOfKeys(const Block & block) const override;
 void joinBlock(Block & block, std::shared_ptr & not_processed) override;
 void setTotals(const Block & block) override;
diff --git a/src/Interpreters/DirectJoin.cpp b/src/Interpreters/DirectJoin.cpp
index cfefd7c5a91..431f216436d 100644
--- a/src/Interpreters/DirectJoin.cpp
+++ b/src/Interpreters/DirectJoin.cpp
@@ -103,7 +103,7 @@ DirectKeyValueJoin::DirectKeyValueJoin(
 right_sample_block_with_storage_column_names = right_sample_block_with_storage_column_names_;
 }

-bool DirectKeyValueJoin::addJoinedBlock(const Block &, bool)
+bool DirectKeyValueJoin::addBlockToJoin(const Block &, bool)
 {
 throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached");
 }
diff --git a/src/Interpreters/DirectJoin.h b/src/Interpreters/DirectJoin.h
index 644b66a9d99..e55ac278705 100644
--- a/src/Interpreters/DirectJoin.h
+++ b/src/Interpreters/DirectJoin.h
@@ -32,10 +32,10 @@ public:

 virtual const TableJoin & getTableJoin() const override { return *table_join; }

- virtual bool addJoinedBlock(const Block &, bool) override;
+ virtual bool addBlockToJoin(const Block &, bool) override;

 virtual void checkTypesOfKeys(const Block &) const override;

- /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock).
+ /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin).
 /// Could be called from different threads in parallel.
virtual void joinBlock(Block & block, std::shared_ptr &) override; diff --git a/src/Interpreters/FullSortingMergeJoin.h b/src/Interpreters/FullSortingMergeJoin.h index 7318d1d24a1..a6b53a51c04 100644 --- a/src/Interpreters/FullSortingMergeJoin.h +++ b/src/Interpreters/FullSortingMergeJoin.h @@ -30,9 +30,9 @@ public: const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & /* block */, bool /* check_limits */) override + bool addBlockToJoin(const Block & /* block */, bool /* check_limits */) override { - throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addJoinedBlock should not be called"); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addBlockToJoin should not be called"); } static bool isSupported(const std::shared_ptr & table_join) diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4218a8ea4e1..f455622c4c8 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -310,13 +310,13 @@ bool GraceHashJoin::isSupported(const std::shared_ptr & table_join) GraceHashJoin::~GraceHashJoin() = default; -bool GraceHashJoin::addJoinedBlock(const Block & block, bool /*check_limits*/) +bool GraceHashJoin::addBlockToJoin(const Block & block, bool /*check_limits*/) { if (current_bucket == nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "GraceHashJoin is not initialized"); Block materialized = materializeBlock(block); - addJoinedBlockImpl(std::move(materialized)); + addBlockToJoinImpl(std::move(materialized)); return true; } @@ -596,7 +596,7 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() while (Block block = right_reader.read()) { num_rows += block.rows(); - addJoinedBlockImpl(std::move(block)); + addBlockToJoinImpl(std::move(block)); } LOG_TRACE(log, "Loaded bucket {} with {}(/{}) rows", @@ -621,7 +621,7 @@ Block GraceHashJoin::prepareRightBlock(const Block & block) return HashJoin::prepareRightBlock(block, hash_join_sample_block); } -void GraceHashJoin::addJoinedBlockImpl(Block block) +void GraceHashJoin::addBlockToJoinImpl(Block block) { block = prepareRightBlock(block); Buckets buckets_snapshot = getCurrentBuckets(); @@ -646,7 +646,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (!hash_join) hash_join = makeInMemoryJoin(); - hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + hash_join->addBlockToJoin(current_block, /* check_limits = */ false); if (!hasMemoryOverflow(hash_join)) return; @@ -677,7 +677,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) hash_join = makeInMemoryJoin(); if (current_block.rows() > 0) - hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + hash_join->addBlockToJoin(current_block, /* check_limits = */ false); } } diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index b8d83f4cad0..8224f1f1a4a 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -23,11 +23,11 @@ class HashJoin; * * The joining algorithm consists of three stages: * - * 1) During the first stage we accumulate blocks of the right table via @addJoinedBlock. + * 1) During the first stage we accumulate blocks of the right table via @addBlockToJoin. * Each input block is split into multiple buckets based on the hash of the row join keys. * The first bucket is added to the in-memory HashJoin, and the remaining buckets are written to disk for further processing. 
* When the size of HashJoin exceeds the limits, we double the number of buckets. - * There can be multiple threads calling addJoinedBlock, just like @ConcurrentHashJoin. + * There can be multiple threads calling addBlockToJoin, just like @ConcurrentHashJoin. * * 2) At the second stage we process left table blocks via @joinBlock. * Again, each input block is split into multiple buckets by hash. @@ -65,7 +65,7 @@ public: void initialize(const Block & sample_block) override; - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block & block, std::shared_ptr & not_processed) override; @@ -94,7 +94,7 @@ private: InMemoryJoinPtr makeInMemoryJoin(); /// Add right table block to the @join. Calls @rehash on overflow. - void addJoinedBlockImpl(Block block); + void addBlockToJoinImpl(Block block); /// Check that join satisfies limits on rows/bytes in table_join. bool hasMemoryOverflow(size_t total_rows, size_t total_bytes) const; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 6fe2b8464f5..548039f257a 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -79,8 +79,8 @@ namespace JoinStuff { assert(flags[nullptr].size() <= size); need_flags = true; - // For one disjunct clause case, we don't need to reinit each time we call addJoinedBlock. - // and there is no value inserted in this JoinUsedFlags before addJoinedBlock finish. + // For one disjunct clause case, we don't need to reinit each time we call addBlockToJoin. + // and there is no value inserted in this JoinUsedFlags before addBlockToJoin finish. // So we reinit only when the hash table is rehashed to a larger size. if (flags.empty() || flags[nullptr].size() < size) [[unlikely]] { @@ -729,7 +729,7 @@ Block HashJoin::prepareRightBlock(const Block & block) const return prepareRightBlock(block, savedBlockSample()); } -bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) +bool HashJoin::addBlockToJoin(const Block & source_block_, bool check_limits) { if (!data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); @@ -781,7 +781,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) size_t total_bytes = 0; { if (storage_join_lock) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addJoinedBlock called when HashJoin locked to prevent updates"); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addBlockToJoin called when HashJoin locked to prevent updates"); data->blocks_allocated_size += block_to_save.allocatedBytes(); data->blocks.emplace_back(std::move(block_to_save)); diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 50eda4482bd..f30bbc3a46c 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -155,11 +155,11 @@ public: /** Add block of data from right hand of JOIN to the map. * Returns false, if some limit was exceeded and you should not insert more data. */ - bool addJoinedBlock(const Block & source_block_, bool check_limits) override; + bool addBlockToJoin(const Block & source_block_, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; - /** Join data from the map (that was previously built by calls to addJoinedBlock) to the block with data from "left" table. 
+ /** Join data from the map (that was previously built by calls to addBlockToJoin) to the block with data from "left" table. * Could be called from different threads in parallel. */ void joinBlock(Block & block, ExtraBlockPtr & not_processed) override; @@ -406,7 +406,7 @@ private: Poco::Logger * log; /// Should be set via setLock to protect hash table from modification from StorageJoin - /// If set HashJoin instance is not available for modification (addJoinedBlock) + /// If set HashJoin instance is not available for modification (addBlockToJoin) TableLockHolder storage_join_lock = nullptr; void dataMapInit(MapsVariant &); diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index 83067b0eab7..97b119bd795 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -52,7 +52,7 @@ public: /// Add block of data from right hand of JOIN. /// @returns false, if some limit was exceeded and you should not insert more data. - virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; /// NOLINT + virtual bool addBlockToJoin(const Block & block, bool check_limits = true) = 0; /// NOLINT /* Some initialization may be required before joinBlock() call. * It's better to done in in constructor, but left block exact structure is not known at that moment. @@ -62,7 +62,7 @@ public: virtual void checkTypesOfKeys(const Block & block) const = 0; - /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addJoinedBlock). + /// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin). /// Could be called from different threads in parallel. virtual void joinBlock(Block & block, std::shared_ptr & not_processed) = 0; @@ -79,7 +79,7 @@ public: /// Returns true if no data to join with. virtual bool alwaysReturnsEmptySet() const = 0; - /// StorageJoin/Dictionary is already filled. No need to call addJoinedBlock. + /// StorageJoin/Dictionary is already filled. No need to call addBlockToJoin. /// Different query plan is used for such joins. 
virtual bool isFilled() const { return pipelineType() == JoinPipelineType::FilledRight; } virtual JoinPipelineType pipelineType() const { return JoinPipelineType::FillRightFirst; } diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 15702784d74..5ea347549c1 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -19,16 +19,16 @@ JoinSwitcher::JoinSwitcher(std::shared_ptr table_join_, const Block & limits.max_bytes = table_join->defaultMaxBytes(); } -bool JoinSwitcher::addJoinedBlock(const Block & block, bool) +bool JoinSwitcher::addBlockToJoin(const Block & block, bool) { std::lock_guard lock(switch_mutex); if (switched) - return join->addJoinedBlock(block); + return join->addBlockToJoin(block); /// HashJoin with external limits check - join->addJoinedBlock(block, false); + join->addBlockToJoin(block, false); size_t rows = join->getTotalRowCount(); size_t bytes = join->getTotalByteCount(); @@ -48,7 +48,7 @@ bool JoinSwitcher::switchJoin() bool success = true; for (const Block & saved_block : right_blocks) - success = success && join->addJoinedBlock(saved_block); + success = success && join->addBlockToJoin(saved_block); switched = true; return success; diff --git a/src/Interpreters/JoinSwitcher.h b/src/Interpreters/JoinSwitcher.h index eec4787037d..fb5066b2d04 100644 --- a/src/Interpreters/JoinSwitcher.h +++ b/src/Interpreters/JoinSwitcher.h @@ -23,7 +23,7 @@ public: /// Add block of data from right hand of JOIN into current join object. /// If join-in-memory memory limit exceeded switches to join-on-disk and continue with it. /// @returns false, if join-on-disk disk limit exceeded - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override { diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index d31510c2fb5..ceef1371f16 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -669,7 +669,7 @@ Block MergeJoin::modifyRightBlock(const Block & src_block) const return block; } -bool MergeJoin::addJoinedBlock(const Block & src_block, bool) +bool MergeJoin::addBlockToJoin(const Block & src_block, bool) { Block block = modifyRightBlock(src_block); diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 8b5d884a0e6..03a661c5b8a 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -23,7 +23,7 @@ public: MergeJoin(std::shared_ptr table_join_, const Block & right_sample_block); const TableJoin & getTableJoin() const override { return *table_join; } - bool addJoinedBlock(const Block & block, bool check_limits) override; + bool addBlockToJoin(const Block & block, bool check_limits) override; void checkTypesOfKeys(const Block & block) const override; void joinBlock(Block &, ExtraBlockPtr & not_processed) override; diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index bba8ec6fa16..49b90d04b81 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -305,7 +305,7 @@ void FillingRightJoinSideTransform::work() if (for_totals) join->setTotals(block); else - stop_reading = !join->addJoinedBlock(block); + stop_reading = !join->addBlockToJoin(block); set_totals = for_totals; } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index a238e9ef26c..640706aae17 
100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -146,7 +146,7 @@ void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) Block block; while (executor.pull(block)) { - new_data->addJoinedBlock(block, true); + new_data->addBlockToJoin(block, true); if (persistent) backup_stream.write(block); } @@ -257,7 +257,7 @@ void StorageJoin::insertBlock(const Block & block, ContextPtr context) if (!holder) throw Exception(ErrorCodes::DEADLOCK_AVOIDED, "StorageJoin: cannot insert data because current query tries to read from this storage"); - join->addJoinedBlock(block_to_insert, true); + join->addBlockToJoin(block_to_insert, true); } size_t StorageJoin::getSize(ContextPtr context) const From f7640ff5733822a9c6f4e119f6ff2ed7027a885d Mon Sep 17 00:00:00 2001 From: Feng Kaiyu Date: Thu, 6 Jul 2023 01:27:20 +0800 Subject: [PATCH 255/522] fix: correct exception message on policies comparison --- src/Disks/StoragePolicy.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Disks/StoragePolicy.cpp b/src/Disks/StoragePolicy.cpp index f4be8b8fe86..71922e297df 100644 --- a/src/Disks/StoragePolicy.cpp +++ b/src/Disks/StoragePolicy.cpp @@ -302,7 +302,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & volume : getVolumes()) { if (!new_volume_names.contains(volume->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain volumes of old one", backQuote(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "New storage policy {} shall contain volumes of old one ({})", + backQuote(new_storage_policy->getName()), + backQuote(name)); std::unordered_set new_disk_names; for (const auto & disk : new_storage_policy->getVolumeByName(volume->getName())->getDisks()) @@ -310,7 +314,11 @@ void StoragePolicy::checkCompatibleWith(const StoragePolicyPtr & new_storage_pol for (const auto & disk : volume->getDisks()) if (!new_disk_names.contains(disk->getName())) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "New storage policy {} shall contain disks of old one", backQuote(name)); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "New storage policy {} shall contain disks of old one ({})", + backQuote(new_storage_policy->getName()), + backQuote(name)); } } From b60a1c53d638b5c10727d3e5c0e6d5b5b8d5725a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 5 Jul 2023 17:43:09 +0000 Subject: [PATCH 256/522] Fix oldest part fetching --- tests/integration/test_multiple_disks/test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index c0fbe39196d..fa79a9baa90 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -717,9 +717,10 @@ def test_background_move(start_cluster, name, engine): node1.query(f"SYSTEM STOP MERGES {name}") + first_part = None for i in range(5): data = [] # 5MB in total - for i in range(5): + for _ in range(5): data.append(get_random_string(1024 * 1024)) # 1MB row # small jbod size is 40MB, so lets insert 5MB batch 5 times node1.query_with_retry( @@ -728,7 +729,11 @@ def test_background_move(start_cluster, name, engine): ) ) - first_part = get_oldest_part(node1, name) + # we are doing moves in parallel so we need to fetch the name of first part before we add new parts + if i == 0: + first_part = get_oldest_part(node1, name) + + assert first_part is not None 
used_disks = get_used_disks_for_table(node1, name)

From ce8b0cae822f7e049eba7e8967122890510a82c5 Mon Sep 17 00:00:00 2001
From: lcjh <120989324@qq.com>
Date: Thu, 6 Jul 2023 02:14:48 +0800
Subject: [PATCH 257/522] remove duplicate condition

---
 src/Functions/FunctionUnixTimestamp64.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h
index 58a23f7266e..a2065465501 100644
--- a/src/Functions/FunctionUnixTimestamp64.h
+++ b/src/Functions/FunctionUnixTimestamp64.h
@@ -155,7 +155,6 @@ public:
 if (!((executeType(result_column, arguments, input_rows_count))
 || (executeType(result_column, arguments, input_rows_count))
 || (executeType(result_column, arguments, input_rows_count))
- || (executeType(result_column, arguments, input_rows_count))
 || (executeType(result_column, arguments, input_rows_count))
 || (executeType(result_column, arguments, input_rows_count))
 || (executeType(result_column, arguments, input_rows_count))

From 44791af7102079b8a3db6a5a2fbe5fbaa8eae3bf Mon Sep 17 00:00:00 2001
From: Sema Checherinda
Date: Wed, 5 Jul 2023 22:54:22 +0200
Subject: [PATCH 258/522] stop merges properly for replicated tables

---
 tests/integration/test_multiple_disks/test.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py
index fa79a9baa90..4a934447345 100644
--- a/tests/integration/test_multiple_disks/test.py
+++ b/tests/integration/test_multiple_disks/test.py
@@ -711,7 +711,7 @@ def test_background_move(start_cluster, name, engine):
 s1 String
 ) ENGINE = {engine}
 ORDER BY tuple()
- SETTINGS storage_policy='moving_jbod_with_external'
+ SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0
 """
 )

@@ -784,7 +784,7 @@ def test_start_stop_moves(start_cluster, name, engine):
 s1 String
 ) ENGINE = {engine}
 ORDER BY tuple()
- SETTINGS storage_policy='moving_jbod_with_external'
+ SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0
 """
 )

From 5a3aadacde7e82d47ff550601191186c2eab9abb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 5 Jul 2023 23:40:37 +0200
Subject: [PATCH 259/522] Fix error

---
 tests/ci/ci_config.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 1777180a76e..ea7d112c73e 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -179,10 +179,9 @@ CI_CONFIG = {
 "sanitizer": "",
 "package_type": "binary",
 "static_binary_name": "riscv64",
- "bundled": "bundled",
- "libraries": "static",
 "tidy": "disable",
 "with_coverage": False,
+ "comment": "",
 },
 },
 "builds_report_config": {

From 98da25f1d3f4014fd13d1f53aa3ccee6da21d9f2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 6 Jul 2023 01:17:41 +0200
Subject: [PATCH 260/522] Fix build

---
 cmake/target.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cmake/target.cmake b/cmake/target.cmake
index ea4c206fc4f..0791da87bf0 100644
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@@ -44,6 +44,8 @@ if (CMAKE_CROSSCOMPILING)
 set (ENABLE_GRPC OFF CACHE INTERNAL "")
 set (ENABLE_HDFS OFF CACHE INTERNAL "")
 set (ENABLE_MYSQL OFF CACHE INTERNAL "")
+ # It might be ok, but we need to update 'sysroot'
+ set (ENABLE_RUST OFF CACHE INTERNAL "")
 elseif (ARCH_S390X)
 set (ENABLE_GRPC OFF CACHE INTERNAL "")
 set (ENABLE_SENTRY OFF CACHE INTERNAL "")

From 698c49cd51f406d0a9e619b4c7d971857f1fb59b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov Date: Thu, 6 Jul 2023 02:19:03 +0300 Subject: [PATCH 261/522] Update 02811_invalid_embedded_rocksdb_create.sql --- .../0_stateless/02811_invalid_embedded_rocksdb_create.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql index bfe4ee0622e..a87ac5e0de0 100644 --- a/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql +++ b/tests/queries/0_stateless/02811_invalid_embedded_rocksdb_create.sql @@ -1 +1,2 @@ +-- Tags: no-fasttest CREATE TABLE dict (`k` String, `v` String) ENGINE = EmbeddedRocksDB(k) PRIMARY KEY k; -- {serverError 36} From 75d051dd5554022ee7d9c215543c5ffad5c3df63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 01:49:53 +0200 Subject: [PATCH 262/522] Remove useless packages --- docker/test/sqllogic/run.sh | 4 ++-- docker/test/stateless/Dockerfile | 1 - docker/test/stress/Dockerfile | 3 --- docker/test/upgrade/Dockerfile | 3 --- docker/test/util/Dockerfile | 1 - docs/zh/development/build.md | 7 ------- .../0_stateless/02439_merge_selecting_partitions.sql | 1 - 7 files changed, 2 insertions(+), 18 deletions(-) diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index 8d0252e3c98..444252837a3 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -92,8 +92,8 @@ sudo clickhouse stop ||: for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done -grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: -pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & +rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: +zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compressed (FIXME: remove once only github actions will be left) rm /var/log/clickhouse-server/clickhouse-server.log diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 32996140521..e1e84c427ba 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -33,7 +33,6 @@ RUN apt-get update -y \ qemu-user-static \ sqlite3 \ sudo \ - telnet \ tree \ unixodbc \ wget \ diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index e9712f430fd..eddeb04758b 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -8,8 +8,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ tzdata \ - fakeroot \ - debhelper \ parallel \ expect \ python3 \ @@ -20,7 +18,6 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet \ brotli \ && apt-get clean diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile index 8e5890b81a0..9152230af1c 100644 --- a/docker/test/upgrade/Dockerfile +++ b/docker/test/upgrade/Dockerfile @@ -8,8 +8,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ tzdata \ - fakeroot \ - debhelper \ parallel \ expect \ python3 \ @@ -20,7 +18,6 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet \ brotli \ && apt-get clean diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 85e888f1df7..6a4c6aa3057 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -44,7 +44,6 @@ RUN apt-get update \ clang-${LLVM_VERSION} \ clang-tidy-${LLVM_VERSION} \ cmake \ - 
fakeroot \ gdb \ git \ gperf \ diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md index d76f4b1577c..bb25755a615 100644 --- a/docs/zh/development/build.md +++ b/docs/zh/development/build.md @@ -3,13 +3,6 @@ slug: /zh/development/build --- # 如何构建 ClickHouse 发布包 {#ru-he-gou-jian-clickhouse-fa-bu-bao} -## 安装 Git 和 Pbuilder {#an-zhuang-git-he-pbuilder} - -``` bash -sudo apt-get update -sudo apt-get install git pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring -``` - ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma} ``` bash diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql index 88ce2834d6b..bcfcaa2acd3 100644 --- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql +++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql @@ -1,4 +1,3 @@ - drop table if exists rmt; create table rmt (n int, m int) engine=ReplicatedMergeTree('/test/02439/{shard}/{database}', '{replica}') partition by n order by n; From db14b2c54fbd42d1c8123a15d87382fe00938a6a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 02:16:38 +0200 Subject: [PATCH 263/522] Remove useless logs --- src/Interpreters/executeQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c52dab722c9..694226af6b0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -322,8 +322,8 @@ static std::tuple executeQueryImpl( /// This does not have impact on the final span logs, because these internal queries are issued by external queries, /// we still have enough span logs for the execution of external queries. std::shared_ptr query_span = internal ? 
nullptr : std::make_shared("query"); - if (query_span) - LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); + if (query_span && query_span->trace_id != UUID{}) + LOG_TRACE(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); auto query_start_time = std::chrono::system_clock::now(); From 5416b7b6df8104440d9d74cbdc68fd0505012654 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:04:58 +0200 Subject: [PATCH 264/522] Fix incorrect log level = warning --- programs/server/Server.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2d8a0d07fb..686c3b90dd6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1146,7 +1146,16 @@ try size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit; size_t default_merges_mutations_server_memory_usage = static_cast(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio); - if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) + if (merges_mutations_memory_usage_soft_limit == 0) + { + merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; + LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" + " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)", + formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit), + formatReadableSizeWithBinarySuffix(memory_amount), + server_settings_.merges_mutations_memory_usage_to_ram_ratio); + } + else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage) { merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage; LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}" From 64d5a85f6e731d9e8baba170aa7441555c030545 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:16:06 +0200 Subject: [PATCH 265/522] Fix test_replicated_table_attach --- tests/integration/test_replicated_table_attach/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_table_attach/test.py b/tests/integration/test_replicated_table_attach/test.py index 2d209ddaf79..dee2be3fcf7 100644 --- a/tests/integration/test_replicated_table_attach/test.py +++ b/tests/integration/test_replicated_table_attach/test.py @@ -54,7 +54,7 @@ def test_startup_with_small_bg_pool_partitioned(started_cluster): assert_values() with PartitionManager() as pm: pm.drop_instance_zk_connections(node) - node.restart_clickhouse(stop_start_wait_sec=20) + node.restart_clickhouse(stop_start_wait_sec=300) assert_values() # check that we activate it in the end From e2c9f86f39e83b128d0fc82628bdae2ab0b8080b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:31:10 +0200 Subject: [PATCH 266/522] Better usability of a test --- tests/queries/0_stateless/02125_many_mutations.sh | 2 ++ tests/queries/0_stateless/02125_many_mutations_2.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index b42d5bb15d3..54948fa1048 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ 
b/tests/queries/0_stateless/02125_many_mutations.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL +$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" $CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" $CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" $CLICKHOUSE_CLIENT -q "system stop merges many_mutations" @@ -49,3 +50,4 @@ $CLICKHOUSE_CLIENT -q "system start merges many_mutations" $CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 $CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" $CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" +$CLICKHOUSE_CLIENT -q "drop table many_mutations" diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index e5e3070a944..0351538b210 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -7,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL +$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" $CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" $CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" $CLICKHOUSE_CLIENT -q "system stop merges many_mutations" @@ -51,3 +52,4 @@ $CLICKHOUSE_CLIENT -q "system flush logs" $CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" $CLICKHOUSE_CLIENT -q "select count() from many_mutations" $CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" +$CLICKHOUSE_CLIENT -q "drop table many_mutations" From 38c163b0662249b4da83e8b812662bf5b6d1a27a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 03:43:59 +0200 Subject: [PATCH 267/522] Improve test --- .../0_stateless/02125_many_mutations.sh | 32 +++++++++---------- .../0_stateless/02125_many_mutations_2.sh | 32 +++++++++++-------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index 54948fa1048..5a139e8b01d 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -7,19 +7,17 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL -$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" -$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" -$CLICKHOUSE_CLIENT -q "system 
stop merges many_mutations" - -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" +$CLICKHOUSE_CLIENT --multiquery -q " +drop table if exists many_mutations; +create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1; +insert into many_mutations values (0, 0), (1, 1); +system stop merges many_mutations; +select x, y from many_mutations order by x; +" job() { - for _ in {1..1000} - do - $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" - done + yes "alter table many_mutations update y = y + 1 where 1;" | head -n 1000 | $CLICKHOUSE_CLIENT --multiquery } job & @@ -45,9 +43,11 @@ job & wait -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" -$CLICKHOUSE_CLIENT -q "drop table many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +system start merges many_mutations; +optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +select x, y from many_mutations order by x; +drop table many_mutations; +" diff --git a/tests/queries/0_stateless/02125_many_mutations_2.sh b/tests/queries/0_stateless/02125_many_mutations_2.sh index 0351538b210..5b779c1b276 100755 --- a/tests/queries/0_stateless/02125_many_mutations_2.sh +++ b/tests/queries/0_stateless/02125_many_mutations_2.sh @@ -7,10 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # "max_parts_to_merge_at_once = 1" prevents merges to start in background before our own OPTIMIZE FINAL -$CLICKHOUSE_CLIENT -q "drop table if exists many_mutations" -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1" -$CLICKHOUSE_CLIENT -q "insert into many_mutations select number, number + 1 from numbers(2000)" -$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +drop table if exists many_mutations; +create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x settings number_of_mutations_to_delay = 0, number_of_mutations_to_throw = 0, max_parts_to_merge_at_once = 1; +insert into many_mutations select number, number + 1 from numbers(2000); +system stop merges many_mutations; +" $CLICKHOUSE_CLIENT -q "select count() from many_mutations" @@ -18,8 +20,8 @@ job() { for i in {1..1000} do - $CLICKHOUSE_CLIENT -q "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync=0" - done + echo "alter table many_mutations delete where y = ${i} * 2 settings mutations_sync = 0;" + done | $CLICKHOUSE_CLIENT --multiquery } job & @@ -45,11 +47,13 @@ job & wait -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and 
not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" --optimize_throw_if_noop 1 -$CLICKHOUSE_CLIENT -q "system flush logs" -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select count() from many_mutations" -$CLICKHOUSE_CLIENT -q "select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9" -$CLICKHOUSE_CLIENT -q "drop table many_mutations" +$CLICKHOUSE_CLIENT --multiquery -q " +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +system start merges many_mutations; +optimize table many_mutations final SETTINGS optimize_throw_if_noop = 1; +system flush logs; +select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done; +select count() from many_mutations; +select * from system.part_log where database = currentDatabase() and table == 'many_mutations' and peak_memory_usage > 1e9; +drop table many_mutations; +" From c178a362c573f7212c8f9986f78e78b209713bee Mon Sep 17 00:00:00 2001 From: flynn Date: Thu, 6 Jul 2023 02:30:37 +0000 Subject: [PATCH 268/522] Fix for new analyzer --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 163092f1b7f..34286c266c9 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6238,7 +6238,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, const auto & insertion_table = scope_context->getInsertionTable(); if (!insertion_table.empty()) { - const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns(); + const auto & insert_structure = DatabaseCatalog::instance() + .getTable(insertion_table, scope_context) + ->getInMemoryMetadataPtr() + ->getColumns() + .getInsertable(); DB::ColumnsDescription structure_hint; bool use_columns_from_insert_query = true; From d59f68b6009467e891b96e0725ec308aad236c63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 04:55:46 +0200 Subject: [PATCH 269/522] Remove useless code --- src/Access/Common/AccessType.h | 1 - src/Common/SymbolIndex.cpp | 1 - src/Interpreters/InterpreterSystemQuery.cpp | 15 --------------- src/Parsers/ASTSystemQuery.h | 1 - .../0_stateless/01271_show_privileges.reference | 1 - .../02117_show_create_table_system.reference | 6 +++--- 6 files changed, 3 insertions(+), 22 deletions(-) diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index f65a77c1d6a..c06bceb87e3 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -157,7 +157,6 @@ enum class AccessType M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \ M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \ - M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \ M(SYSTEM_RELOAD_FUNCTION, 
"SYSTEM RELOAD FUNCTIONS, RELOAD FUNCTION, RELOAD FUNCTIONS", GLOBAL, SYSTEM_RELOAD) \ diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index f1cace5017c..b4ae16670d8 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -9,7 +9,6 @@ #include -//#include #include #include diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..c74ff062471 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -470,16 +470,6 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_RELOAD_USERS); system_context->getAccessControl().reload(AccessControl::ReloadMode::ALL); break; - case Type::RELOAD_SYMBOLS: - { -#if defined(__ELF__) && !defined(OS_FREEBSD) - getContext()->checkAccess(AccessType::SYSTEM_RELOAD_SYMBOLS); - SymbolIndex::reload(); - break; -#else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RELOAD SYMBOLS is not supported on current platform"); -#endif - } case Type::STOP_MERGES: startStopAction(ActionLocks::PartsMerge, false); break; @@ -1056,11 +1046,6 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_RELOAD_USERS); break; } - case Type::RELOAD_SYMBOLS: - { - required_access.emplace_back(AccessType::SYSTEM_RELOAD_SYMBOLS); - break; - } case Type::STOP_MERGES: case Type::START_MERGES: { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..528fbdce2c2 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -56,7 +56,6 @@ public: RELOAD_EMBEDDED_DICTIONARIES, RELOAD_CONFIG, RELOAD_USERS, - RELOAD_SYMBOLS, RESTART_DISK, STOP_MERGES, START_MERGES, diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 9e6249bfcb3..f3c07cf11a7 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -108,7 +108,6 @@ SYSTEM DROP S3 CLIENT CACHE ['SYSTEM DROP S3 CLIENT','DROP S3 CLIENT CACHE'] GLO SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM SYSTEM RELOAD CONFIG ['RELOAD CONFIG'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD USERS ['RELOAD USERS'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD SYMBOLS ['RELOAD SYMBOLS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELOAD DICTIONARIES'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 3834b05601f..c7aded81ac6 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 
'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 
'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC 
REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -582,10 +582,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 
'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER 
ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED 
COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'REDIS' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'AZURE' = 161, 'SOURCES' = 162, 'CLUSTER' = 163, 'ALL' = 164, 'NONE' = 165)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER 
MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD DICTIONARY' = 110, 'SYSTEM RELOAD MODEL' = 111, 'SYSTEM RELOAD FUNCTION' = 112, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 113, 'SYSTEM RELOAD' = 114, 'SYSTEM RESTART DISK' = 115, 'SYSTEM MERGES' = 116, 'SYSTEM TTL MERGES' = 117, 'SYSTEM FETCHES' = 118, 'SYSTEM MOVES' = 119, 'SYSTEM DISTRIBUTED SENDS' = 120, 'SYSTEM REPLICATED SENDS' = 121, 'SYSTEM SENDS' = 122, 'SYSTEM REPLICATION QUEUES' = 123, 'SYSTEM DROP REPLICA' = 124, 'SYSTEM SYNC REPLICA' = 125, 'SYSTEM RESTART REPLICA' = 126, 'SYSTEM RESTORE REPLICA' = 127, 'SYSTEM WAIT LOADING PARTS' = 128, 'SYSTEM SYNC DATABASE REPLICA' = 129, 'SYSTEM SYNC TRANSACTION LOG' = 130, 'SYSTEM SYNC FILE CACHE' = 131, 'SYSTEM FLUSH DISTRIBUTED' = 132, 'SYSTEM FLUSH LOGS' = 133, 'SYSTEM FLUSH' = 134, 'SYSTEM THREAD FUZZER' = 135, 'SYSTEM UNFREEZE' = 136, 'SYSTEM FAILPOINT' = 137, 'SYSTEM' = 138, 'dictGet' = 139, 'displaySecretsInShowAndSelect' = 140, 'addressToLine' = 141, 'addressToLineWithInlines' = 142, 'addressToSymbol' = 143, 'demangle' = 144, 'INTROSPECTION' = 145, 'FILE' = 146, 'URL' = 147, 'REMOTE' = 148, 'MONGO' = 149, 'REDIS' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 
154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'AZURE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From e8718e04cb2cfed00365f6e75c2c4e5bf2baa925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 05:58:05 +0300 Subject: [PATCH 270/522] Update --- src/Common/SymbolIndex.cpp | 7 ------- src/Common/SymbolIndex.h | 1 - 2 files changed, 8 deletions(-) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index b4ae16670d8..4c7f3827125 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -560,13 +560,6 @@ MultiVersion::Version SymbolIndex::instance() return instanceImpl().get(); } -void SymbolIndex::reload() -{ - instanceImpl().set(std::unique_ptr(new SymbolIndex)); - /// Also drop stacktrace cache. - StackTrace::dropCache(); -} - } #endif diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 47162331946..773f59b7914 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -24,7 +24,6 @@ protected: public: static MultiVersion::Version instance(); - static void reload(); struct Symbol { From d8e87f6c1df5c113fdd6026466caf8fccebd5150 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Jun 2023 14:48:20 +0800 Subject: [PATCH 271/522] Make common macros extendable --- src/Common/CurrentMetrics.cpp | 10 ++++++++-- src/Common/ErrorCodes.cpp | 8 +++++++- src/Common/ProfileEvents.cpp | 7 ++++++- src/Common/StatusInfo.cpp | 7 ++++++- 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 105a7c0548f..4f0d55a9cb6 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -2,7 +2,7 @@ /// Available metrics. Add something here as you wish. -#define APPLY_FOR_METRICS(M) \ +#define APPLY_FOR_BUILTIN_METRICS(M) \ M(Query, "Number of executing queries") \ M(Merge, "Number of executing background merges") \ M(Move, "Number of currently executing moves") \ @@ -200,7 +200,13 @@ M(MergeTreeReadTaskRequestsSent, "The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.") \ M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). 
Measured on the remote server side.") \ M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \ - M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") + M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ + +#ifdef APPLY_FOR_EXTERNAL_METRICS + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) +#else + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) +#endif namespace CurrentMetrics { diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 4c08d762df2..87619cdafad 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -13,7 +13,7 @@ * - system.errors table */ -#define APPLY_FOR_ERROR_CODES(M) \ +#define APPLY_FOR_BUILTIN_ERROR_CODES(M) \ M(0, OK) \ M(1, UNSUPPORTED_METHOD) \ M(2, UNSUPPORTED_PARAMETER) \ @@ -589,6 +589,12 @@ M(1002, UNKNOWN_EXCEPTION) \ /* See END */ +#ifdef APPLY_FOR_EXTERNAL_ERROR_CODES + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M) +#else + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) +#endif + namespace DB { namespace ErrorCodes diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0838e0366df..ecec1179875 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -4,7 +4,7 @@ /// Available events. Add something here as you wish. -#define APPLY_FOR_EVENTS(M) \ +#define APPLY_FOR_BUILTIN_EVENTS(M) \ M(Query, "Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries.") \ M(SelectQuery, "Same as Query, but only for SELECT queries.") \ M(InsertQuery, "Same as Query, but only for INSERT queries.") \ @@ -536,6 +536,11 @@ The server successfully detected this situation and will download merged part fr M(LogError, "Number of log messages with level Error") \ M(LogFatal, "Number of log messages with level Fatal") \ +#ifdef APPLY_FOR_EXTERNAL_EVENTS + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) +#else + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) +#endif namespace ProfileEvents { diff --git a/src/Common/StatusInfo.cpp b/src/Common/StatusInfo.cpp index 1f9ddfaf4b9..07828cd0aaf 100644 --- a/src/Common/StatusInfo.cpp +++ b/src/Common/StatusInfo.cpp @@ -2,9 +2,14 @@ #include /// Available status. Add something here as you wish. -#define APPLY_FOR_STATUS(M) \ +#define APPLY_FOR_BUILTIN_STATUS(M) \ M(DictionaryStatus, "Dictionary Status.", DB::getStatusEnumAllPossibleValues()) \ +#ifdef APPLY_FOR_EXTERNAL_STATUS + #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) APPLY_FOR_EXTERNAL_STATUS(M) +#else + #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) +#endif namespace CurrentStatusInfo { From 06553452ed1135a74f00ba9bb177e7c57954ea77 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Jun 2023 22:33:26 +0800 Subject: [PATCH 272/522] StatusInfo will be deprecated --- src/Common/StatusInfo.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/Common/StatusInfo.cpp b/src/Common/StatusInfo.cpp index 07828cd0aaf..1f9ddfaf4b9 100644 --- a/src/Common/StatusInfo.cpp +++ b/src/Common/StatusInfo.cpp @@ -2,14 +2,9 @@ #include /// Available status. 
Add something here as you wish. -#define APPLY_FOR_BUILTIN_STATUS(M) \ +#define APPLY_FOR_STATUS(M) \ M(DictionaryStatus, "Dictionary Status.", DB::getStatusEnumAllPossibleValues()) \ -#ifdef APPLY_FOR_EXTERNAL_STATUS - #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) APPLY_FOR_EXTERNAL_STATUS(M) -#else - #define APPLY_FOR_STATUS(M) APPLY_FOR_BUILTIN_STATUS(M) -#endif namespace CurrentStatusInfo { From 5af28315e233561b196a1e05d5bb2d185288c747 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 30 Jun 2023 22:34:47 +0800 Subject: [PATCH 273/522] Try to fix style issues --- src/Common/CurrentMetrics.cpp | 4 ++-- src/Common/ErrorCodes.cpp | 4 ++-- src/Common/ProfileEvents.cpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 4f0d55a9cb6..8b88555d78a 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -203,9 +203,9 @@ M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \ #ifdef APPLY_FOR_EXTERNAL_METRICS - #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) #else - #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) + #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) #endif namespace CurrentMetrics diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 87619cdafad..ae8d5f8796d 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -590,9 +590,9 @@ /* See END */ #ifdef APPLY_FOR_EXTERNAL_ERROR_CODES - #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M) + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M) #else - #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) + #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) #endif namespace DB diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ecec1179875..c8570b7921b 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -537,9 +537,9 @@ The server successfully detected this situation and will download merged part fr M(LogFatal, "Number of log messages with level Fatal") \ #ifdef APPLY_FOR_EXTERNAL_EVENTS - #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M) #else - #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) + #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) #endif namespace ProfileEvents From 546f12dc85fdbbcf3396767917bd9dbbf8522c41 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 6 Jul 2023 05:05:27 +0000 Subject: [PATCH 274/522] Fix inserts to MongoDB tables --- src/Storages/StorageMongoDB.cpp | 60 ++++++++++++++++++- .../integration/test_storage_mongodb/test.py | 6 ++ 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 3287e3272e3..45b8aceb058 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -19,6 +19,8 @@ #include #include +#include + namespace DB { @@ -127,9 +129,7 @@ public: for (const auto j : collections::range(0, num_cols)) { - WriteBufferFromOwnString ostr; - data_types[j]->getDefaultSerialization()->serializeText(*columns[j], i, ostr, FormatSettings{}); - 
document->add(data_names[j], ostr.str()); + insertValueIntoMongoDB(*document, data_names[j], *data_types[j], *columns[j], i); } documents.push_back(std::move(document)); @@ -151,6 +151,60 @@ public: } private: + + void insertValueIntoMongoDB( + Poco::MongoDB::Document & document, + const std::string & name, + const IDataType & data_type, + const IColumn & column, + size_t idx) + { + WhichDataType which(data_type); + + if (which.isArray()) + { + const ColumnArray & column_array = assert_cast(column); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + + size_t offset = offsets[idx - 1]; + size_t next_offset = offsets[idx]; + + const IColumn & nested_column = column_array.getData(); + + const auto * array_type = assert_cast(&data_type); + const DataTypePtr & nested_type = array_type->getNestedType(); + + Poco::MongoDB::Array::Ptr array = new Poco::MongoDB::Array(); + for (size_t i = 0; i + offset < next_offset; ++i) + { + insertValueIntoMongoDB(*array, Poco::NumberFormatter::format(i), *nested_type, nested_column, i + offset); + } + + document.add(name, array); + return; + } + + /// MongoDB does not support UInt64 type, so just cast it to Int64 + if (which.isNativeUInt()) + document.add(name, static_cast(column.getUInt(idx))); + else if (which.isNativeInt()) + document.add(name, static_cast(column.getInt(idx))); + else if (which.isFloat32()) + document.add(name, static_cast(column.getFloat32(idx))); + else if (which.isFloat64()) + document.add(name, static_cast(column.getFloat64(idx))); + else if (which.isDate()) + document.add(name, Poco::Timestamp(DateLUT::instance().fromDayNum(DayNum(column.getUInt(idx))) * 1000000)); + else if (which.isDateTime()) + document.add(name, Poco::Timestamp(column.getUInt(idx) * 1000000)); + else + { + WriteBufferFromOwnString ostr; + data_type.getDefaultSerialization()->serializeText(column, idx, ostr, FormatSettings{}); + document.add(name, ostr.str()); + } + } + String collection_name; String db_name; StorageMetadataPtr metadata_snapshot; diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 6ce71fb91fa..0abaa7a8214 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -244,6 +244,12 @@ def test_arrays(started_cluster): == "[]\n" ) + # Test INSERT SELECT + node.query("INSERT INTO arrays_mongo_table SELECT * FROM arrays_mongo_table") + + assert node.query("SELECT COUNT() FROM arrays_mongo_table") == "200\n" + assert node.query("SELECT COUNT(DISTINCT *) FROM arrays_mongo_table") == "100\n" + node.query("DROP TABLE arrays_mongo_table") arrays_mongo_table.drop() From 24b5c9c204dcc0f3c181d13528d46d012dae86c9 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 06:05:43 +0000 Subject: [PATCH 275/522] Use one setting input_format_csv_allow_variable_number_of_colums and code in RowInput --- docs/en/interfaces/formats.md | 3 +- .../operations/settings/settings-formats.md | 10 +--- docs/ru/interfaces/formats.md | 3 +- docs/ru/operations/settings/settings.md | 10 +--- src/Core/Settings.h | 3 +- src/Formats/FormatFactory.cpp | 3 +- src/Formats/FormatSettings.h | 3 +- .../Formats/Impl/CSVRowInputFormat.cpp | 58 ++++++------------- .../Formats/Impl/CSVRowInputFormat.h | 6 +- .../RowInputFormatWithNamesAndTypes.cpp | 23 ++++++++ .../Formats/RowInputFormatWithNamesAndTypes.h | 4 ++ 11 files changed, 58 insertions(+), 68 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 
79790cef5b2..34f9abb91d4 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -471,8 +471,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
 - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
 - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
 - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
-- [input_format_csv_ignore_extra_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_ignore_extra_columns) - ignore extra columns in CSV input (if file has more columns than expected). Default value - `false`.
-- [input_format_csv_missing_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_missing_as_default) - treat missing fields in CSV input as default values. Default value - `false`.
+- [input_format_csv_allow_variable_number_of_colums](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_colums) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.

 ## CSVWithNames {#csvwithnames}

diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index 6b05f41666c..43e410ceee8 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -931,15 +931,9 @@ Result
 ```text
 " string "
 ```
-### input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns}
+### input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums}

-Ignore extra columns in CSV input (if file has more columns than expected).
-
-Disabled by default.
-
-### input_format_csv_missing_as_default {#input_format_csv_missing_as_default}
-
-Treat missing fields in CSV input as default values.
+Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values.

 Disabled by default.

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 7e3bb3f7d26..e7c57fff749 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -402,8 +402,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
 - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
 - [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
-- [input_format_csv_ignore_extra_columns](../operations/settings/settings.md/#input_format_csv_ignore_extra_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). Значение по умолчанию - `false`. -- [input_format_csv_missing_as_default](../operations/settings/settings.md/#input_format_csv_missing_as_default) - рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. +- [input_format_csv_allow_variable_number_of_colums](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_colums) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index e679ce6abe1..ddc101c6991 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1727,15 +1727,9 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in " string " ``` -## input_format_csv_ignore_extra_columns {#input_format_csv_ignore_extra_columns} +## input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} -Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается). - -Выключено по умолчанию. - -## input_format_csv_missing_as_default {#input_format_csv_missing_as_default} - -Рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. +Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Выключено по умолчанию. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 044b3c34dc2..df2a916b7cf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1009,8 +1009,7 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_ignore_extra_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected)", 0) \ - M(Bool, input_format_csv_missing_as_default, false, "Treat missing fields in CSV input as default values", 0) \ + M(Bool, input_format_csv_allow_variable_number_of_colums, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
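
[Editor's note] The Settings.h hunk above merges the two earlier CSV options into the single flag `input_format_csv_allow_variable_number_of_colums` (the misspelling "colums" is the patch's own; the later "Fix setting name" commit in this series corrects it). Below is a minimal sketch of the intended behavior, assuming a `clickhouse-local` binary on `PATH`; the sample data and expected output are illustrative and not taken from the patch:

```python
import subprocess

# One row with an extra trailing column, one row with a missing column.
csv_rows = b"1,a,unexpected\n2\n"

# clickhouse-local exposes stdin as an implicit table named `table`.
result = subprocess.run(
    [
        "clickhouse-local",
        "--input-format", "CSV",
        "--structure", "n UInt32, s String",
        # Setting name exactly as introduced by this patch (renamed later).
        "--input_format_csv_allow_variable_number_of_colums", "1",
        "--query", "SELECT n, s FROM table",
    ],
    input=csv_rows,
    capture_output=True,
    check=True,
)

# Expected: the extra column is ignored and the missing field falls back to
# the column default, i.e. roughly "1\ta" and "2\t" (empty string).
print(result.stdout.decode())
```
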
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 04b095a92d6..af9823dde73 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -72,8 +72,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; - format_settings.csv.ignore_extra_columns = settings.input_format_csv_ignore_extra_columns; - format_settings.csv.missing_as_default = settings.input_format_csv_missing_as_default; + format_settings.csv.allow_variable_number_of_colums = settings.input_format_csv_allow_variable_number_of_colums; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4bdc9077a0b..653578f8496 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -140,8 +140,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; bool allow_whitespace_or_tab_as_delimiter = false; - bool ignore_extra_columns = false; - bool missing_as_default = false; + bool allow_variable_number_of_colums = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 9731b4ba465..57e05ae7cd3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -155,18 +155,7 @@ CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings void CSVFormatReader::skipFieldDelimiter() { skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - - bool res = checkChar(format_settings.csv.delimiter, *buf); - if (res) - return; - - if (!format_settings.csv.missing_as_default) - { - char err[2] = {format_settings.csv.delimiter, '\0'}; - throwAtAssertionFailed(err, *buf); - } - else - current_row_has_missing_fields = true; + assertChar(format_settings.csv.delimiter, *buf); } template @@ -206,7 +195,6 @@ void CSVFormatReader::skipRowEndDelimiter() return; skipEndOfLine(*buf); - current_row_has_missing_fields = false; } void CSVFormatReader::skipHeaderRow() @@ -295,6 +283,11 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return true; } +bool CSVFormatReader::allowVariableNumberOfColumns() +{ + return format_settings.csv.allow_variable_number_of_colums; +} + bool CSVFormatReader::readField( IColumn & column, const DataTypePtr & type, @@ -308,8 +301,6 @@ bool CSVFormatReader::readField( const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); - bool res = false; - /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected if (format_settings.csv.empty_as_default && (at_delimiter || at_last_column_line_end)) @@ 
-321,34 +312,18 @@ bool CSVFormatReader::readField( /// they do not contain empty unquoted fields, so this check /// works for tuples as well. column.insertDefault(); - } - else if (current_row_has_missing_fields) - { - column.insertDefault(); - } - else if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) - { - /// If value is null but type is not nullable then use default value instead. - res = SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); - } - else - { - /// Read the column normally. - serialization->deserializeTextCSV(column, *buf, format_settings); - res = true; + return false; } - if (is_last_file_column && format_settings.csv.ignore_extra_columns) + if (format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type)) { - // Skip all fields to next line. - skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - while (checkChar(format_settings.csv.delimiter, *buf)) - { - skipField(); - skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); - } + /// If value is null but type is not nullable then use default value instead. + return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); } - return res; + + /// Read the column normally. + serialization->deserializeTextCSV(column, *buf, format_settings); + return true; } void CSVFormatReader::skipPrefixBeforeHeader() @@ -377,6 +352,11 @@ bool CSVFormatReader::checkForSuffix() return false; } +bool CSVFormatReader::checkForEndOfRow() +{ + return buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'; +} + CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( buf, diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 82e03c453e7..8ccf04feed3 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -69,6 +69,9 @@ public: void skipRowEndDelimiter() override; void skipPrefixBeforeHeader() override; + bool checkForEndOfRow() override; + bool allowVariableNumberOfColumns() override; + std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } std::vector readHeaderRow() { return readRowImpl(); } @@ -89,9 +92,6 @@ public: protected: PeekableReadBuffer * buf; - -private: - bool current_row_has_missing_fields = false; }; class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index eaedbbb4a1e..fb49779e0af 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -227,7 +227,30 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE format_reader->skipField(file_column); if (!is_last_file_column) + { + if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow()) + { + ++file_column; + while (file_column < column_mapping->column_indexes_for_input_fields.size()) + { + const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column]; + columns[*rem_column_index]->insertDefault(); + ++file_column; + } + } + else + format_reader->skipFieldDelimiter(); 
+ } + } + + if (format_reader->allowVariableNumberOfColumns() && !format_reader->checkForEndOfRow()) + { + do + { format_reader->skipFieldDelimiter(); + format_reader->skipField(1); + } + while (!format_reader->checkForEndOfRow()); } format_reader->skipRowEndDelimiter(); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index 5648acd392d..b5103d3db39 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -119,6 +119,10 @@ public: /// Check suffix. virtual bool checkForSuffix() { return in->eof(); } + virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); } + + virtual bool allowVariableNumberOfColumns() { return false; } + const FormatSettings & getFormatSettings() const { return format_settings; } virtual void setReadBuffer(ReadBuffer & in_) { in = &in_; } From d86ceef663cd0d3fcd8532ae63539e85bc4b210b Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 6 Jul 2023 14:14:48 +0800 Subject: [PATCH 276/522] Implement log file names rendering --- src/Loggers/Loggers.cpp | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 0c3a7bd615d..1e169190ca4 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -34,6 +34,16 @@ static std::string createDirectory(const std::string & file) return path; } +static std::string renderFileNameTemplate(time_t now, const std::string & file_path) +{ + fs::path path{file_path}; + std::tm buf; + localtime_r(&now, &buf); + std::stringstream ss; + ss << std::put_time(&buf, file_path.c_str()); + return path.replace_filename(ss.str()); +} + #ifndef WITHOUT_TEXT_LOG void Loggers::setTextLog(std::shared_ptr log, int max_priority) { @@ -68,9 +78,12 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log /// The maximum (the most verbose) of those will be used as default for Poco loggers int max_log_level = 0; - const auto log_path = config.getString("logger.log", ""); - if (!log_path.empty()) + time_t now = std::time({}); + + const auto log_path_prop = config.getString("logger.log", ""); + if (!log_path_prop.empty()) { + const auto log_path = renderFileNameTemplate(now, log_path_prop); createDirectory(log_path); std::string ext; @@ -109,9 +122,10 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log split->addChannel(log, "log"); } - const auto errorlog_path = config.getString("logger.errorlog", ""); - if (!errorlog_path.empty()) + const auto errorlog_path_prop = config.getString("logger.errorlog", ""); + if (!errorlog_path_prop.empty()) { + const auto errorlog_path = renderFileNameTemplate(now, errorlog_path_prop); createDirectory(errorlog_path); // NOTE: we don't use notice & critical in the code, so in practice error log collects fatal & error & warning. 
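
[Editor's note] `renderFileNameTemplate` in the patch above runs the configured path through `std::put_time` and keeps the rendered file name, so strftime-style specifiers in `logger.log` / `logger.errorlog` are expanded once at server startup. A quick Python sketch of the resulting behavior (the path is illustrative):

```python
from datetime import datetime

# strftime passes ordinary characters through untouched and expands
# %-specifiers, mirroring what std::put_time does in the patch above.
configured = "/var/log/clickhouse-server/clickhouse-server-%Y-%m.log"
rendered = datetime.now().strftime(configured)
print(rendered)  # e.g. /var/log/clickhouse-server/clickhouse-server-2023-07.log
```
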
From 479efaa79acd23e72fb06413fd84d4b7091bd019 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Thu, 6 Jul 2023 14:16:18 +0800 Subject: [PATCH 277/522] Add clickhouse_log_file and clickhouse_error_log_file args to add_instance() --- tests/integration/helpers/cluster.py | 35 +++++++++++++++++----------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 21398790be3..5b583b865de 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -64,6 +64,13 @@ DEFAULT_ENV_NAME = ".env" SANITIZER_SIGN = "==================" +CLICKHOUSE_START_COMMAND = ( + "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file}" +) + +CLICKHOUSE_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.log" + +CLICKHOUSE_ERROR_LOG_FILE = "/var/log/clickhouse-server/clickhouse-server.err.log" # to create docker-compose env file def _create_env_file(path, variables): @@ -1497,6 +1504,8 @@ class ClickHouseCluster: with_postgres=False, with_postgres_cluster=False, with_postgresql_java_client=False, + clickhouse_log_file=CLICKHOUSE_LOG_FILE, + clickhouse_error_log_file=CLICKHOUSE_ERROR_LOG_FILE, with_hdfs=False, with_kerberized_hdfs=False, with_mongo=False, @@ -1563,6 +1572,13 @@ class ClickHouseCluster: "LLVM_PROFILE_FILE" ] = "/var/lib/clickhouse/server_%h_%p_%m.profraw" + clickhouse_start_command = CLICKHOUSE_START_COMMAND + if clickhouse_log_file: + clickhouse_start_command += " --log-file=" + clickhouse_log_file + if clickhouse_error_log_file: + clickhouse_start_command += " --errorlog-file=" + clickhouse_error_log_file + logging.debug(f"clickhouse_start_command: {clickhouse_start_command}") + instance = ClickHouseInstance( cluster=self, base_path=self.base_dir, @@ -1592,10 +1608,10 @@ class ClickHouseCluster: with_redis=with_redis, with_minio=with_minio, with_azurite=with_azurite, - with_cassandra=with_cassandra, with_jdbc_bridge=with_jdbc_bridge, with_hive=with_hive, with_coredns=with_coredns, + with_cassandra=with_cassandra, server_bin_path=self.server_bin_path, odbc_bridge_bin_path=self.odbc_bridge_bin_path, library_bridge_bin_path=self.library_bridge_bin_path, @@ -1604,6 +1620,10 @@ class ClickHouseCluster: with_postgres=with_postgres, with_postgres_cluster=with_postgres_cluster, with_postgresql_java_client=with_postgresql_java_client, + clickhouse_start_command=clickhouse_start_command, + main_config_name=main_config_name, + users_config_name=users_config_name, + copy_common_configs=copy_common_configs, hostname=hostname, env_variables=env_variables, image=image, @@ -1612,9 +1632,6 @@ class ClickHouseCluster: ipv4_address=ipv4_address, ipv6_address=ipv6_address, with_installed_binary=with_installed_binary, - main_config_name=main_config_name, - users_config_name=users_config_name, - copy_common_configs=copy_common_configs, external_dirs=external_dirs, tmpfs=tmpfs or [], config_root_name=config_root_name, @@ -3046,16 +3063,6 @@ class ClickHouseCluster: subprocess_check_call(self.base_zookeeper_cmd + ["start", n]) -CLICKHOUSE_START_COMMAND = ( - "clickhouse server --config-file=/etc/clickhouse-server/{main_config_file}" - " --log-file=/var/log/clickhouse-server/clickhouse-server.log " - " --errorlog-file=/var/log/clickhouse-server/clickhouse-server.err.log" -) - -CLICKHOUSE_STAY_ALIVE_COMMAND = "bash -c \"trap 'pkill tail' INT TERM; {} --daemon; coproc tail -f /dev/null; wait $$!\"".format( - CLICKHOUSE_START_COMMAND -) - DOCKER_COMPOSE_TEMPLATE = """ version: '2.3' 
 services:

From fef71ab0b8759f7a659c4bb8c1be03a89df92f79 Mon Sep 17 00:00:00 2001
From: Victor Krasnov
Date: Thu, 6 Jul 2023 14:16:46 +0800
Subject: [PATCH 278/522] Add integration test

---
 .../__init__.py                               | 58 +++++++++++++++++++
 .../configs/config-file-template.xml          |  6 ++
 .../test.py                                   |  0
 3 files changed, 64 insertions(+)
 create mode 100644 tests/integration/test_render_log_file_name_templates/__init__.py
 create mode 100644 tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml
 create mode 100644 tests/integration/test_render_log_file_name_templates/test.py

diff --git a/tests/integration/test_render_log_file_name_templates/__init__.py b/tests/integration/test_render_log_file_name_templates/__init__.py
new file mode 100644
index 00000000000..9fa87056d2c
--- /dev/null
+++ b/tests/integration/test_render_log_file_name_templates/__init__.py
@@ -0,0 +1,58 @@
+import pytest
+import logging
+from helpers.cluster import ClickHouseCluster
+from datetime import datetime
+
+
+log_dir = "/var/log/clickhouse-server/"
+cluster = ClickHouseCluster(__file__)
+
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    cluster.add_instance(
+        "file-names-from-config",
+        main_configs=["configs/config-file-template.xml"],
+        clickhouse_log_file=None,
+        clickhouse_error_log_file=None,
+    )
+    cluster.add_instance(
+        "file-names-from-params",
+        clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log",
+        clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log",
+    )
+    try:
+        cluster.start()
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def test_check_file_names(started_cluster):
+    now = datetime.now()
+    log_file = (
+        log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log"
+    )
+    err_log_file = (
+        log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log"
+    )
+    logging.debug(f"log_file {log_file} err_log_file {err_log_file}")
+
+    for name, instance in started_cluster.instances.items():
+        files = instance.exec_in_container(
+            ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True
+        )
+
+        logging.debug(f"check instance '{name}': {log_dir} contains: {files}")
+
+        assert (
+            instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True)
+            == log_file + "\n"
+        )
+
+        assert (
+            instance.exec_in_container(
+                ["bash", "-c", f"ls {err_log_file}"], nothrow=True
+            )
+            == err_log_file + "\n"
+        )
diff --git a/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml b/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml
new file mode 100644
index 00000000000..ba408eb9823
--- /dev/null
+++ b/tests/integration/test_render_log_file_name_templates/configs/config-file-template.xml
@@ -0,0 +1,6 @@
+
+
+        /var/log/clickhouse-server/clickhouse-server-%Y-%m.log
+        /var/log/clickhouse-server/clickhouse-server-%Y-%m.err.log
+
+
diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py
new file mode 100644
index 00000000000..e69de29bb2d

From af0de3d614c41ce24ad631b1e12328efb4dd444b Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 6 Jul 2023 06:17:11 +0000
Subject: [PATCH 279/522] Small fixes

---
 tests/integration/test_multiple_disks/test.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py
index 4a934447345..5561d63840b 100644
---
a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -711,7 +711,7 @@ def test_background_move(start_cluster, name, engine): s1 String ) ENGINE = {engine} ORDER BY tuple() - SETTINGS storage_policy='moving_jbod_with_external, max_replicated_merges_in_queue=0' + SETTINGS storage_policy='moving_jbod_with_external', max_replicated_merges_in_queue=0 """ ) @@ -735,8 +735,6 @@ def test_background_move(start_cluster, name, engine): assert first_part is not None - used_disks = get_used_disks_for_table(node1, name) - retry = 20 i = 0 # multiple moves can be assigned in parallel so we can move later parts before the oldest @@ -745,9 +743,6 @@ def test_background_move(start_cluster, name, engine): time.sleep(0.5) i += 1 - used_disks = get_used_disks_for_table(node1, name) - assert sum(1 for x in used_disks if x == "jbod1") <= 2 - # first (oldest) part was moved to external assert get_disk_for_part(node1, name, first_part) == "external" @@ -861,9 +856,6 @@ def test_start_stop_moves(start_cluster, name, engine): # first (oldest) part moved to external assert get_disk_for_part(node1, name, first_part) == "external" - used_disks = get_used_disks_for_table(node1, name) - assert sum(1 for x in used_disks if x == "jbod1") <= 2 - node1.query(f"SYSTEM START MERGES {name}") finally: node1.query_with_retry(f"DROP TABLE IF EXISTS {name} SYNC") From 085f7caccffa20717ac6d96e13a5e8baae84db98 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Jul 2023 06:30:47 +0000 Subject: [PATCH 280/522] Move config changes after configure --- docker/test/upgrade/run.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 82a88272df9..b8061309342 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -67,6 +67,13 @@ start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log +# Start server from previous release +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +# Previous version may not be ready for fault injections +export ZOOKEEPER_FAULT_INJECTION=0 +configure + # force_sync=false doesn't work correctly on some older versions sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ | sed "s|false|true|" \ @@ -81,13 +88,6 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -# Start server from previous release -# Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 -# Previous version may not be ready for fault injections -export ZOOKEEPER_FAULT_INJECTION=0 -configure - # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml From b9fffacc653fb9175af03cbb8f53766b0272ddbc Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 6 Jul 2023 06:31:09 +0000 Subject: [PATCH 281/522] Fix build --- src/Storages/StorageMongoDB.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 45b8aceb058..21543541f36 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -186,9 +186,9 @@ private: /// MongoDB does not support UInt64 
type, so just cast it to Int64 if (which.isNativeUInt()) - document.add(name, static_cast(column.getUInt(idx))); + document.add(name, static_cast(column.getUInt(idx))); else if (which.isNativeInt()) - document.add(name, static_cast(column.getInt(idx))); + document.add(name, static_cast(column.getInt(idx))); else if (which.isFloat32()) document.add(name, static_cast(column.getFloat32(idx))); else if (which.isFloat64()) From 1336a9ec6770ae709f956a019c778346b8475162 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:09:55 +0200 Subject: [PATCH 282/522] Better naming --- .../operations/system-tables/jemalloc_bins.md | 30 +++++++++---------- src/Storages/System/StorageSystemJemalloc.cpp | 4 +-- .../02810_system_jemalloc_bins.sql | 4 +-- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/en/operations/system-tables/jemalloc_bins.md b/docs/en/operations/system-tables/jemalloc_bins.md index dfe2ddb01e2..06d9ba57dfc 100644 --- a/docs/en/operations/system-tables/jemalloc_bins.md +++ b/docs/en/operations/system-tables/jemalloc_bins.md @@ -11,8 +11,8 @@ Columns: - `index` (UInt64) — Index of the bin ordered by size - `large` (Bool) — True for large allocations and False for small - `size` (UInt64) — Size of allocations in this bin -- `nmalloc` (UInt64) — Number of allocations -- `ndalloc` (UInt64) — Number of deallocations +- `allocations` (UInt64) — Number of allocations +- `deallocations` (UInt64) — Number of deallocations **Example** @@ -21,7 +21,7 @@ Find the sizes of allocations that contributed the most to the current overall m ``` sql SELECT *, - nmalloc - ndalloc AS active_allocations, + allocations - deallocations AS active_allocations, size * active_allocations AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 @@ -30,16 +30,16 @@ LIMIT 10 ``` ``` text -┌─index─┬─large─┬─────size─┬──nmalloc─┬──ndalloc─┬─active_allocations─┬─allocated_bytes─┐ -│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │ -│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │ -│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │ -│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │ -│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │ -│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │ -│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │ -│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │ -│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │ -│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │ -└───────┴───────┴──────────┴──────────┴──────────┴────────────────────┴─────────────────┘ +┌─index─┬─large─┬─────size─┬─allocactions─┬─deallocations─┬─active_allocations─┬─allocated_bytes─┐ +│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │ +│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │ +│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │ +│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │ +│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │ +│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │ +│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │ +│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │ +│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │ +│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │ +└───────┴───────┴──────────┴──────────────┴───────────────┴────────────────────┴─────────────────┘ ``` diff --git a/src/Storages/System/StorageSystemJemalloc.cpp b/src/Storages/System/StorageSystemJemalloc.cpp index 2cb666eb5c3..4348349ebbc 100644 --- a/src/Storages/System/StorageSystemJemalloc.cpp +++ 
b/src/Storages/System/StorageSystemJemalloc.cpp @@ -95,8 +95,8 @@ NamesAndTypesList StorageSystemJemallocBins::getNamesAndTypes() { "index", std::make_shared() }, { "large", std::make_shared() }, { "size", std::make_shared() }, - { "nmalloc", std::make_shared() }, - { "ndalloc", std::make_shared() }, + { "allocations", std::make_shared() }, + { "deallocations", std::make_shared() }, }; } diff --git a/tests/queries/0_stateless/02810_system_jemalloc_bins.sql b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql index 8ecf47e51b5..03062e70aa3 100644 --- a/tests/queries/0_stateless/02810_system_jemalloc_bins.sql +++ b/tests/queries/0_stateless/02810_system_jemalloc_bins.sql @@ -3,8 +3,8 @@ WITH (SELECT count() FROM system.jemalloc_bins) AS total_bins, (SELECT count() FROM system.jemalloc_bins WHERE large) AS large_bins, (SELECT count() FROM system.jemalloc_bins WHERE NOT large) AS small_bins, - (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE large) AS large_allocated_bytes, - (SELECT sum(size * (nmalloc - ndalloc)) FROM system.jemalloc_bins WHERE NOT large) AS small_allocated_bytes + (SELECT sum(size * (allocations - deallocations)) FROM system.jemalloc_bins WHERE large) AS large_allocated_bytes, + (SELECT sum(size * (allocations - deallocations)) FROM system.jemalloc_bins WHERE NOT large) AS small_allocated_bytes SELECT (total_bins > 0) = jemalloc_enabled, (large_bins > 0) = jemalloc_enabled, From 32f5a7830229b53df80f9e788b860066a4a86947 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 07:32:46 +0000 Subject: [PATCH 283/522] Fix setting name --- docs/en/interfaces/formats.md | 2 +- docs/en/operations/settings/settings-formats.md | 2 +- docs/ru/interfaces/formats.md | 2 +- docs/ru/operations/settings/settings.md | 2 +- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 2 +- tests/queries/0_stateless/00301_csv.reference | 4 ++-- tests/queries/0_stateless/00301_csv.sh | 8 ++++---- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 34f9abb91d4..ed2f010a632 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -471,7 +471,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. -- [input_format_csv_allow_variable_number_of_colums](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_colums) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. 
+- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 43e410ceee8..3eea5ef4ad9 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -931,7 +931,7 @@ Result ```text " string " ``` -### input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} +### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index e7c57fff749..e232b63f049 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -402,7 +402,7 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`. - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`. - [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`. -- [input_format_csv_allow_variable_number_of_colums](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_colums) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. +- [input_format_csv_allow_variable_number_of_columns](../operations/settings/settings.md/#input_format_csv_allow_variable_number_of_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`. ## CSVWithNames {#csvwithnames} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index ddc101c6991..42e21f6140b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1727,7 +1727,7 @@ echo ' string ' | ./clickhouse local -q "select * from table FORMAT CSV" --in " string " ``` -## input_format_csv_allow_variable_number_of_colums {#input_format_csv_allow_variable_number_of_colums} +## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} Игнорировать дополнительные столбцы (если файл содержит больше столбцов чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. 
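A minimal sketch of the renamed setting's behavior (illustrative only; the table definition and its default are assumed here, not taken from the patch):

```sql
-- assuming: CREATE TABLE t (s String, n UInt64 DEFAULT 42) ENGINE = Memory
SET input_format_csv_allow_variable_number_of_columns = 1;
-- With the setting on, both of these CSV rows are accepted into t:
--   "a",1,"extra"  -> ('a', 1)   -- the surplus third field is ignored
--   "b"            -> ('b', 42)  -- the missing field takes the column default
```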
diff --git a/src/Core/Settings.h b/src/Core/Settings.h index df2a916b7cf..7f8a52c69fa 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1009,7 +1009,7 @@ class IColumn; M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \ \ M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \ - M(Bool, input_format_csv_allow_variable_number_of_colums, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ + M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index af9823dde73..182abc84ffe 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -72,7 +72,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.csv.skip_trailing_empty_lines = settings.input_format_csv_skip_trailing_empty_lines; format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces; format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter; - format_settings.csv.allow_variable_number_of_colums = settings.input_format_csv_allow_variable_number_of_colums; + format_settings.csv.allow_variable_number_of_columns = settings.input_format_csv_allow_variable_number_of_columns; format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 653578f8496..dd4608227d0 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -140,7 +140,7 @@ struct FormatSettings bool skip_trailing_empty_lines = false; bool trim_whitespaces = true; bool allow_whitespace_or_tab_as_delimiter = false; - bool allow_variable_number_of_colums = false; + bool allow_variable_number_of_columns = false; } csv; struct HiveText diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 57e05ae7cd3..60f1cbe1f80 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -285,7 +285,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) bool CSVFormatReader::allowVariableNumberOfColumns() { - return format_settings.csv.allow_variable_number_of_colums; + return format_settings.csv.allow_variable_number_of_columns; } bool CSVFormatReader::readField( diff --git a/tests/queries/0_stateless/00301_csv.reference b/tests/queries/0_stateless/00301_csv.reference index 804ccf0c713..ec8c5f2b371 100644 --- a/tests/queries/0_stateless/00301_csv.reference +++ b/tests/queries/0_stateless/00301_csv.reference @@ -14,14 +14,14 @@ default-eof 1 2019-06-19 2016-01-01 01:02:03 NUL 2016-01-02 01:02:03 Nhello \N \N -=== Test 
input_format_csv_ignore_extra_columns +=== Test ignore extra columns Hello 1 String1 Hello 2 String2 Hello 3 String3 Hello 4 String4 Hello 5 String5 Hello 6 String6 -=== Test input_format_csv_missing_as_default +=== Test missing as default 0 0 33 \N 55 Default 0 0 33 \N 55 Default Hello 0 0 33 \N 55 Default diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 7657745e9f7..776bd39fc03 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -41,7 +41,7 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s NULLS LAST"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test input_format_csv_ignore_extra_columns +echo === Test ignore extra columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; echo '"Hello", 1, "String1" @@ -50,12 +50,12 @@ echo '"Hello", 1, "String1" "Hello", 4, , "2016-01-14" "Hello", 5, "String5", "2016-01-15", "2016-01-16" "Hello", 6, "String6" , "line with a -break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_ignore_extra_columns=1 --query="INSERT INTO csv FORMAT CSV"; +break"' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --input_format_csv_allow_variable_number_of_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY s, n"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; -echo === Test input_format_csv_missing_as_default +echo === Test missing as default $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (f1 String, f2 UInt64, f3 UInt256, f4 UInt64 Default 33, f5 Nullable(UInt64), f6 Nullable(UInt64) Default 55, f7 String DEFAULT 'Default') ENGINE = Memory"; echo ' @@ -65,6 +65,6 @@ echo ' "Hello", 1, 3, 2 "Hello",1,4,2,3,4,"String" "Hello", 1, 4, 2, 3, 4, "String" -"Hello", 1, 5, 2, 3, 4, "String",'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_missing_as_default=1 --query="INSERT INTO csv FORMAT CSV"; +"Hello", 1, 5, 2, 3, 4, "String",'| $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_allow_variable_number_of_columns=1 --query="INSERT INTO csv FORMAT CSV"; $CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY f1, f2, f3, f4, f5 NULLS FIRST, f6, f7"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; From 24e77083b38fbfdbec0d5a6fa8da65cb6a33a602 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:50:44 +0000 Subject: [PATCH 284/522] Commit tests --- src/Parsers/ParserCreateQuery.cpp | 3 +- .../02811_primary_key_in_columns.reference | 0 .../02811_primary_key_in_columns.sql | 50 +++++++++---------- 3 files changed, 27 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/02811_primary_key_in_columns.reference diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1941bafab0d..60e15cb92f4 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -311,7 +311,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E if(!primary_key_from_columns) primary_key_from_columns = makeASTFunction("tuple"); auto column_identifier = std::make_shared(cd->name); - primary_key_from_columns->children.push_back(column_identifier); + 
primary_key_from_columns->children[0]->as()->children.push_back(column_identifier); } columns->children.push_back(elem); } @@ -710,6 +710,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); query->storage->primary_key = query->columns_list->primary_key; + } if (query->columns_list && (query->columns_list->primary_key_from_columns)) diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.reference b/tests/queries/0_stateless/02811_primary_key_in_columns.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02811_primary_key_in_columns.sql b/tests/queries/0_stateless/02811_primary_key_in_columns.sql index df25fdd14ab..0519f4c820b 100644 --- a/tests/queries/0_stateless/02811_primary_key_in_columns.sql +++ b/tests/queries/0_stateless/02811_primary_key_in_columns.sql @@ -23,39 +23,39 @@ DROP TABLE IF EXISTS pk_test21; DROP TABLE IF EXISTS pk_test22; DROP TABLE IF EXISTS pk_test23; -SET default_table_engine=MergeTree; +SET default_table_engine='MergeTree'; -CREATE TABLE pk_test1 (String a PRIMARY KEY, String b, String c); -CREATE TABLE pk_test2 (String a PRIMARY KEY, String b PRIMARY KEY, String c); -CREATE TABLE pk_test3 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY); +CREATE TABLE pk_test1 (a String PRIMARY KEY, b String, c String); +CREATE TABLE pk_test2 (a String PRIMARY KEY, b String PRIMARY KEY, c String); +CREATE TABLE pk_test3 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY); -CREATE TABLE pk_test4 (String a, String b PRIMARY KEY, String c PRIMARY KEY); -CREATE TABLE pk_test5 (String a, String b PRIMARY KEY, String c); -CREATE TABLE pk_test6 (String a, String b, String c PRIMARY KEY); +CREATE TABLE pk_test4 (a String, b String PRIMARY KEY, c String PRIMARY KEY); +CREATE TABLE pk_test5 (a String, b String PRIMARY KEY, c String); +CREATE TABLE pk_test6 (a String, b String, c String PRIMARY KEY); -CREATE TABLE pk_test7 (String a PRIMARY KEY, String b, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test8 (String a PRIMARY KEY, String b PRIMARY KEY, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test9 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test7 (a String PRIMARY KEY, b String, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test8 (a String PRIMARY KEY, b String PRIMARY KEY, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test9 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test10 (String a, String b PRIMARY KEY, String c PRIMARY KEY, PRIMARY KEY (a)); -CREATE TABLE pk_test11 (String a, String b PRIMARY KEY, String c, PRIMARY KEY (a)); -CREATE TABLE pk_test12 (String a, String b, String c PRIMARY KEY, PRIMARY KEY (a)); +CREATE TABLE pk_test10 (a String, b String PRIMARY KEY, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test11 (a String, b String PRIMARY KEY, c String, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test12 (a String, b String, c String PRIMARY KEY, PRIMARY KEY (a)); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test12 (String a PRIMARY KEY, String b, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test13 (String a PRIMARY KEY, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); -CREATE TABLE 
pk_test14 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test12 (a String PRIMARY KEY, b String, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test13 (a String PRIMARY KEY, b String PRIMARY KEY, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test14 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test15 (String a, String b PRIMARY KEY, String c PRIMARY KEY) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test16 (String a, String b PRIMARY KEY, String c) PRIMARY KEY (a,b,c); -CREATE TABLE pk_test17 (String a, String b, String c PRIMARY KEY) PRIMARY KEY (a,b,c); +CREATE TABLE pk_test15 (a String, b String PRIMARY KEY, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test16 (a String, b String PRIMARY KEY, c String) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } +CREATE TABLE pk_test17 (a String, b String, c String PRIMARY KEY) PRIMARY KEY (a,b,c); -- { clientError BAD_ARGUMENTS } -CREATE TABLE pk_test18 (String a PRIMARY KEY, String b, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test19 (String a PRIMARY KEY, String b PRIMARY KEY, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test20 (String a PRIMARY KEY, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test18 (a String PRIMARY KEY, b String, c String) ORDER BY (a,b,c); +CREATE TABLE pk_test19 (a String PRIMARY KEY, b String PRIMARY KEY, c String) ORDER BY (a,b,c); +CREATE TABLE pk_test20 (a String PRIMARY KEY, b String PRIMARY KEY, c String PRIMARY KEY) ORDER BY (a,b,c); -CREATE TABLE pk_test21 (String a, String b PRIMARY KEY, String c PRIMARY KEY) ORDER BY (a,b,c); -CREATE TABLE pk_test22 (String a, String b PRIMARY KEY, String c) ORDER BY (a,b,c); -CREATE TABLE pk_test23 (String a, String b, String c PRIMARY KEY) ORDER BY (a,b,c); +CREATE TABLE pk_test21 (a String, b String PRIMARY KEY, c String PRIMARY KEY) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } +CREATE TABLE pk_test22 (a String, b String PRIMARY KEY, c String) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } +CREATE TABLE pk_test23 (a String, b String, c String PRIMARY KEY) ORDER BY (a,b,c); -- { serverError BAD_ARGUMENTS } DROP TABLE IF EXISTS pk_test1; DROP TABLE IF EXISTS pk_test2; From e80f2a0acc91d9003880f4054f05b3e23b9a2679 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 09:55:30 +0000 Subject: [PATCH 285/522] Define default_table_engine in sqllogictest --- tests/sqllogic/connection.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index a9976a7beca..5e2634787d8 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -62,6 +62,7 @@ def default_clickhouse_odbc_conn_str(): return str( OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", + Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree" ) ) From 12ebb30781e8427a1e797464c3bd4675787c87e9 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:34:34 +0000 Subject: [PATCH 286/522] style --- src/Parsers/ParserCreateQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 
60e15cb92f4..c4c02ab7417 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -306,9 +306,9 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E { if (auto *cd = elem->as()) { - if(cd->primary_key_specifier) + if (cd->primary_key_specifier) { - if(!primary_key_from_columns) + if (!primary_key_from_columns) primary_key_from_columns = makeASTFunction("tuple"); auto column_identifier = std::make_shared(cd->name); primary_key_from_columns->children[0]->as()->children.push_back(column_identifier); From c7ccf23a24a7fb2bb1245b76fc9169649cd474c3 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 10:44:06 +0000 Subject: [PATCH 287/522] Update CREATE TABLE docs --- .../mergetree-family/mergetree.md | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 67043ef1062..4f506126682 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -37,8 +37,8 @@ The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2], + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [TTL expr1] [CODEC(codec1)] [[NOT] NULL|PRIMARY KEY], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [TTL expr2] [CODEC(codec2)] [[NOT] NULL|PRIMARY KEY], ... INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1], INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2], @@ -439,41 +439,41 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - `random_seed` — The seed for Bloom filter hash functions. -Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows: +Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. 
Query statements are as follows:
 ```sql
-CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
-AS
-(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2));
-
-CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
-AS
-(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
-
-CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
-AS
-(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions);
-
-CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
-AS
+CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
+AS
+(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2));
+
+CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
+AS
+(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
+
+CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
+AS
+(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions);
+
+CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
+AS
 (number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions))))
-```
+```

 To use those functions, we need to specify at least two parameters.
-For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries:
-
+For example, if there are 4300 ngrams in the granule and we expect false positives to be less than 0.0001, the other parameters can be estimated by executing the following queries:
+
 ```sql
 --- estimate number of bits in the filter
-SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;
+SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;

 ┌─size_of_bloom_filter_in_bytes─┐
 │                         10304 │
 └───────────────────────────────┘
-
+
 --- estimate number of hash functions
 SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions
-
+
 ┌─number_of_hash_functions─┐
 │                       13 │
 └──────────────────────────┘
@@ -991,7 +991,7 @@ use a local disk to cache data from a table stored at a URL. Neither the cache d
 nor the web storage is configured in the ClickHouse configuration files; both are
 configured in the CREATE/ATTACH query settings.

-In the settings highlighted below notice that the disk of `type=web` is nested within
+In the settings highlighted below notice that the disk of `type=web` is nested within
 the disk of `type=cache`.

 ```sql
@@ -1308,7 +1308,7 @@ configuration file.
In this sample configuration: - the disk is of type `web` - the data is hosted at `http://nginx:80/test1/` -- a cache on local storage is used +- a cache on local storage is used ```xml From 86fc70223693db8aac9edfa7c85e7e80286042ec Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 15:14:18 +0300 Subject: [PATCH 288/522] Add skipWhitespacesAndTabs() Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 60f1cbe1f80..79ce2549b4d 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -354,6 +354,7 @@ bool CSVFormatReader::checkForSuffix() bool CSVFormatReader::checkForEndOfRow() { + skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter); return buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'; } From c23e29d6aa836980337683800c6c2b029cfb7c40 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 20 Jun 2023 20:27:56 +0200 Subject: [PATCH 289/522] don't account session's memory in thread/user mem tracker --- src/Common/MemoryTrackerSwitcher.h | 42 ++++++++++++++++++++ src/IO/HTTPCommon.cpp | 4 ++ src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.h | 35 +++------------- src/Server/InterserverIOHTTPHandler.cpp | 1 + 5 files changed, 54 insertions(+), 30 deletions(-) create mode 100644 src/Common/MemoryTrackerSwitcher.h diff --git a/src/Common/MemoryTrackerSwitcher.h b/src/Common/MemoryTrackerSwitcher.h new file mode 100644 index 00000000000..0fefcbb280a --- /dev/null +++ b/src/Common/MemoryTrackerSwitcher.h @@ -0,0 +1,42 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +struct MemoryTrackerSwitcher +{ + explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker) + { + if (!current_thread) + throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized"); + + auto * thread_tracker = CurrentThread::getMemoryTracker(); + prev_untracked_memory = current_thread->untracked_memory; + prev_memory_tracker_parent = thread_tracker->getParent(); + + current_thread->untracked_memory = 0; + thread_tracker->setParent(new_tracker); + } + + ~MemoryTrackerSwitcher() + { + CurrentThread::flushUntrackedMemory(); + auto * thread_tracker = CurrentThread::getMemoryTracker(); + + current_thread->untracked_memory = prev_untracked_memory; + thread_tracker->setParent(prev_memory_tracker_parent); + } + + MemoryTracker * prev_memory_tracker_parent = nullptr; + Int64 prev_untracked_memory = 0; +}; + +} diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index f3e2064c8bf..1731b4022ea 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,9 @@ namespace ObjectPtr allocObject() override { + /// Pool is global, we shouldn't attribute this memory to query/user. 
+ MemoryTrackerSwitcher switcher{&total_memory_tracker}; + auto session = makeHTTPSessionImpl(host, port, https, true, resolve_host); if (!proxy_host.empty()) { diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index dc2310cfebf..e6417de53b4 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -125,7 +125,7 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep // Entries data must be destroyed in context of user who runs async insert. // Each entry in the list may correspond to a different user, // so we need to switch current thread's MemoryTracker. - UserMemoryTrackerSwitcher switcher(user_memory_tracker); + MemoryTrackerSwitcher switcher(user_memory_tracker); bytes = ""; } diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index bc60c86d067..f18db69a7bb 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -1,10 +1,12 @@ #pragma once -#include -#include -#include #include +#include #include +#include +#include +#include + #include namespace DB @@ -60,31 +62,6 @@ private: UInt128 calculateHash() const; }; - struct UserMemoryTrackerSwitcher - { - explicit UserMemoryTrackerSwitcher(MemoryTracker * new_tracker) - { - auto * thread_tracker = CurrentThread::getMemoryTracker(); - prev_untracked_memory = current_thread->untracked_memory; - prev_memory_tracker_parent = thread_tracker->getParent(); - - current_thread->untracked_memory = 0; - thread_tracker->setParent(new_tracker); - } - - ~UserMemoryTrackerSwitcher() - { - CurrentThread::flushUntrackedMemory(); - auto * thread_tracker = CurrentThread::getMemoryTracker(); - - current_thread->untracked_memory = prev_untracked_memory; - thread_tracker->setParent(prev_memory_tracker_parent); - } - - MemoryTracker * prev_memory_tracker_parent; - Int64 prev_untracked_memory; - }; - struct InsertData { struct Entry @@ -114,7 +91,7 @@ private: // so we need to switch current thread's MemoryTracker parent on each iteration. while (it != entries.end()) { - UserMemoryTrackerSwitcher switcher((*it)->user_memory_tracker); + MemoryTrackerSwitcher switcher((*it)->user_memory_tracker); it = entries.erase(it); } } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index ea71d954cc0..9741592868a 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -80,6 +80,7 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { setThreadName("IntersrvHandler"); + ThreadStatus thread_status; /// In order to work keep-alive. 
if (request.getVersion() == HTTPServerRequest::HTTP_1_1) From aec720563612e3d7faa09bcb2c4b2cc4e5e8935c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 3 Jul 2023 23:11:32 +0200 Subject: [PATCH 290/522] rework pool usage --- src/IO/HTTPCommon.cpp | 44 ++++++++----- src/IO/HTTPCommon.h | 12 ++++ src/IO/ReadBufferFromS3.cpp | 29 ++++++--- src/IO/ReadWriteBufferFromHTTP.cpp | 65 ++++++++----------- .../Formats/Impl/AvroRowInputFormat.cpp | 22 +++---- 5 files changed, 95 insertions(+), 77 deletions(-) diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 1731b4022ea..2f5e0a172a0 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -41,6 +42,7 @@ namespace ErrorCodes extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS; extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME; extern const int UNSUPPORTED_URI_SCHEME; + extern const int LOGICAL_ERROR; } @@ -271,27 +273,17 @@ namespace auto retry_timeout = timeouts.connection_timeout.totalMicroseconds(); auto session = pool_ptr->second->get(retry_timeout); - /// We store exception messages in session data. - /// Poco HTTPSession also stores exception, but it can be removed at any time. const auto & session_data = session->sessionData(); - if (!session_data.empty()) + if (session_data.empty() || !Poco::AnyCast(&session_data)) { - auto msg = Poco::AnyCast(session_data); - if (!msg.empty()) - { - LOG_TRACE((&Poco::Logger::get("HTTPCommon")), "Failed communicating with {} with error '{}' will try to reconnect session", host, msg); + session->reset(); - if (resolve_host) - { - updateHostIfIpChanged(session, DNSResolver::instance().resolveHost(host).toString()); - } - } - /// Reset the message, once it has been printed, - /// otherwise you will get report for failed parts on and on, - /// even for different tables (since they uses the same session). - session->attachSessionData({}); + if (resolve_host) + updateHostIfIpChanged(session, DNSResolver::instance().resolveHost(host).toString()); } + session->attachSessionData({}); + setTimeouts(*session, timeouts); return session; @@ -388,4 +380,24 @@ Exception HTTPException::makeExceptionMessage( uri, static_cast(http_status), reason, body); } +void markSessionForReuse(Poco::Net::HTTPSession & session) +{ + const auto & session_data = session.sessionData(); + if (!session_data.empty() && !Poco::AnyCast(&session_data)) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Data of an unexpected type ({}) is attached to the session", session_data.type().name()); + + session.attachSessionData(HTTPSessionReuseTag{}); +} + +void markSessionForReuse(HTTPSessionPtr session) +{ + markSessionForReuse(*session); +} + +void markSessionForReuse(PooledHTTPSessionPtr session) +{ + markSessionForReuse(static_cast(*session)); +} + } diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index db8fc2a2a40..4733f366c8a 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -55,6 +55,18 @@ private: using PooledHTTPSessionPtr = PoolBase::Entry; // SingleEndpointHTTPSessionPool::Entry using HTTPSessionPtr = std::shared_ptr; +/// If a session have this tag attached, it will be reused without calling `reset()` on it. +/// All pooled sessions don't have this tag attached after being taken from a pool. +/// If the request and the response were fully written/read, the client code should add this tag +/// explicitly by calling `markSessionForReuse()`. 
+struct HTTPSessionReuseTag +{ +}; + +void markSessionForReuse(HTTPSessionPtr session); +void markSessionForReuse(PooledHTTPSessionPtr session); + + void setResponseDefaultHeaders(HTTPServerResponse & response, size_t keep_alive_timeout); /// Create session object to perform requests and set required parameters. diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index fdbe1a4ba57..5c562d32fbc 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -1,3 +1,4 @@ +#include #include #include "config.h" @@ -35,31 +36,41 @@ namespace ProfileEvents namespace { -void resetSession(Aws::S3::Model::GetObjectResult & read_result) +DB::PooledHTTPSessionPtr getSession(Aws::S3::Model::GetObjectResult & read_result) { if (auto * session_aware_stream = dynamic_cast *>(&read_result.GetBody())) - { - auto & session - = static_cast(*static_cast(session_aware_stream->getSession())); - session.reset(); - } + return static_cast(session_aware_stream->getSession()); else if (!dynamic_cast *>(&read_result.GetBody())) - { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Session of unexpected type encountered"); + return {}; +} + +void resetSession(Aws::S3::Model::GetObjectResult & read_result) +{ + if (auto session = getSession(read_result); !session.isNull()) + { + auto & http_session = static_cast(*session); + http_session.reset(); } } void resetSessionIfNeeded(bool read_all_range_successfully, std::optional & read_result) { - if (!read_all_range_successfully && read_result) + if (!read_result) + return; + + if (!read_all_range_successfully) { /// When we abandon a session with an ongoing GetObject request and there is another one trying to delete the same object this delete /// operation will hang until GetObject's session idle timeouts. So we have to call `reset()` on GetObject's session session immediately. 
resetSession(*read_result); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3ResetSessions); } - else + else if (auto session = getSession(*read_result); !session.isNull()) + { + DB::markSessionForReuse(session); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3PreservedSessions); + } } } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index cf1159bfb4b..b834c17ab6c 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -1,5 +1,7 @@ #include "ReadWriteBufferFromHTTP.h" +#include + namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; @@ -146,30 +148,20 @@ std::istream * ReadWriteBufferFromHTTPBase::callImpl( LOG_TRACE(log, "Sending request to {}", uri_.toString()); auto sess = current_session->getSession(); - try - { - auto & stream_out = sess->sendRequest(request); + auto & stream_out = sess->sendRequest(request); - if (out_stream_callback) - out_stream_callback(stream_out); + if (out_stream_callback) + out_stream_callback(stream_out); - auto result_istr = receiveResponse(*sess, request, response, true); - response.getCookies(cookies); + auto result_istr = receiveResponse(*sess, request, response, true); + response.getCookies(cookies); - /// we can fetch object info while the request is being processed - /// and we don't want to override any context used by it - if (!for_object_info) - content_encoding = response.get("Content-Encoding", ""); + /// we can fetch object info while the request is being processed + /// and we don't want to override any context used by it + if (!for_object_info) + content_encoding = response.get("Content-Encoding", ""); - return result_istr; - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - sess->attachSessionData(e.message()); - throw; - } + return result_istr; } template @@ -429,23 +421,10 @@ void ReadWriteBufferFromHTTPBase::initialize() if (!read_range.end && response.hasContentLength()) file_info = parseFileInfo(response, withPartialContent(read_range) ? getOffset() : 0); - try - { - impl = std::make_unique(*istr, buffer_size); + impl = std::make_unique(*istr, buffer_size); - if (use_external_buffer) - { - setupExternalBuffer(); - } - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - auto sess = session->getSession(); - sess->attachSessionData(e.message()); - throw; - } + if (use_external_buffer) + setupExternalBuffer(); } template @@ -460,7 +439,11 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() if ((read_range.end && getOffset() > read_range.end.value()) || (file_info && file_info->file_size && getOffset() >= file_info->file_size.value())) + { + /// Response was fully read. + markSessionForReuse(session->getSession()); return false; + } if (impl) { @@ -582,7 +565,11 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() std::rethrow_exception(exception); if (!result) + { + /// Eof is reached, i.e response was fully read. 
+ markSessionForReuse(session->getSession()); return false; + } internal_buffer = impl->buffer(); working_buffer = internal_buffer; @@ -635,12 +622,14 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si bool cancelled; size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); + if (!cancelled) + /// Response was fully read. + markSessionForReuse(sess); + return r; } catch (const Poco::Exception & e) { - sess->attachSessionData(e.message()); - LOG_ERROR( log, "HTTP request (positioned) to `{}` with range [{}, {}) failed at try {}/{}: {}", diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 1ec7491658e..4cd73cb23b5 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -935,23 +935,17 @@ private: request.setHost(url.getHost()); auto session = makePooledHTTPSession(url, timeouts, 1); - std::istream * response_body{}; - try - { - session->sendRequest(request); + session->sendRequest(request); + + Poco::Net::HTTPResponse response; + std::istream * response_body = receiveResponse(*session, request, response, false); - Poco::Net::HTTPResponse response; - response_body = receiveResponse(*session, request, response, false); - } - catch (const Poco::Exception & e) - { - /// We use session data storage as storage for exception text - /// Depend on it we can deduce to reconnect session or reresolve session host - session->attachSessionData(e.message()); - throw; - } Poco::JSON::Parser parser; auto json_body = parser.parse(*response_body).extract(); + + /// Response was fully read. + markSessionForReuse(session); + auto schema = json_body->getValue("schema"); LOG_TRACE((&Poco::Logger::get("AvroConfluentRowInputFormat")), "Successfully fetched schema id = {}\n{}", id, schema); return avro::compileJsonSchemaFromString(schema); From 5a6957d95e46861f39bdb1c39e442951b1e26d47 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 13:02:03 +0000 Subject: [PATCH 291/522] Disable ThinLTO on non-Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-compiling on Linux for Mac failed with CMake parameters -DCMAKE_BUILD_TYPE=None -DENABLE_CLICKHOUSE_SELF_EXTRACTING=1 -DENABLE_TESTS=0 (see below). This happened e.g. in #51243. The problem was that ThinLTO enabled/disabled depends on ENABLE_TESTS (see the top-level CMakeLists.txt). If ENABLE_TESTS=0 then ThinLTO is activated. On Linux, building/linking works with or without ThinLTO but on Mac building/linking the self-extracting compressor binary doesn’t work if ThinLTO is on. This is quite weird, as a workaround restrict ThinLTO to Linux. ------- [185/187] Linking CXX static library base/glibc-compatibility/libglibc-compatibility.a [186/187] Linking CXX static library contrib/zstd-cmake/lib_zstd.a [187/187] Linking CXX executable utils/self-extracting-executable/pre_compressor -- Configuring done -- Generating done -- Build files have been written to: /home/ubuntu/repo/ch4/build [0/2] Re-checking globbed directories... 
[108/108] Linking CXX executable utils/self-extracting-executable/pre_compressor FAILED: utils/self-extracting-executable/pre_compressor : && /usr/bin/clang++-16 --target=x86_64-apple-darwin -std=c++20 -fdiagnostics-color=always -Xclang -fuse-ctor-homing -Wno-enum-constexpr-conversion -fsized-deallocation -gdwarf-aranges -pipe -mssse3 -msse4.1 -msse4.2 -mpclmul -mpopcnt -fasynchronous-unwind-tables -ffile-prefix-map=/home/ubuntu/repo/ch4=. -falign-functions=32 -mbranches-within-32B-boundaries -stdlib=libc++ -fdiagnostics-absolute-paths -fstrict -vtable-pointers -Wall -Wextra -Wframe-larger-than=65536 -Weverything -Wpedantic -Wno-zero-length-array -Wno-c++98-compat-pedantic -Wno-c++98-compat -Wno-c++20-compat -Wno-sign-conversion -Wno-implicit-int-conversion -Wno-implicit-int-float-conversion -Wno-ctad-maybe-unsupported -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-double-promotion -Wno-exit-time-destructors -Wno-float-equal -Wn o-global-constructors -Wno-missing-prototypes -Wno-missing-variable-declarations -Wno-padded -Wno-switch-enum -Wno-undefined-func-template -Wno-unused-template -Wno-vla -Wno-weak-template-vtables -Wno-weak-vtables -Wno-thread-safety-negative -Wno-enum-constexpr-conversion -Wno-unsafe-buffer-usage -O2 -g -DNDEBUG -O3 -g -gdwarf-4 -flto=thin -fwhole-program-vtables -isysroot /home/ubuntu/repo/ch4/cmake/darwin/.. /toolchain/darwin-x86_64 -mmacosx-version-min=10.15 -Wl,-headerpad_max_install_names --ld-path=/home/ubuntu/cctools/bin/x86_64-apple-darwin-ld -rdynamic -Wl,-U,_inside_main -flto=thin -fwhole-program-vtables utils/self-extracting-executable/CMakeFiles/pre_compressor.dir/compressor.cpp.o -o utils/self-extracting-executable/pre_compressor contrib/zstd-cmake/lib_zstd.a contrib/libcxx-cmake/libcxx.a contrib/lib cxxabi-cmake/libcxxabi.a -nodefaultlibs -lc -lm -lpthread -ldl && : clang: warning: argument unused during compilation: '-stdlib=libc++' [-Wunused-command-line-argument] ld: warning: ignoring file utils/self-extracting-executable/CMakeFiles/pre_compressor.dir/compressor.cpp.o, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0xDE 0xC0 0x17 0x0B 0x00 0x00 0x00 0x00 0x14 0x00 0x00 0x00 0x88 0x3E 0x03 0x00 ) ld: warning: ignoring file contrib/zstd-cmake/lib_zstd.a, building for macOS-x86_64 but attempting to link with file built for macOS-x86_64 ld: warning: ignoring file contrib/libcxxabi-cmake/libcxxabi.a, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0x21 0x3C 0x61 0x72 0x63 0x68 0x3E 0x0A 0x23 0x31 0x2F 0x31 0x32 0x20 0x20 0x20 ) ld: warning: ignoring file contrib/libcxx-cmake/libcxx.a, building for macOS-x86_64 but attempting to link with file built for unknown-unsupported file format ( 0x21 0x3C 0x61 0x72 0x63 0x68 0x3E 0x0A 0x23 0x31 0x2F 0x31 0x32 0x20 0x20 0x20 ) Undefined symbols for architecture x86_64: "_main", referenced from: implicit entry/start for main executable ld: symbol(s) not found for architecture x86_64 clang: error: linker command failed with exit code 1 (use -v to see invocation) ninja: build stopped: subcommand failed. 
--- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6ed75bb29..06ee98b5ee1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -344,9 +344,9 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - if (NOT ENABLE_TESTS AND NOT SANITIZE) + if (NOT ENABLE_TESTS AND NOT SANITIZE AND OS_LINUX) # https://clang.llvm.org/docs/ThinLTO.html - # Applies to clang only. + # Applies to clang and linux only. # Disabled when building with tests or sanitizers. option(ENABLE_THINLTO "Clang-specific link time optimization" ON) endif() From 28332076054cc77660a4dbc3e13dcea1999a6342 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Thu, 6 Jul 2023 13:09:49 +0000 Subject: [PATCH 292/522] Edit tests to test last commit --- tests/queries/0_stateless/00301_csv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index 776bd39fc03..80053c99a17 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -44,7 +44,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; echo === Test ignore extra columns $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (s String, n UInt64 DEFAULT 3, d String DEFAULT 'String4') ENGINE = Memory"; -echo '"Hello", 1, "String1" +echo '"Hello", 1, "String1" "Hello", 2, "String2", "Hello", 3, "String3", "2016-01-13" "Hello", 4, , "2016-01-14" From 6bbaade4a63524c4c1c4376e18d8fa1f3e3914a9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 6 Jul 2023 13:15:38 +0200 Subject: [PATCH 293/522] Update sccache, do not fail on connection error --- docker/test/util/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index 85e888f1df7..b255a2cc23d 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -94,7 +94,10 @@ RUN mkdir /tmp/ccache \ && rm -rf /tmp/ccache ARG TARGETARCH -ARG SCCACHE_VERSION=v0.4.1 +ARG SCCACHE_VERSION=v0.5.4 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region. So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ amd64) rarch=x86_64 ;; \ From 7644f0b37c88cd924f20ecec4acc599e50491423 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:44:06 +0000 Subject: [PATCH 294/522] Cosmetics: move code around --- src/IO/VarInt.h | 282 +++++++++++++++++++++++------------------------- 1 file changed, 132 insertions(+), 150 deletions(-) diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index f6441391c8f..a88347d68eb 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -12,24 +12,77 @@ namespace DB /// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding. 
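 /// (Illustrative example: the value 150, binary 0b1001'0110, is encoded as the two
 /// bytes 0x96 0x01: low 7 bits first, with the high bit set on every byte but the last.)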
-/// Write UInt64 in variable length format (base128) -void writeVarUInt(UInt64 x, std::ostream & ostr); -void writeVarUInt(UInt64 x, WriteBuffer & ostr); -char * writeVarUInt(UInt64 x, char * ostr); - -/// Read UInt64, written in variable length format (base128) -void readVarUInt(UInt64 & x, std::istream & istr); -void readVarUInt(UInt64 & x, ReadBuffer & istr); -const char * readVarUInt(UInt64 & x, const char * istr, size_t size); - -/// Get the length of an variable-length-encoded integer -size_t getLengthOfVarUInt(UInt64 x); -size_t getLengthOfVarInt(Int64 x); - [[noreturn]] void throwReadAfterEOF(); [[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x); -/// Write Int64 in variable length format (base128) + +/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under +/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the +/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python)) +constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; + +inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + ostr.nextIfAtEnd(); + *ostr.position() = byte; + ++ostr.position(); + + x >>= 7; + if (!x) + return; + } +} + +inline void writeVarUInt(UInt64 x, std::ostream & ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + ostr.put(byte); + + x >>= 7; + if (!x) + return; + } +} + +inline char * writeVarUInt(UInt64 x, char * ostr) +{ + if (x > VAR_UINT_MAX) [[unlikely]] + throwValueTooLargeForVarIntEncoding(x); + + for (size_t i = 0; i < 9; ++i) + { + uint8_t byte = x & 0x7F; + if (x > 0x7F) + byte |= 0x80; + + *ostr = byte; + ++ostr; + + x >>= 7; + if (!x) + return ostr; + } + + return ostr; +} + template inline void writeVarInt(Int64 x, Out & ostr) { @@ -41,8 +94,71 @@ inline char * writeVarInt(Int64 x, char * ostr) return writeVarUInt(static_cast((x << 1) ^ (x >> 63)), ostr); } +namespace impl +{ + +template +inline void readVarUInt(UInt64 & x, ReadBuffer & istr) +{ + x = 0; + for (size_t i = 0; i < 9; ++i) + { + if constexpr (!fast) + if (istr.eof()) [[unlikely]] + throwReadAfterEOF(); + + UInt64 byte = *istr.position(); + ++istr.position(); + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return; + } +} + +} + +inline void readVarUInt(UInt64 & x, ReadBuffer & istr) +{ + if (istr.buffer().end() - istr.position() >= 9) + return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); +} + +inline void readVarUInt(UInt64 & x, std::istream & istr) +{ + x = 0; + for (size_t i = 0; i < 9; ++i) + { + UInt64 byte = istr.get(); + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return; + } +} + +inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) +{ + const char * end = istr + size; + + x = 0; + for (size_t i = 0; i < 9; ++i) + { + if (istr == end) [[unlikely]] + throwReadAfterEOF(); + + UInt64 byte = *istr; + ++istr; + x |= (byte & 0x7F) << (7 * i); + + if (!(byte & 0x80)) + return istr; + } + + return istr; +} -/// Read Int64, written in variable length format (base128) template inline void readVarInt(Int64 & x, In & istr) { @@ -57,9 +173,6 @@ inline const 
char * readVarInt(Int64 & x, const char * istr, size_t size) return res; } - -/// For [U]Int32, [U]Int16, size_t. - inline void readVarUInt(UInt32 & x, ReadBuffer & istr) { UInt64 tmp; @@ -97,137 +210,6 @@ inline void readVarUInt(T & x, ReadBuffer & istr) x = tmp; } -template -inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr) -{ - x = 0; - for (size_t i = 0; i < 9; ++i) - { - if constexpr (!fast) - if (istr.eof()) [[unlikely]] - throwReadAfterEOF(); - - UInt64 byte = *istr.position(); - ++istr.position(); - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return; - } -} - -inline void readVarUInt(UInt64 & x, ReadBuffer & istr) -{ - if (istr.buffer().end() - istr.position() >= 9) - return readVarUIntImpl(x, istr); - return readVarUIntImpl(x, istr); -} - - -inline void readVarUInt(UInt64 & x, std::istream & istr) -{ - x = 0; - for (size_t i = 0; i < 9; ++i) - { - UInt64 byte = istr.get(); - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return; - } -} - -inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) -{ - const char * end = istr + size; - - x = 0; - for (size_t i = 0; i < 9; ++i) - { - if (istr == end) [[unlikely]] - throwReadAfterEOF(); - - UInt64 byte = *istr; - ++istr; - x |= (byte & 0x7F) << (7 * i); - - if (!(byte & 0x80)) - return istr; - } - - return istr; -} - -/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under -/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the -/// same limitation, others support the full 1<<64 range, e.g. clickhouse-driver (Python)) -constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; - -inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - ostr.nextIfAtEnd(); - *ostr.position() = byte; - ++ostr.position(); - - x >>= 7; - if (!x) - return; - } -} - - -inline void writeVarUInt(UInt64 x, std::ostream & ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - ostr.put(byte); - - x >>= 7; - if (!x) - return; - } -} - - -inline char * writeVarUInt(UInt64 x, char * ostr) -{ - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) - { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - - *ostr = byte; - ++ostr; - - x >>= 7; - if (!x) - return ostr; - } - - return ostr; -} - - inline size_t getLengthOfVarUInt(UInt64 x) { return x < (1ULL << 7) ? 
1 From 3f744c1e14ba7350c2dab4a8ccf145c26762f0c3 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:47:40 +0000 Subject: [PATCH 295/522] Cosmetics: rename template parameter --- src/IO/VarInt.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index a88347d68eb..9099b5e7f6a 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -97,13 +97,13 @@ inline char * writeVarInt(Int64 x, char * ostr) namespace impl { -template +template inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { x = 0; for (size_t i = 0; i < 9; ++i) { - if constexpr (!fast) + if constexpr (check_eof) if (istr.eof()) [[unlikely]] throwReadAfterEOF(); @@ -121,8 +121,8 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { if (istr.buffer().end() - istr.position() >= 9) - return impl::readVarUInt(x, istr); - return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); + return impl::readVarUInt(x, istr); } inline void readVarUInt(UInt64 & x, std::istream & istr) From abf36065b7bbddeba2b80f76ad966a9167852089 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Jul 2023 17:24:06 +0200 Subject: [PATCH 296/522] fix --- .../ReplicatedMergeTreePartCheckThread.cpp | 89 ++++++++++--------- .../ReplicatedMergeTreePartCheckThread.h | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 17 +++- .../__init__.py | 0 .../configs/testkeeper.xml | 6 -- .../test.py | 65 -------------- .../02254_projection_broken_part.reference | 6 ++ .../02254_projection_broken_part.sh | 44 +++++++++ 8 files changed, 115 insertions(+), 116 deletions(-) delete mode 100644 tests/integration/test_projection_report_broken_part/__init__.py delete mode 100644 tests/integration/test_projection_report_broken_part/configs/testkeeper.xml delete mode 100644 tests/integration/test_projection_report_broken_part/test.py create mode 100644 tests/queries/0_stateless/02254_projection_broken_part.reference create mode 100755 tests/queries/0_stateless/02254_projection_broken_part.sh diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 1cc3736bd2e..ffe3f883f80 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -63,6 +63,7 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t if (parts_set.contains(name)) return; + LOG_TRACE(log, "Enqueueing {} for check after {}s", name, delay_to_check_seconds); parts_queue.emplace_back(name, time(nullptr) + delay_to_check_seconds); parts_set.insert(name); task->schedule(); @@ -423,7 +424,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St } -CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name) +CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & part_name, std::optional * recheck_after) { LOG_INFO(log, "Checking part {}", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); @@ -438,7 +439,11 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p break; case ReplicatedCheckResult::RecheckLater: - enqueuePart(part_name, result.recheck_after); + /// NOTE We cannot enqueue it from the check thread itself + if (recheck_after) + *recheck_after = result.recheck_after; + else + enqueuePart(part_name, result.recheck_after); break; 
case ReplicatedCheckResult::DetachUnexpected: @@ -471,10 +476,22 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p /// Part is not in ZooKeeper and not on disk (so there's nothing to detach or remove from ZooKeeper). /// Probably we cannot execute some entry from the replication queue (so don't need to enqueue another one). - /// Either all replicas having the part are not active, or the part is lost forever. + /// Either all replicas having the part are not active... bool found_something = searchForMissingPartOnOtherReplicas(part_name); - if (!found_something) - onPartIsLostForever(part_name); + if (found_something) + break; + + /// ... or the part is lost forever + bool handled_lost_part = onPartIsLostForever(part_name); + if (handled_lost_part) + break; + + /// We failed to create empty part, need retry + constexpr time_t retry_after_seconds = 30; + if (recheck_after) + *recheck_after = retry_after_seconds; + else + enqueuePart(part_name, retry_after_seconds); break; } @@ -483,7 +500,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPartAndFix(const String & p return result.status; } -void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) +bool ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part_name) { auto lost_part_info = MergeTreePartInfo::fromPartName(part_name, storage.format_version); if (lost_part_info.level != 0 || lost_part_info.mutation != 0) @@ -499,7 +516,7 @@ void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part for (const String & source_part_name : source_parts) enqueuePart(source_part_name); - return; + return true; } } @@ -512,13 +529,11 @@ void ReplicatedMergeTreePartCheckThread::onPartIsLostForever(const String & part */ LOG_ERROR(log, "Part {} is lost forever.", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedDataLoss); + return true; } - else - { - LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); - constexpr time_t retry_after_seconds = 30; - enqueuePart(part_name, retry_after_seconds); - } + + LOG_WARNING(log, "Cannot create empty part {} instead of lost. Will retry later", part_name); + return false; } @@ -533,42 +548,29 @@ void ReplicatedMergeTreePartCheckThread::run() /// Take part from the queue for verification. PartsToCheckQueue::iterator selected = parts_queue.end(); /// end from std::list is not get invalidated - time_t min_check_time = std::numeric_limits::max(); { std::lock_guard lock(parts_mutex); - if (parts_queue.empty()) + if (parts_queue.empty() && !parts_set.empty()) { - if (!parts_set.empty()) - { - parts_set.clear(); - throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty parts_set with empty parts_queue. This is a bug."); - } + parts_set.clear(); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty parts_set with empty parts_queue. 
This is a bug."); } - else - { - for (auto it = parts_queue.begin(); it != parts_queue.end(); ++it) - { - if (it->second <= current_time) - { - selected = it; - break; - } - if (it->second < min_check_time) - { - min_check_time = it->second; - selected = it; - } - } - } + selected = std::find_if(parts_queue.begin(), parts_queue.end(), [current_time](const auto & elem) + { + return elem.second <= current_time; + }); + if (selected == parts_queue.end()) + return; + + /// Move selected part to the end of the queue + parts_queue.splice(parts_queue.end(), parts_queue, selected); } - if (selected == parts_queue.end()) - return; - - checkPartAndFix(selected->first); + std::optional recheck_after; + checkPartAndFix(selected->first, &recheck_after); if (need_stop) return; @@ -581,6 +583,11 @@ void ReplicatedMergeTreePartCheckThread::run() { throw Exception(ErrorCodes::LOGICAL_ERROR, "Someone erased checking part from parts_queue. This is a bug."); } + else if (recheck_after.has_value()) + { + LOG_TRACE(log, "Will recheck part {} after {}s", selected->first, *recheck_after); + selected->second = time(nullptr) + *recheck_after; + } else { parts_set.erase(selected->first); @@ -596,7 +603,7 @@ void ReplicatedMergeTreePartCheckThread::run() { tryLogCurrentException(log, __PRETTY_FUNCTION__); - if (e.code == Coordination::Error::ZSESSIONEXPIRED) + if (Coordination::isHardwareError(e.code)) return; task->scheduleAfter(PART_CHECK_ERROR_SLEEP_MS); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 0a8fbc75c05..fc76cbad4ed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -65,7 +65,7 @@ public: size_t size() const; /// Check part by name - CheckResult checkPartAndFix(const String & part_name); + CheckResult checkPartAndFix(const String & part_name, std::optional * recheck_after = nullptr); ReplicatedCheckResult checkPartImpl(const String & part_name); @@ -77,7 +77,7 @@ public: private: void run(); - void onPartIsLostForever(const String & part_name); + bool onPartIsLostForever(const String & part_name); std::pair findLocalPart(const String & part_name); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ff319e47946..e8176ac1d5f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3366,6 +3366,10 @@ bool StorageReplicatedMergeTree::canExecuteFetch(const ReplicatedMergeTreeLogEnt { disable_reason = fmt::format("Not executing fetch of part {} because we still have broken part with that name. 
" "Waiting for the broken part to be removed first.", entry.new_part_name); + + constexpr time_t min_interval_to_wakeup_cleanup_s = 30; + if (entry.last_postpone_time + min_interval_to_wakeup_cleanup_s < time(nullptr)) + const_cast(this)->cleanup_thread.wakeup(); return false; } } @@ -3753,11 +3757,13 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n DataPartPtr broken_part; auto outdate_broken_part = [this, &broken_part]() { - if (broken_part) + if (!broken_part) return; DataPartsLock lock = lockParts(); if (broken_part->getState() == DataPartState::Active) removePartsFromWorkingSet(NO_TRANSACTION_RAW, {broken_part}, true, &lock); + broken_part.reset(); + cleanup_thread.wakeup(); }; /// We don't know exactly what happened to broken part @@ -3767,6 +3773,7 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n auto partition_range = getDataPartsVectorInPartitionForInternalUsage({MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}, broken_part_info.partition_id); + Strings detached_parts; for (const auto & part : partition_range) { if (!broken_part_info.contains(part->info)) @@ -3784,7 +3791,9 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n { part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); } + detached_parts.push_back(part->name); } + LOG_WARNING(log, "Detached {} parts covered by broken part {}: {}", detached_parts.size(), part_name, fmt::join(detached_parts, ", ")); ThreadFuzzer::maybeInjectSleep(); ThreadFuzzer::maybeInjectMemoryLimitException(); @@ -3873,10 +3882,14 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n zkutil::KeeperMultiException::check(rc, ops, results); + String path_created = dynamic_cast(*results.back()).path_created; + log_entry->znode_name = path_created.substr(path_created.find_last_of('/') + 1); + LOG_DEBUG(log, "Created entry {} to fetch missing part {}", log_entry->znode_name, part_name); + queue.insert(zookeeper, log_entry); + /// Make the part outdated after creating the log entry. 
/// Otherwise, if we failed to create the entry, cleanup thread could remove the part from ZooKeeper (leading to diverged replicas) outdate_broken_part(); - queue_updating_task->schedule(); return; } } diff --git a/tests/integration/test_projection_report_broken_part/__init__.py b/tests/integration/test_projection_report_broken_part/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml b/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml deleted file mode 100644 index 617371b13fa..00000000000 --- a/tests/integration/test_projection_report_broken_part/configs/testkeeper.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - testkeeper - - diff --git a/tests/integration/test_projection_report_broken_part/test.py b/tests/integration/test_projection_report_broken_part/test.py deleted file mode 100644 index f376adf4f1a..00000000000 --- a/tests/integration/test_projection_report_broken_part/test.py +++ /dev/null @@ -1,65 +0,0 @@ -# pylint: disable=unused-argument -# pylint: disable=redefined-outer-name -# pylint: disable=line-too-long - -import pytest -import time - -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=[ - "configs/testkeeper.xml", - ], -) - - -@pytest.fixture(scope="module", autouse=True) -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_projection_broken_part(): - node.query( - """ - create table test_projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/clickhouse-tables/test_projection_broken_parts', 'r1') - order by a settings index_granularity = 1; - - create table test_projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) - engine ReplicatedMergeTree('/clickhouse-tables/test_projection_broken_parts', 'r2') - order by a settings index_granularity = 1; - - insert into test_projection_broken_parts_1 values (1, 1), (1, 2), (1, 3); - - system sync replica test_projection_broken_parts_2; - """ - ) - - # break projection part - node.exec_in_container( - [ - "bash", - "-c", - "rm /var/lib/clickhouse/data/default/test_projection_broken_parts_1/all_0_0_0/ab.proj/data.bin", - ] - ) - - expected_error = "No such file or directory" - assert expected_error in node.query_and_get_error( - "select sum(b) from test_projection_broken_parts_1 group by a" - ) - - time.sleep(2) - - assert ( - int(node.query("select sum(b) from test_projection_broken_parts_1 group by a")) - == 6 - ) diff --git a/tests/queries/0_stateless/02254_projection_broken_part.reference b/tests/queries/0_stateless/02254_projection_broken_part.reference new file mode 100644 index 00000000000..68538fd31ea --- /dev/null +++ b/tests/queries/0_stateless/02254_projection_broken_part.reference @@ -0,0 +1,6 @@ +1 1 1 all_0_0_0 +1 1 2 all_0_0_0 +1 1 3 all_0_0_0 +2 6 +0 +5 6 diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh new file mode 100755 index 00000000000..d276c67f8de --- /dev/null +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" + +$CLICKHOUSE_CLIENT -q "create table projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) + engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') + order by a settings index_granularity = 1;" + +$CLICKHOUSE_CLIENT -q "create table projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) + engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') + order by a settings index_granularity = 1;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into projection_broken_parts_1 values (1, 1), (1, 2), (1, 3);" +$CLICKHOUSE_CLIENT -q "system sync replica projection_broken_parts_2;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from projection_broken_parts_2 order by b;" +$CLICKHOUSE_CLIENT -q "select 2, sum(b) from projection_broken_parts_2 group by a;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='projection_broken_parts_1' and name='all_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/ab.proj/data.bin" + +$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a;" 2>/dev/null + +num_tries=0 +while ! $CLICKHOUSE_CLIENT -q "select 4, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null; do + sleep 1; + num_tries=$((num_tries+1)) + if [ $num_tries -eq 60 ]; then + break + fi +done + +$CLICKHOUSE_CLIENT -q "system sync replica projection_broken_parts_1;" +$CLICKHOUSE_CLIENT -q "select 5, sum(b) from projection_broken_parts_1 group by a;" + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" From 63b9c1ac0670947b49a916b5b6e47cab1dd1d3d0 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 6 Jul 2023 18:58:13 +0200 Subject: [PATCH 297/522] add test --- src/Common/ProfileEvents.cpp | 2 ++ src/IO/ReadWriteBufferFromHTTP.cpp | 6 ++++ ...ing_from_s3_with_connection_pool.reference | 1 + ...89_reading_from_s3_with_connection_pool.sh | 32 ++++++++++++++++++- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c9030070bf2..3bee12731aa 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -371,6 +371,8 @@ The server successfully detected this situation and will download merged part fr M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \ M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \ \ + M(ReadWriteBufferFromHTTPPreservedSessions, "Number of HTTP sessions that were preserved in ReadWriteBufferFromHTTP.") \ + \ M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ M(WriteBufferFromS3RequestsErrors, "Number of exceptions while writing to S3.") \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index b834c17ab6c..6d1c0f7aafa 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -5,6 +5,7 @@ 
namespace ProfileEvents { extern const Event ReadBufferSeekCancelConnection; +extern const Event ReadWriteBufferFromHTTPPreservedSessions; } namespace DB @@ -442,6 +443,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() { /// Response was fully read. markSessionForReuse(session->getSession()); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); return false; } @@ -568,6 +570,7 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() { /// Eof is reached, i.e response was fully read. markSessionForReuse(session->getSession()); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); return false; } @@ -623,8 +626,11 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); if (!cancelled) + { /// Response was fully read. markSessionForReuse(sess); + ProfileEvents::increment(ProfileEvents::ReadWriteBufferFromHTTPPreservedSessions); + } return r; } diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh index 7a8b94a10a8..ce90157d004 100755 --- a/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh +++ b/tests/queries/0_stateless/02789_reading_from_s3_with_connection_pool.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-random-settings +# Tags: no-fasttest, no-random-settings, no-replicated-database CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -27,3 +27,33 @@ WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query_id='$query_id'; " + + +# Test connection pool in ReadWriteBufferFromHTTP + +query_id=$(${CLICKHOUSE_CLIENT} -nq " +create table mut (n int, m int, k int) engine=ReplicatedMergeTree('/test/02441/{database}/mut', '1') order by n; +set insert_keeper_fault_injection_probability=0; +insert into mut values (1, 2, 3), (10, 20, 30); + +system stop merges mut; +alter table mut delete where n = 10; + +select queryID() from( + -- a funny way to wait for a MUTATE_PART to be assigned + select sleepEachRow(2) from url('http://localhost:8123/?param_tries={1..10}&query=' || encodeURLComponent( + 'select 1 where ''MUTATE_PART'' not in (select type from system.replication_queue where database=''' || currentDatabase() || ''' and table=''mut'')' + ), 'LineAsString', 's String') + -- queryID() will be returned for each row, since the query above doesn't return anything we need to return a fake row + union all + select 1 +) limit 1 settings max_threads=1; +" 2>&1) +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -nm --query " +SELECT ProfileEvents['ReadWriteBufferFromHTTPPreservedSessions'] > 0 +FROM system.query_log +WHERE type = 'QueryFinish' + AND current_database = currentDatabase() + AND query_id='$query_id'; +" From 45579417642c44956ebe329b5412bcbb48809d72 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Thu, 6 Jul 2023 17:03:43 +0000 Subject: [PATCH 298/522] black --- 
tests/sqllogic/connection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py index 5e2634787d8..d71cc005d09 100644 --- a/tests/sqllogic/connection.py +++ b/tests/sqllogic/connection.py @@ -62,7 +62,7 @@ def default_clickhouse_odbc_conn_str(): return str( OdbcConnectingArgs.create_from_kw( dsn="ClickHouse DSN (ANSI)", - Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree" + Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree", ) ) From 58793816a73b7b17eb72c35f0266276bc40507b4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 6 Jul 2023 19:04:34 +0200 Subject: [PATCH 299/522] fix paranoid check --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 3 ++ src/Storages/StorageReplicatedMergeTree.cpp | 15 ++++--- src/Storages/StorageReplicatedMergeTree.h | 2 + .../02254_projection_broken_part.sh | 4 +- ...2255_broken_parts_chain_on_start.reference | 8 ++++ .../02255_broken_parts_chain_on_start.sh | 43 +++++++++++++++++++ 7 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference create mode 100755 tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 4dc3583c706..b7fde55880e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2651,7 +2651,7 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) { removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); - LOG_DEBUG(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); + LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); old_name.clear(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 792843cbe18..07f46c07466 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -218,6 +218,9 @@ void ReplicatedMergeTreeQueue::createLogEntriesToFetchBrokenParts() for (const auto & broken_part_name : broken_parts) storage.removePartAndEnqueueFetch(broken_part_name, /* storage_init = */true); + Strings parts_in_zk = storage.getZooKeeper()->getChildren(replica_path + "/parts"); + storage.paranoidCheckForCoveredPartsInZooKeeperOnStart(parts_in_zk, {}); + std::lock_guard lock(state_mutex); /// broken_parts_to_enqueue_fetches_on_loading can be assigned only once on table startup, /// so actually no race conditions are possible diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e8176ac1d5f..2da18f69baf 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1260,8 +1260,7 @@ static time_t tryGetPartCreateTime(zkutil::ZooKeeperPtr & zookeeper, const Strin return res; } -static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicatedMergeTree * storage, const Strings & parts_in_zk, - MergeTreeDataFormatVersion format_version, Poco::Logger * log) +void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart(const 
Strings & parts_in_zk, const Strings & parts_to_fetch) const { #ifdef ABORT_ON_LOGICAL_ERROR constexpr bool paranoid_check_for_covered_parts_default = true; @@ -1275,15 +1274,15 @@ static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicat return; /// FIXME https://github.com/ClickHouse/ClickHouse/issues/51182 - if (storage->getSettings()->use_metadata_cache) + if (getSettings()->use_metadata_cache) return; ActiveDataPartSet active_set(format_version); for (const auto & part_name : parts_in_zk) active_set.add(part_name); - const auto disks = storage->getStoragePolicy()->getDisks(); - auto path = storage->getRelativeDataPath(); + const auto disks = getStoragePolicy()->getDisks(); + auto path = getRelativeDataPath(); for (const auto & part_name : parts_in_zk) { @@ -1296,6 +1295,9 @@ static void paranoidCheckForCoveredPartsInZooKeeperOnStart(const StorageReplicat if (disk->exists(fs::path(path) / part_name)) found = true; + if (!found) + found = std::find(parts_to_fetch.begin(), parts_to_fetch.end(), part_name) != parts_to_fetch.end(); + if (!found) { LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " @@ -1310,7 +1312,6 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) auto zookeeper = getZooKeeper(); Strings expected_parts_vec = zookeeper->getChildren(fs::path(replica_path) / "parts"); - paranoidCheckForCoveredPartsInZooKeeperOnStart(this, expected_parts_vec, format_version, log); /// Parts in ZK. NameSet expected_parts(expected_parts_vec.begin(), expected_parts_vec.end()); @@ -1345,6 +1346,8 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) if (!getActiveContainingPart(missing_name)) parts_to_fetch.push_back(missing_name); + paranoidCheckForCoveredPartsInZooKeeperOnStart(expected_parts_vec, parts_to_fetch); + /** To check the adequacy, for the parts that are in the FS, but not in ZK, we will only consider not the most recent parts. * Because unexpected new parts usually arise only because they did not have time to enroll in ZK with a rough restart of the server. * It also occurs from deduplicated parts that did not have time to retire. diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index bdd3f0da5bf..72a022fce26 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -584,6 +584,8 @@ private: void forcefullyRemoveBrokenOutdatedPartFromZooKeeperBeforeDetaching(const String & part_name) override; + void paranoidCheckForCoveredPartsInZooKeeperOnStart(const Strings & parts_in_zk, const Strings & parts_to_fetch) const; + /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. 
void removePartAndEnqueueFetch(const String & part_name, bool storage_init); diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh index d276c67f8de..6ba5093f234 100755 --- a/tests/queries/0_stateless/02254_projection_broken_part.sh +++ b/tests/queries/0_stateless/02254_projection_broken_part.sh @@ -9,11 +9,11 @@ $CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" $CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_2 sync;" $CLICKHOUSE_CLIENT -q "create table projection_broken_parts_1 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') + engine = ReplicatedMergeTree('/test/02254/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') order by a settings index_granularity = 1;" $CLICKHOUSE_CLIENT -q "create table projection_broken_parts_2 (a int, b int, projection ab (select a, sum(b) group by a)) - engine = ReplicatedMergeTree('/test/02369/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') + engine = ReplicatedMergeTree('/test/02254/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') order by a settings index_granularity = 1;" $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into projection_broken_parts_1 values (1, 1), (1, 2), (1, 3);" diff --git a/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference new file mode 100644 index 00000000000..d55cb5baf93 --- /dev/null +++ b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.reference @@ -0,0 +1,8 @@ +1 1 10 all_0_0_0_1 +1 1 20 all_0_0_0_1 +1 1 30 all_0_0_0_1 +0 +0 +1 1 10 all_0_0_0_1 +1 1 20 all_0_0_0_1 +1 1 30 all_0_0_0_1 diff --git a/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh new file mode 100755 index 00000000000..de260937b9c --- /dev/null +++ b/tests/queries/0_stateless/02255_broken_parts_chain_on_start.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Tags: long, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists rmt1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists rmt2 sync;" + +$CLICKHOUSE_CLIENT -q "create table rmt1 (a int, b int) + engine = ReplicatedMergeTree('/test/02255/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r1') order by a settings old_parts_lifetime=100500;" + +$CLICKHOUSE_CLIENT -q "create table rmt2 (a int, b int) + engine = ReplicatedMergeTree('/test/02255/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/rmt', 'r2') order by a settings old_parts_lifetime=100500;" + +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "insert into rmt1 values (1, 1), (1, 2), (1, 3);" +$CLICKHOUSE_CLIENT -q "alter table rmt1 update b = b*10 where 1 settings mutations_sync=1" +$CLICKHOUSE_CLIENT -q "system sync replica rmt2;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt2 order by b;" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_0_0'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/data.bin" + +path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLICKHOUSE_DATABASE' and table='rmt1' and name='all_0_0_0_1'") +# ensure that path is absolute before removing +$CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit +rm -f "$path/data.bin" + +$CLICKHOUSE_CLIENT -q "detach table rmt1 sync" +$CLICKHOUSE_CLIENT -q "attach table rmt1" 2>/dev/null + +$CLICKHOUSE_CLIENT -q "system sync replica rmt1;" +$CLICKHOUSE_CLIENT -q "select 1, *, _part from rmt1 order by b;" + +$CLICKHOUSE_CLIENT -q "truncate table rmt1" + +$CLICKHOUSE_CLIENT -q "SELECT table, lost_part_count FROM system.replicas WHERE database=currentDatabase() AND lost_part_count!=0"; + +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" +$CLICKHOUSE_CLIENT -q "drop table if exists projection_broken_parts_1 sync;" From 9c12994d942e48c112e9392738c561582f10bb0a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 6 Jul 2023 19:49:22 +0200 Subject: [PATCH 300/522] initialize SeriesRecords for LogSeriesLimiter lazy --- src/Common/LoggingFormatStringHelpers.cpp | 29 +++++++++++------------ src/Common/LoggingFormatStringHelpers.h | 12 ++++++---- 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/src/Common/LoggingFormatStringHelpers.cpp b/src/Common/LoggingFormatStringHelpers.cpp index ed578018d5f..074c8dd2803 100644 --- a/src/Common/LoggingFormatStringHelpers.cpp +++ b/src/Common/LoggingFormatStringHelpers.cpp @@ -77,9 +77,8 @@ void LogFrequencyLimiterIml::cleanup(time_t too_old_threshold_s) } -std::unordered_map> LogSeriesLimiter::series_settings; -std::unordered_map> LogSeriesLimiter::series_loggers; std::mutex LogSeriesLimiter::mutex; +time_t LogSeriesLimiter::last_cleanup = 0; LogSeriesLimiter::LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_, time_t interval_s_) : logger(logger_) @@ -101,33 +100,33 @@ LogSeriesLimiter::LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_ std::lock_guard lock(mutex); - if (series_settings.contains(name_hash)) + if (last_cleanup == 0) + last_cleanup = now; + + auto & series_records = getSeriesRecords(); + + static const time_t cleanup_delay_s = 600; + if (last_cleanup + cleanup_delay_s >= now) { - auto & settings = series_settings[name_hash]; - auto & [allowed_count, interval_s] = 
settings; - chassert(allowed_count_ == allowed_count); - chassert(interval_s_ == interval_s); - } - else - { - series_settings[name_hash] = std::make_tuple(allowed_count_, interval_s_); + time_t old = now - cleanup_delay_s; + std::erase_if(series_records, [old](const auto & elem) { return get<0>(elem.second) < old; }); + last_cleanup = now; } auto register_as_first = [&] () TSA_REQUIRES(mutex) { assert(allowed_count_ > 0); accepted = true; - series_loggers[name_hash] = std::make_tuple(now, 1, 1); + series_records[name_hash] = std::make_tuple(now, 1, 1); }; - - if (!series_loggers.contains(name_hash)) + if (!series_records.contains(name_hash)) { register_as_first(); return; } - auto & [last_time, accepted_count, total_count] = series_loggers[name_hash]; + auto & [last_time, accepted_count, total_count] = series_records[name_hash]; if (last_time + interval_s_ <= now) { debug_message = fmt::format( diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 82c260e52a6..3afa3fb089d 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -199,12 +199,16 @@ public: class LogSeriesLimiter { static std::mutex mutex; - - /// Hash(logger_name) -> (allowed_count, interval_s) - static std::unordered_map> series_settings TSA_GUARDED_BY(mutex); + static time_t last_cleanup; /// Hash(logger_name) -> (last_logged_time_s, accepted, muted) - static std::unordered_map> series_loggers TSA_GUARDED_BY(mutex); + using SeriesRecords = std::unordered_map>; + + static SeriesRecords & getSeriesRecords() TSA_REQUIRES(mutex) + { + static SeriesRecords records; + return records; + } Poco::Logger * logger = nullptr; bool accepted = false; From ec5e26a017c39eb4d76a1b07e4083cc53a225a5d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 6 Jul 2023 21:08:53 +0200 Subject: [PATCH 301/522] Pin rust nightly (to make it stable) Because we use Rust nightly, and without #49601, the Rust toolchain is very unstable and builds can frequently fail. So let's pin a particular version. Also, I've checked and it seems that the Rust archive stores these snapshots without any TTL, since there is even a version from 2015. 
Follow-up for: #50541 Signed-off-by: Azat Khuzhin --- docker/packager/binary/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e824161a688..897bcd24d04 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -49,8 +49,8 @@ ENV CARGO_HOME=/rust/cargo ENV PATH="/rust/cargo/bin:${PATH}" RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ chmod 777 -R /rust && \ - rustup toolchain install nightly && \ - rustup default nightly && \ + rustup toolchain install nightly-2023-07-04 && \ + rustup default nightly-2023-07-04 && \ rustup component add rust-src && \ rustup target add aarch64-unknown-linux-gnu && \ rustup target add x86_64-apple-darwin && \ From c1fa38ea8ed98123a780f2a35c41b8eaf85e2ec0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 6 Jul 2023 22:22:36 +0200 Subject: [PATCH 302/522] Add RISC-V 64 to the universal installer --- docs/_includes/install/universal.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 1699be138c8..5d4571aed9e 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -33,6 +33,9 @@ then elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" + elif [ "${ARCH}" = "riscv64" ] + then + DIR="riscv64" fi elif [ "${OS}" = "FreeBSD" ] then From 271297823ae6abe82908220d1a540fbf0113f4d8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jul 2023 14:56:05 +0000 Subject: [PATCH 303/522] Allow var-int encoded 64-bit integers with MSB=1 Resolves: #51486 Until now, it was illegal to encode 64-bit (unsigned) integers with MSB=1, i.e. values > (1ULL<<63) - 1, as var-int. In more detail, the var-int code used by ClickHouse server and client spent at most 9 bytes per value such that 9 * 7 = 63 bits could be encoded. Some 3rd party clients (e.g. Rust clickhouse-rs) had the same limitation, whereas other clients understand the full range (Python clickhouse-driver). PRs #47608 and #48628 added sanity checks as asserts or exceptions during var-int encoding on the server side. This was considered okay as such huge integers so far occurred only during testing (usually fuzzing) but not in practice. Issue #51486 is a new fuzzing issue where the exception thrown from the sanity check led to a half-baked progress packet and as a result, a logical error / server crash. The only fix which is not another bandaid is to allow the full range in var-int coding. Clients will have to allow the full range too, a note will be added to the changelog. (the alternative was to create another protocol version but as var-int is used all over the place this was considered infeasible) Review note: this is the relevant commit. 
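As an illustration of the range change described in this message, here is a minimal standalone sketch (not part of the patch; the helper name encodeVarUInt is made up for this example). Each output byte carries 7 payload bits with the MSB used as a continuation flag, so the old 9-byte maximum covers only 63 bits, while a value with bit 63 set needs ceil(64/7) = 10 bytes:

#include <cstdint>
#include <cstdio>

/// Minimal base-128 var-int encoder: low 7 bits per byte, MSB means "more bytes follow".
static size_t encodeVarUInt(uint64_t x, uint8_t * out)
{
    size_t n = 0;
    while (x > 0x7F)
    {
        out[n++] = static_cast<uint8_t>(0x80 | (x & 0x7F));
        x >>= 7;
    }
    out[n++] = static_cast<uint8_t>(x); /// final byte, continuation bit clear
    return n;
}

int main()
{
    uint8_t buf[10];
    printf("%zu\n", encodeVarUInt((1ULL << 63) - 1, buf)); /// 9: the old limit still fits in 9 bytes
    printf("%zu\n", encodeVarUInt(1ULL << 63, buf));       /// 10: an MSB=1 value takes a 10th byte
}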
--- src/IO/VarInt.cpp | 9 --- src/IO/VarInt.h | 65 ++++++++----------- src/Server/TCPHandler.cpp | 7 +- .../0_stateless/02812_large_varints.reference | 0 .../0_stateless/02812_large_varints.sql | 4 ++ 5 files changed, 34 insertions(+), 51 deletions(-) create mode 100644 tests/queries/0_stateless/02812_large_varints.reference create mode 100644 tests/queries/0_stateless/02812_large_varints.sql diff --git a/src/IO/VarInt.cpp b/src/IO/VarInt.cpp index ca4b95fcb60..a4b249b01d7 100644 --- a/src/IO/VarInt.cpp +++ b/src/IO/VarInt.cpp @@ -6,7 +6,6 @@ namespace DB namespace ErrorCodes { extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int BAD_ARGUMENTS; } void throwReadAfterEOF() @@ -14,12 +13,4 @@ void throwReadAfterEOF() throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof"); } -void throwValueTooLargeForVarIntEncoding(UInt64 x) -{ - /// Under practical circumstances, we should virtually never end up here but AST Fuzzer manages to create superlarge input integers - /// which trigger this exception. Intentionally not throwing LOGICAL_ERROR or calling abort() or [ch]assert(false), so AST Fuzzer - /// can swallow the exception and continue to run. - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is too large for VarInt encoding", x); -} - } diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 9099b5e7f6a..2a2743e3407 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -13,73 +13,59 @@ namespace DB /// Variable-Length Quantity (VLQ) Base-128 compression, also known as Variable Byte (VB) or Varint encoding. [[noreturn]] void throwReadAfterEOF(); -[[noreturn]] void throwValueTooLargeForVarIntEncoding(UInt64 x); -/// NOTE: Due to historical reasons, only values up to 1<<63-1 can be safely encoded/decoded (bigger values are not idempotent under -/// encoding/decoding). This cannot be changed without breaking backward compatibility (some drivers, e.g. clickhouse-rs (Rust), have the -/// same limitation, others support the full 1<<64 range, e.g. 
clickhouse-driver (Python)) -constexpr UInt64 VAR_UINT_MAX = (1ULL<<63) - 1; - inline void writeVarUInt(UInt64 x, WriteBuffer & ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; + uint8_t byte = 0x80 | (x & 0x7F); ostr.nextIfAtEnd(); *ostr.position() = byte; ++ostr.position(); x >>= 7; - if (!x) - return; } + + uint8_t final_byte = static_cast(x); + + ostr.nextIfAtEnd(); + *ostr.position() = final_byte; + ++ostr.position(); } inline void writeVarUInt(UInt64 x, std::ostream & ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; - + uint8_t byte = 0x80 | (x & 0x7F); ostr.put(byte); x >>= 7; - if (!x) - return; } + + uint8_t final_byte = static_cast(x); + ostr.put(final_byte); } inline char * writeVarUInt(UInt64 x, char * ostr) { - if (x > VAR_UINT_MAX) [[unlikely]] - throwValueTooLargeForVarIntEncoding(x); - - for (size_t i = 0; i < 9; ++i) + while (x > 0x7F) { - uint8_t byte = x & 0x7F; - if (x > 0x7F) - byte |= 0x80; + uint8_t byte = 0x80 | (x & 0x7F); *ostr = byte; ++ostr; x >>= 7; - if (!x) - return ostr; } + uint8_t final_byte = static_cast(x); + + *ostr = final_byte; + ++ostr; + return ostr; } @@ -101,7 +87,7 @@ template inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { if constexpr (check_eof) if (istr.eof()) [[unlikely]] @@ -120,7 +106,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, ReadBuffer & istr) { - if (istr.buffer().end() - istr.position() >= 9) + if (istr.buffer().end() - istr.position() >= 10) return impl::readVarUInt(x, istr); return impl::readVarUInt(x, istr); } @@ -128,7 +114,7 @@ inline void readVarUInt(UInt64 & x, ReadBuffer & istr) inline void readVarUInt(UInt64 & x, std::istream & istr) { x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { UInt64 byte = istr.get(); x |= (byte & 0x7F) << (7 * i); @@ -143,7 +129,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) const char * end = istr + size; x = 0; - for (size_t i = 0; i < 9; ++i) + for (size_t i = 0; i < 10; ++i) { if (istr == end) [[unlikely]] throwReadAfterEOF(); @@ -220,7 +206,8 @@ inline size_t getLengthOfVarUInt(UInt64 x) : (x < (1ULL << 42) ? 6 : (x < (1ULL << 49) ? 7 : (x < (1ULL << 56) ? 8 - : 9))))))); + : (x < (1ULL << 63) ? 
9 + : 10)))))))); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 4d9fb47c893..36566832ebc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1905,17 +1905,18 @@ void TCPHandler::sendData(const Block & block) { initBlockOutput(block); - auto prev_bytes_written_out = out->count(); - auto prev_bytes_written_compressed_out = state.maybe_compressed_out->count(); + size_t prev_bytes_written_out = out->count(); + size_t prev_bytes_written_compressed_out = state.maybe_compressed_out->count(); try { /// For testing hedged requests if (unknown_packet_in_send_data) { + constexpr UInt64 marker = (1ULL<<63) - 1; --unknown_packet_in_send_data; if (unknown_packet_in_send_data == 0) - writeVarUInt(VAR_UINT_MAX, *out); + writeVarUInt(marker, *out); } writeVarUInt(Protocol::Server::Data, *out); diff --git a/tests/queries/0_stateless/02812_large_varints.reference b/tests/queries/0_stateless/02812_large_varints.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02812_large_varints.sql b/tests/queries/0_stateless/02812_large_varints.sql new file mode 100644 index 00000000000..cfbebb7292e --- /dev/null +++ b/tests/queries/0_stateless/02812_large_varints.sql @@ -0,0 +1,4 @@ +-- 64-bit integers with MSB set (i.e. values > (1ULL<<63) - 1) could for historical/compat reasons not be serialized as var-ints (issue #51486). +-- These two queries internally produce such big values, run them to be sure no bad things happen. +SELECT topKWeightedState(65535)(now(), -2) FORMAT Null; +SELECT number FROM numbers(toUInt64(-1)) limit 10 Format Null; From c35294317dbff31b8ff8b48f6256162d6d5dc02e Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 6 Jul 2023 15:06:54 +0000 Subject: [PATCH 304/522] Remove parts in order for object storage always --- src/Storages/MergeTree/MergeTreeData.cpp | 34 +++++++++++++++--------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index fa9bfd38a23..0ef71895999 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2137,20 +2137,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) /// Please don't use "zero-copy replication" (a non-production feature) in production. /// It is not ready for production usage. Don't use it. - bool need_remove_parts_in_order = supportsReplication() && getSettings()->allow_remote_fs_zero_copy_replication; + /// It also is disabled for any object storage, because it can lead to race conditions on blob removal. + /// (see comment at `clearPartsFromFilesystemImpl`). + bool need_remove_parts_in_order = false; - if (need_remove_parts_in_order) + if (supportsReplication()) { - bool has_zero_copy_disk = false; for (const auto & disk : getDisks()) { - if (disk->supportZeroCopyReplication()) + if (disk->isRemote()) { - has_zero_copy_disk = true; + need_remove_parts_in_order = true; break; } } - need_remove_parts_in_order = has_zero_copy_disk; } std::vector parts_to_delete; @@ -2394,18 +2394,28 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t std::mutex part_names_mutex; auto runner = threadPoolCallbackRunner(getPartsCleaningThreadPool().get(), "PartsCleaning"); - /// This flag disallow straightforward concurrent parts removal. It's required only in case - /// when we have parts on zero-copy disk + at least some of them were mutated. 
+ /** Straightforward concurrent parts removal can be applied for the case + * when we have parts on object storage disk + at least some of them were mutated + * (thus, can contain hardlinks to files in the previous parts). + * If we are deleting parts that contain hardlinks to the same file, we may run into a race condition + * and delete only local metadata files, but not the blobs on object storage. + * Given that, we remove in parallel only "independent" parts that don't have such hardlinks. + * Note that it may also be applicable for the regular MergeTree, but it is fixed only for Replicated. + * + * To avoid this we need to fix race conditions on parts and blob removal. + */ bool remove_parts_in_order = false; - if (settings->allow_remote_fs_zero_copy_replication && dynamic_cast(this) != nullptr) + if (dynamic_cast(this) != nullptr) { remove_parts_in_order = std::any_of( parts_to_remove.begin(), parts_to_remove.end(), - [] (const auto & data_part) { return data_part->isStoredOnRemoteDiskWithZeroCopySupport() && data_part->info.getMutationVersion() > 0; } + [] (const auto & data_part) + { + return data_part->isStoredOnRemoteDisk() && data_part->info.getMutationVersion() > 0; + } ); } - if (!remove_parts_in_order) { /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. @@ -2441,7 +2451,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool. LOG_DEBUG( - log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); + log, "Removing {} parts from filesystem (concurrently in order): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", ")); /// We have "zero copy replication" parts and we are going to remove them in parallel. /// The problem is that all parts in a mutation chain must be removed sequentially to avoid "key does not exist" issues. 
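The ordering predicate this patch introduces can be read in isolation. A minimal standalone sketch follows (the simplified Part type and function name are invented for this example and are not the actual MergeTree interfaces): a mutated part on a remote (object storage) disk may hardlink blobs of its predecessor, and the presence of any such part forces ordered removal:

#include <algorithm>
#include <vector>

struct Part
{
    bool stored_on_remote_disk = false;
    unsigned mutation_version = 0; /// > 0 means the part may hardlink files of the part it was mutated from
};

/// If any part may share blobs via hardlinks, fully concurrent removal could race:
/// two removals touching the same hardlinked file may drop local metadata but leave the blob behind.
static bool mustRemovePartsInOrder(const std::vector<Part> & parts)
{
    return std::any_of(parts.begin(), parts.end(), [](const Part & p)
    {
        return p.stored_on_remote_disk && p.mutation_version > 0;
    });
}

int main()
{
    std::vector<Part> parts{{true, 0}, {true, 1}};
    return mustRemovePartsInOrder(parts) ? 0 : 1; /// exits 0: ordered removal is required here
}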
From fc19e74ba9084e66a7ff43565ef80a78dda65570 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 01:12:17 +0200 Subject: [PATCH 305/522] fix deadlock on DatabaseCatalog shutdown --- src/Interpreters/DatabaseCatalog.cpp | 11 ++++++++++- src/Interpreters/DatabaseCatalog.h | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cb2f6e3b3d..dc1861b3bd8 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -56,6 +56,7 @@ namespace ErrorCodes extern const int DATABASE_ACCESS_DENIED; extern const int LOGICAL_ERROR; extern const int HAVE_DEPENDENT_OBJECTS; + extern const int UNFINISHED; } TemporaryTableHolder::TemporaryTableHolder(ContextPtr context_, const TemporaryTableHolder::Creator & creator, const ASTPtr & query) @@ -196,6 +197,9 @@ void DatabaseCatalog::startupBackgroundCleanup() void DatabaseCatalog::shutdownImpl() { + is_shutting_down = true; + wait_table_finally_dropped.notify_all(); + if (cleanup_task) (*cleanup_task)->deactivate(); @@ -1160,8 +1164,13 @@ void DatabaseCatalog::waitTableFinallyDropped(const UUID & uuid) std::unique_lock lock{tables_marked_dropped_mutex}; wait_table_finally_dropped.wait(lock, [&]() TSA_REQUIRES(tables_marked_dropped_mutex) -> bool { - return !tables_marked_dropped_ids.contains(uuid); + return !tables_marked_dropped_ids.contains(uuid) || is_shutting_down; }); + + /// TSA doesn't support unique_lock + if (TSA_SUPPRESS_WARNING_FOR_READ(tables_marked_dropped_ids).contains(uuid)) + throw Exception(ErrorCodes::UNFINISHED, "Did not finish dropping the table with UUID {} because the server is shutting down, " + "will finish after restart", uuid); } void DatabaseCatalog::addDependencies( diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 258ea2dee7c..d502505027f 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -308,6 +308,8 @@ private: Poco::Logger * log; + std::atomic_bool is_shutting_down = false; + /// Do not allow simultaneous execution of DDL requests on the same table. 
/// database name -> database guard -> (table name mutex, counter), /// counter: how many threads are running a query on the table at the same time From 3ec617b1840e7a64761c0e45926719a6d41363c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:31:52 +0200 Subject: [PATCH 306/522] Fix build --- programs/keeper-converter/KeeperConverter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/keeper-converter/KeeperConverter.cpp b/programs/keeper-converter/KeeperConverter.cpp index a049e6bc2b3..20448aafa2f 100644 --- a/programs/keeper-converter/KeeperConverter.cpp +++ b/programs/keeper-converter/KeeperConverter.cpp @@ -42,7 +42,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv) { auto keeper_context = std::make_shared(true); keeper_context->setDigestEnabled(true); - keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as(), 0)); + keeper_context->setSnapshotDisk(std::make_shared("Keeper-snapshots", options["output-dir"].as())); DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false); From 48eb30de513f3561eef6cd8be661023438405e0e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:41:36 +0200 Subject: [PATCH 307/522] Fix build --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..408344ee67f 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path, 0); + return std::make_shared("LocalLogDisk", path); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path, 0); + return std::make_shared("LocalStateFileDisk", path); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..6df149bbfbe 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } void setStateFileDirectory(const std::string & path) { - 
keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..51a09b676dc 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From c47b32b17a59202f4b21f5cff09898d41d436925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 01:52:18 +0200 Subject: [PATCH 308/522] Fix build --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 408344ee67f..3c3c0500540 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path); + return std::make_shared("LocalLogDisk", path, 0); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path); + return std::make_shared("LocalSnapshotDisk", path, 0); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path); + return std::make_shared("LocalStateFileDisk", path, 0); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 6df149bbfbe..0f60c960b8b 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } + void setLogDirectory(const std::string & 
path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 51a09b676dc..5a6fd15d72c 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From 685f2949b75fad05bf1959931b626b73cdab55e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 02:53:13 +0300 Subject: [PATCH 309/522] Revert "Fix build" --- src/Coordination/KeeperContext.cpp | 6 +++--- src/Coordination/tests/gtest_coordination.cpp | 10 +++++----- utils/keeper-data-dumper/main.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperContext.cpp b/src/Coordination/KeeperContext.cpp index 3c3c0500540..408344ee67f 100644 --- a/src/Coordination/KeeperContext.cpp +++ b/src/Coordination/KeeperContext.cpp @@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalLogDisk", path, 0); + return std::make_shared("LocalLogDisk", path); }; /// the most specialized path @@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalSnapshotDisk", path, 0); + return std::make_shared("LocalSnapshotDisk", path); }; /// the most specialized path @@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A if (!fs::exists(path)) fs::create_directories(path); - return std::make_shared("LocalStateFileDisk", path, 0); + return std::make_shared("LocalStateFileDisk", path); }; if (config.has("keeper_server.state_storage_disk")) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index 0f60c960b8b..6df149bbfbe 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ 
b/src/Coordination/tests/gtest_coordination.cpp @@ -71,16 +71,16 @@ protected: DB::KeeperContextPtr keeper_context = std::make_shared(true); Poco::Logger * log{&Poco::Logger::get("CoordinationTest")}; - void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path, 0)); } + void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared("LogDisk", path)); } void setSnapshotDirectory(const std::string & path) { - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path, 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", path)); } void setStateFileDirectory(const std::string & path) { - keeper_context->setStateFileDisk(std::make_shared("StateFile", path, 0)); + keeper_context->setStateFileDisk(std::make_shared("StateFile", path)); } }; @@ -1503,9 +1503,9 @@ void testLogAndStateMachine( using namespace DB; ChangelogDirTest snapshots("./snapshots"); - keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots", 0)); + keeper_context->setSnapshotDisk(std::make_shared("SnapshotDisk", "./snapshots")); ChangelogDirTest logs("./logs"); - keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs", 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", "./logs")); ResponsesQueue queue(std::numeric_limits::max()); SnapshotsQueue snapshots_queue{1}; diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 5a6fd15d72c..51a09b676dc 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -64,8 +64,8 @@ int main(int argc, char *argv[]) SnapshotsQueue snapshots_queue{1}; CoordinationSettingsPtr settings = std::make_shared(); KeeperContextPtr keeper_context = std::make_shared(true); - keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2], 0)); - keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1], 0)); + keeper_context->setLogDisk(std::make_shared("LogDisk", argv[2])); + keeper_context->setSnapshotDisk(std::make_shared("LogDisk", argv[1])); auto state_machine = std::make_shared(queue, snapshots_queue, settings, keeper_context, nullptr); state_machine->init(); From d30be39224f94618393c9502961632422b6676f5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:28:33 +0200 Subject: [PATCH 310/522] Fix flaky test 00175_partition_by_ignore and move it to correct location --- .../00175_partition_by_ignore.reference | 0 .../{1_stateful => 0_stateless}/00175_partition_by_ignore.sql | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/queries/{1_stateful => 0_stateless}/00175_partition_by_ignore.reference (100%) rename tests/queries/{1_stateful => 0_stateless}/00175_partition_by_ignore.sql (90%) diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.reference b/tests/queries/0_stateless/00175_partition_by_ignore.reference similarity index 100% rename from tests/queries/1_stateful/00175_partition_by_ignore.reference rename to tests/queries/0_stateless/00175_partition_by_ignore.reference diff --git a/tests/queries/1_stateful/00175_partition_by_ignore.sql b/tests/queries/0_stateless/00175_partition_by_ignore.sql similarity index 90% rename from tests/queries/1_stateful/00175_partition_by_ignore.sql rename to tests/queries/0_stateless/00175_partition_by_ignore.sql index 737d1b59fe3..19d63c82a87 100644 --- a/tests/queries/1_stateful/00175_partition_by_ignore.sql +++ b/tests/queries/0_stateless/00175_partition_by_ignore.sql @@ -2,7 +2,7 @@ SELECT '-- 
check that partition key with ignore works correctly'; DROP TABLE IF EXISTS partition_by_ignore SYNC; -CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple(); +CREATE TABLE partition_by_ignore (ts DateTime, ts_2 DateTime) ENGINE=MergeTree PARTITION BY (toYYYYMM(ts), ignore(ts_2)) ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO partition_by_ignore SELECT toDateTime('2022-08-03 00:00:00') + toIntervalDay(number), toDateTime('2022-08-04 00:00:00') + toIntervalDay(number) FROM numbers(60); EXPLAIN ESTIMATE SELECT count() FROM partition_by_ignore WHERE ts BETWEEN toDateTime('2022-08-07 00:00:00') AND toDateTime('2022-08-10 00:00:00') FORMAT CSV; From f8ac899c3fefb1268a5197dc4d85c2ee1eb174ca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 03:49:50 +0200 Subject: [PATCH 311/522] Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool --- tests/queries/0_stateless/02360_send_logs_level_colors.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02360_send_logs_level_colors.sh b/tests/queries/0_stateless/02360_send_logs_level_colors.sh index 0585e779815..a9b7d4dd3c1 100755 --- a/tests/queries/0_stateless/02360_send_logs_level_colors.sh +++ b/tests/queries/0_stateless/02360_send_logs_level_colors.sh @@ -1,11 +1,12 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace + # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh file_name="${CLICKHOUSE_TMP}/res_${CLICKHOUSE_DATABASE}.log" -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') # Run query via expect to make isatty() return true function run() @@ -20,8 +21,7 @@ spawn bash -c "$command" expect 1 EOF - file "$file_name" | grep -o "ASCII text" - file "$file_name" | grep -o "with escape sequences" + rg -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text" } run "$CLICKHOUSE_CLIENT -q 'SELECT 1' 2>$file_name" From f0cc90a7fb0dcf75725e0f4e437828cbb4465143 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:07:03 +0200 Subject: [PATCH 312/522] Revert "Merge pull request #51822 from kssenii/minor-changes" This reverts commit 5ac85f4fa888b4cca9d433b98505d52777281c6e, reversing changes made to 376c903da9502fb2efce180178d96c14a664f298. 
--- src/Interpreters/FilesystemCacheLog.h | 11 ++++++++++- src/Interpreters/InterpreterSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.cpp | 8 ++++---- src/Parsers/ASTSystemQuery.h | 4 ++-- src/Parsers/ParserSystemQuery.cpp | 4 ++-- 5 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index 0d088a922e0..d6dd00e5463 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -11,7 +11,16 @@ namespace DB { - +/// +/// -------- Column --------- Type ------ +/// | event_date | DateTime | +/// | event_time | UInt64 | +/// | query_id | String | +/// | remote_file_path | String | +/// | segment_range | Tuple | +/// | read_type | String | +/// ------------------------------------- +/// struct FilesystemCacheLogElement { enum class CacheType diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1c2eb66923e..e1ff8676bc7 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,15 +370,15 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.key_to_drop.empty()) + if (query.delete_key.empty()) { cache->removeAllReleasable(); } else { - auto key = FileCacheKey::fromKeyString(query.key_to_drop); - if (query.offset_to_drop.has_value()) - cache->removeFileSegment(key, query.offset_to_drop.value()); + auto key = FileCacheKey::fromKeyString(query.delete_key); + if (query.delete_offset.has_value()) + cache->removeFileSegment(key, query.delete_offset.value()); else cache->removeKey(key); } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 22244a7075c..9c5e7bff61e 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -212,11 +212,11 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, if (!filesystem_cache_name.empty()) { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!key_to_drop.empty()) + if (!delete_key.empty()) { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << key_to_drop; - if (offset_to_drop.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << offset_to_drop.value(); + settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; + if (delete_offset.has_value()) + settings.ostr << (settings.hilite ? 
hilite_none : "") << " OFFSET " << delete_offset.value(); } } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 6c81162f103..ebc3e9cd430 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,8 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string key_to_drop; - std::optional offset_to_drop; + std::string delete_key; + std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 09c86876b48..ef71e994d56 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -409,9 +409,9 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & res->filesystem_cache_name = ast->as()->value.safeGet(); if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) { - res->key_to_drop = ast->as()->name(); + res->delete_key = ast->as()->name(); if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->offset_to_drop = ast->as()->value.safeGet(); + res->delete_offset = ast->as()->value.safeGet(); } } if (!parseQueryWithOnCluster(res, pos, expected)) From 7cece62d26d01621f2cd9e8cc8b6b7a68d808dd9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:07:19 +0200 Subject: [PATCH 313/522] Revert "Merge pull request #51547 from kssenii/more-flexible-drop-cache" This reverts commit 2ce7bcaa3d5fb36a11ae0211eabd5a89c2a8c5de, reversing changes made to e897207cd5402307295fb3dcf5c8650d5e0a4668. --- .../IO/CachedOnDiskReadBufferFromFile.cpp | 13 ++-- src/Disks/IO/CachedOnDiskReadBufferFromFile.h | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 - src/Interpreters/Cache/FileCache.cpp | 34 +++++----- src/Interpreters/Cache/FileCache.h | 12 +--- src/Interpreters/Cache/FileCacheKey.cpp | 5 -- src/Interpreters/Cache/FileCacheKey.h | 2 - src/Interpreters/Cache/Metadata.cpp | 26 +------ src/Interpreters/Cache/Metadata.h | 8 +-- src/Interpreters/FilesystemCacheLog.cpp | 4 -- src/Interpreters/FilesystemCacheLog.h | 2 - src/Interpreters/InterpreterSystemQuery.cpp | 13 +--- src/Parsers/ASTSystemQuery.cpp | 8 --- src/Parsers/ASTSystemQuery.h | 2 - src/Parsers/ParserSystemQuery.cpp | 8 --- ...2808_filesystem_cache_drop_query.reference | 4 -- .../02808_filesystem_cache_drop_query.sh | 67 ------------------- 17 files changed, 30 insertions(+), 182 deletions(-) delete mode 100644 tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference delete mode 100755 tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 15b6a9211de..81aa29639ac 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -74,22 +74,19 @@ CachedOnDiskReadBufferFromFile::CachedOnDiskReadBufferFromFile( } void CachedOnDiskReadBufferFromFile::appendFilesystemCacheLog( - const FileSegment & file_segment, CachedOnDiskReadBufferFromFile::ReadType type) + const FileSegment::Range & file_segment_range, CachedOnDiskReadBufferFromFile::ReadType type) { if (!cache_log) return; - const auto range = file_segment.range(); FilesystemCacheLogElement elem { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .query_id = query_id, .source_file_path = source_file_path, - .file_segment_range = { range.left, range.right }, + 
.file_segment_range = { file_segment_range.left, file_segment_range.right }, .requested_range = { first_offset, read_until_position }, - .file_segment_key = file_segment.key().toString(), - .file_segment_offset = file_segment.offset(), - .file_segment_size = range.size(), + .file_segment_size = file_segment_range.size(), .read_from_cache_attempted = true, .read_buffer_id = current_buffer_id, .profile_counters = std::make_shared( @@ -498,7 +495,7 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() auto completed_range = current_file_segment->range(); if (cache_log) - appendFilesystemCacheLog(*current_file_segment, read_type); + appendFilesystemCacheLog(completed_range, read_type); chassert(file_offset_of_buffer_end > completed_range.right); @@ -521,7 +518,7 @@ CachedOnDiskReadBufferFromFile::~CachedOnDiskReadBufferFromFile() { if (cache_log && file_segments && !file_segments->empty()) { - appendFilesystemCacheLog(file_segments->front(), read_type); + appendFilesystemCacheLog(file_segments->front().range(), read_type); } } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h index 36cf8a54183..b4e7701de75 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.h @@ -90,7 +90,7 @@ private: bool completeFileSegmentAndGetNext(); - void appendFilesystemCacheLog(const FileSegment & file_segment, ReadType read_type); + void appendFilesystemCacheLog(const FileSegment::Range & file_segment_range, ReadType read_type); bool writeCache(char * data, size_t size, size_t offset, FileSegment & file_segment); diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 2cd90731f1d..16c1def7b11 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -90,8 +90,6 @@ void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() .source_file_path = current_object.remote_path, .file_segment_range = { 0, current_object.bytes_size }, .cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE, - .file_segment_key = {}, - .file_segment_offset = {}, .file_segment_size = current_object.bytes_size, .read_from_cache_attempted = false, }; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index de8ae33433a..91d1c63e832 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -806,13 +806,6 @@ bool FileCache::tryReserve(FileSegment & file_segment, const size_t size) return true; } -void FileCache::removeKey(const Key & key) -{ - assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); - locked_key->removeAll(); -} - void FileCache::removeKeyIfExists(const Key & key) { assertInitialized(); @@ -825,14 +818,7 @@ void FileCache::removeKeyIfExists(const Key & key) /// But if we have multiple replicated zero-copy tables on the same server /// it became possible to start removing something from cache when it is used /// by other "zero-copy" tables. That is why it's not an error. 
- locked_key->removeAll(/* if_releasable */true); -} - -void FileCache::removeFileSegment(const Key & key, size_t offset) -{ - assertInitialized(); - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); - locked_key->removeFileSegment(offset); + locked_key->removeAllReleasable(); } void FileCache::removePathIfExists(const String & path) @@ -844,12 +830,22 @@ void FileCache::removeAllReleasable() { assertInitialized(); - metadata.iterate([](LockedKey & locked_key) { locked_key.removeAll(/* if_releasable */true); }); + auto lock = lockCache(); + + main_priority->iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) + { + if (segment_metadata->releasable()) + { + auto file_segment = segment_metadata->file_segment; + locked_key.removeFileSegment(file_segment->offset(), file_segment->lock()); + return PriorityIterationResult::REMOVE_AND_CONTINUE; + } + return PriorityIterationResult::CONTINUE; + }, lock); if (stash) { /// Remove all access information. - auto lock = lockCache(); stash->records.clear(); stash->queue->removeAll(lock); } @@ -919,7 +915,7 @@ void FileCache::loadMetadata() continue; } - const auto key = Key::fromKeyString(key_directory.filename().string()); + const auto key = Key(unhexUInt(key_directory.filename().string().data())); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY, /* is_initial_load */true); for (fs::directory_iterator offset_it{key_directory}; offset_it != fs::directory_iterator(); ++offset_it) @@ -1074,7 +1070,7 @@ FileSegmentsHolderPtr FileCache::getSnapshot() FileSegmentsHolderPtr FileCache::getSnapshot(const Key & key) { FileSegments file_segments; - auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW_LOGICAL); + auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::THROW); for (const auto & [_, file_segment_metadata] : *locked_key->getKeyMetadata()) file_segments.push_back(FileSegment::getSnapshot(file_segment_metadata->file_segment)); return std::make_unique(std::move(file_segments)); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2e6a5094758..0e3b17baa2f 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -83,19 +83,13 @@ public: FileSegmentsHolderPtr set(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); - /// Remove file segment by `key` and `offset`. Throws if file segment does not exist. - void removeFileSegment(const Key & key, size_t offset); - - /// Remove files by `key`. Throws if key does not exist. - void removeKey(const Key & key); - - /// Remove files by `key`. + /// Remove files by `key`. Removes files which might be used at the moment. void removeKeyIfExists(const Key & key); - /// Removes files by `path`. + /// Removes files by `path`. Removes files which might be used at the moment. void removePathIfExists(const String & path); - /// Remove files by `key`. + /// Remove files by `key`. Will not remove files which are used at the moment. 
void removeAllReleasable(); std::vector tryGetCachePaths(const Key & key); diff --git a/src/Interpreters/Cache/FileCacheKey.cpp b/src/Interpreters/Cache/FileCacheKey.cpp index 772fcd600bf..f97cdc058aa 100644 --- a/src/Interpreters/Cache/FileCacheKey.cpp +++ b/src/Interpreters/Cache/FileCacheKey.cpp @@ -28,9 +28,4 @@ FileCacheKey FileCacheKey::random() return FileCacheKey(UUIDHelpers::generateV4().toUnderType()); } -FileCacheKey FileCacheKey::fromKeyString(const std::string & key_str) -{ - return FileCacheKey(unhexUInt(key_str.data())); -} - } diff --git a/src/Interpreters/Cache/FileCacheKey.h b/src/Interpreters/Cache/FileCacheKey.h index e788cd5e7cd..bab8359732c 100644 --- a/src/Interpreters/Cache/FileCacheKey.h +++ b/src/Interpreters/Cache/FileCacheKey.h @@ -21,8 +21,6 @@ struct FileCacheKey static FileCacheKey random(); bool operator==(const FileCacheKey & other) const { return key == other.key; } - - static FileCacheKey fromKeyString(const std::string & key_str); }; using FileCacheKeyAndOffset = std::pair; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 0a2d58432e4..bfaa00eac2c 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -25,7 +25,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } FileSegmentMetadata::FileSegmentMetadata(FileSegmentPtr && file_segment_) @@ -192,8 +191,6 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( if (it == end()) { if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); else if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) return nullptr; @@ -218,8 +215,6 @@ LockedKeyPtr CacheMetadata::lockKeyMetadata( return locked_metadata; if (key_not_found_policy == KeyNotFoundPolicy::THROW) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No such key `{}` in cache", key); - else if (key_not_found_policy == KeyNotFoundPolicy::THROW_LOGICAL) throw Exception(ErrorCodes::LOGICAL_ERROR, "No such key `{}` in cache", key); if (key_not_found_policy == KeyNotFoundPolicy::RETURN_NULL) @@ -563,11 +558,11 @@ bool LockedKey::isLastOwnerOfFileSegment(size_t offset) const return file_segment_metadata->file_segment.use_count() == 2; } -void LockedKey::removeAll(bool if_releasable) +void LockedKey::removeAllReleasable() { for (auto it = key_metadata->begin(); it != key_metadata->end();) { - if (if_releasable && !it->second->releasable()) + if (!it->second->releasable()) { ++it; continue; @@ -588,32 +583,17 @@ void LockedKey::removeAll(bool if_releasable) } } -KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset) -{ - auto it = key_metadata->find(offset); - if (it == key_metadata->end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset); - - auto file_segment = it->second->file_segment; - return removeFileSegmentImpl(it, file_segment->lock()); -} - KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, const FileSegmentGuard::Lock & segment_lock) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no offset {}", offset); - return removeFileSegmentImpl(it, segment_lock); -} - -KeyMetadata::iterator LockedKey::removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock & 
segment_lock) -{ auto file_segment = it->second->file_segment; LOG_DEBUG( key_metadata->log, "Remove from cache. Key: {}, offset: {}, size: {}", - getKey(), file_segment->offset(), file_segment->reserved_size); + getKey(), offset, file_segment->reserved_size); chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 42d74338e12..503c19f4150 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -87,7 +87,7 @@ struct CacheMetadata : public std::unordered_map, { public: using Key = FileCacheKey; - using IterateCacheMetadataFunc = std::function; + using IterateCacheMetadataFunc = std::function; explicit CacheMetadata(const std::string & path_); @@ -106,7 +106,6 @@ public: enum class KeyNotFoundPolicy { THROW, - THROW_LOGICAL, CREATE_EMPTY, RETURN_NULL, }; @@ -170,10 +169,9 @@ struct LockedKey : private boost::noncopyable std::shared_ptr getKeyMetadata() const { return key_metadata; } std::shared_ptr getKeyMetadata() { return key_metadata; } - void removeAll(bool if_releasable = true); + void removeAllReleasable(); KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &); - KeyMetadata::iterator removeFileSegment(size_t offset); void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); @@ -190,8 +188,6 @@ struct LockedKey : private boost::noncopyable std::string toString() const; private: - KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &); - const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. }; diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index b660db064d1..17f0fda71ec 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -40,8 +40,6 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"source_file_path", std::make_shared()}, {"file_segment_range", std::make_shared(types)}, {"total_requested_range", std::make_shared(types)}, - {"key", std::make_shared()}, - {"offset", std::make_shared()}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"read_from_cache_attempted", std::make_shared()}, @@ -62,8 +60,6 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); - columns[i++]->insert(file_segment_key); - columns[i++]->insert(file_segment_offset); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(cache_type)); columns[i++]->insert(read_from_cache_attempted); diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index d6dd00e5463..1b22d561c51 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -39,8 +39,6 @@ struct FilesystemCacheLogElement std::pair file_segment_range{}; std::pair requested_range{}; CacheType cache_type{}; - std::string file_segment_key; - size_t file_segment_offset; size_t file_segment_size; bool read_from_cache_attempted; String read_buffer_id; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index e1ff8676bc7..f2d011b12d1 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp 
+++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -370,18 +370,7 @@ BlockIO InterpreterSystemQuery::execute() else { auto cache = FileCacheFactory::instance().getByName(query.filesystem_cache_name).cache; - if (query.delete_key.empty()) - { - cache->removeAllReleasable(); - } - else - { - auto key = FileCacheKey::fromKeyString(query.delete_key); - if (query.delete_offset.has_value()) - cache->removeFileSegment(key, query.delete_offset.value()); - else - cache->removeKey(key); - } + cache->removeAllReleasable(); } break; } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 9c5e7bff61e..a91449ff035 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -210,15 +210,7 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, else if (type == Type::DROP_FILESYSTEM_CACHE) { if (!filesystem_cache_name.empty()) - { settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_name; - if (!delete_key.empty()) - { - settings.ostr << (settings.hilite ? hilite_none : "") << " KEY " << delete_key; - if (delete_offset.has_value()) - settings.ostr << (settings.hilite ? hilite_none : "") << " OFFSET " << delete_offset.value(); - } - } } else if (type == Type::UNFREEZE) { diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ebc3e9cd430..ca4802d9a9b 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -107,8 +107,6 @@ public: UInt64 seconds{}; String filesystem_cache_name; - std::string delete_key; - std::optional delete_offset; String backup_name; diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index ef71e994d56..48dbe60e241 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -405,15 +405,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ParserLiteral path_parser; ASTPtr ast; if (path_parser.parse(pos, ast, expected)) - { res->filesystem_cache_name = ast->as()->value.safeGet(); - if (ParserKeyword{"KEY"}.ignore(pos, expected) && ParserIdentifier().parse(pos, ast, expected)) - { - res->delete_key = ast->as()->name(); - if (ParserKeyword{"OFFSET"}.ignore(pos, expected) && ParserLiteral().parse(pos, ast, expected)) - res->delete_offset = ast->as()->value.safeGet(); - } - } if (!parseQueryWithOnCluster(res, pos, expected)) return false; break; diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference deleted file mode 100644 index d80fc78e03d..00000000000 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.reference +++ /dev/null @@ -1,4 +0,0 @@ -1 -0 -1 -0 diff --git a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh b/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh deleted file mode 100755 index 9d987d0ebf2..00000000000 --- a/tests/queries/0_stateless/02808_filesystem_cache_drop_query.sh +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings - -# set -x - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - - -disk_name="${CLICKHOUSE_TEST_UNIQUE_NAME}" -$CLICKHOUSE_CLIENT -nm --query """ -DROP TABLE IF EXISTS test; -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk_$disk_name(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, disk = s3_disk); - -INSERT INTO test SELECT 1, 'test'; -""" - -query_id=$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q " system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key OFFSET $offset; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""" - -query_id=$RANDOM$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q " system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key'; -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SYSTEM DROP FILESYSTEM CACHE '$disk_name' KEY $key -""" - -$CLICKHOUSE_CLIENT -nm --query """ -SELECT count() FROM system.filesystem_cache WHERE key = '$key'; -""" From acd17c7974637714138a76fb83f73ec31946aa79 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 10:40:04 +0800 Subject: [PATCH 314/522] Make a deal with the "Style check" --- src/Loggers/Loggers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index 1e169190ca4..4c85ea79a63 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -39,7 +39,7 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p fs::path path{file_path}; std::tm buf; localtime_r(&now, &buf); - std::stringstream ss; + std::ostringstream ss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM ss << std::put_time(&buf, file_path.c_str()); return path.replace_filename(ss.str()); } From d0ad416e352f39e20b034c5ee1b51cb9efdc6aec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 04:41:47 +0200 Subject: [PATCH 315/522] Fix flaky test detach_attach_partition_race --- .../0_stateless/01164_detach_attach_partition_race.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 7640b9dddf2..3aec4c3445d 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -2,9 +2,12 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +$CLICKHOUSE_CLIENT -q "drop table if exists mt" + $CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000" $CLICKHOUSE_CLIENT -q "insert into mt values (1)" $CLICKHOUSE_CLIENT -q "insert into mt values (2)" @@ -13,7 +16,9 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)" function thread_insert() { while true; do - $CLICKHOUSE_CLIENT -q "insert into mt values (rand())"; + # It might be the case that the threads are terminated and exited, but some children didn't and they are still sending queries when we are dropping tables. + # That's why the "Table doesn't exist" error is allowed, while other errors don't. + $CLICKHOUSE_CLIENT -q "insert into mt values (rand())" 2>&1 | tr -d '\n' | rg -v "Table .+ doesn't exist"; done } From 7080d85d2de6c743cc5759fa2a50d1ada1d51068 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 10:54:36 +0800 Subject: [PATCH 316/522] Amend the documentation --- .../settings.md | 49 +++++++++++++++++- .../settings.md | 50 ++++++++++++++++++- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index bad7e388377..48361b0f157 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1201,13 +1201,58 @@ Keys: - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. +Both log and error log file names (only file names, not directories) support date and time format placeholders. + +**Placeholders** +Using the following placeholders, you can define a pattern for the resulting file name. “Example” column shows formatting result for `2023-07-06 18:32:07`. + +| Placeholder | Description | Example | +|-------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| +| %% | Literal % | % | +| %n | New-line character | | +| %t | Horizontal tab character | | +| %Y | Year as a decimal number, e.g. 2017 | 2023 | +| %y | Last 2 digits of year as a decimal number (range [00,99]) | 23 | +| %C | First 2 digits of year as a decimal number (range [00,99]) | 20 | +| %G | Four-digit [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. Normally useful only with %V | 2023 | +| %g | Last 2 digits of [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. | 23 | +| %b | Abbreviated month name, e.g. Oct (locale dependent) | Jul | +| %h | Synonym of %b | Jul | +| %B | Full month name, e.g. October (locale dependent) | July | +| %m | Month as a decimal number (range [01,12]) | 07 | +| %U | Week of the year as a decimal number (Sunday is the first day of the week) (range [00,53]) | 27 | +| %W | Week of the year as a decimal number (Monday is the first day of the week) (range [00,53]) | 27 | +| %V | ISO 8601 week number (range [01,53]) | 27 | +| %j | Day of the year as a decimal number (range [001,366]) | 187 | +| %d | Day of the month as a zero-padded decimal number (range [01,31]). Single digit is preceded by zero. 
| 06 | | %e | Day of the month as a space-padded decimal number (range [1,31]). Single digit is preceded by a space. |   6 | | %a | Abbreviated weekday name, e.g. Fri (locale dependent) | Thu | | %A | Full weekday name, e.g. Friday (locale dependent) | Thursday | | %w | Weekday as an integer number with Sunday as 0 (range [0-6]) | 4 | | %u | Weekday as a decimal number, where Monday is 1 (ISO 8601 format) (range [1-7]) | 4 | | %H | Hour as a decimal number, 24 hour clock (range [00-23]) | 18 | | %I | Hour as a decimal number, 12 hour clock (range [01,12]) | 06 | | %M | Minute as a decimal number (range [00,59]) | 32 | | %S | Second as a decimal number (range [00,60]) | 07 | | %c | Standard date and time string, e.g. Sun Oct 17 04:41:13 2010 (locale dependent) | Thu Jul 6 18:32:07 2023 | | %x | Localized date representation (locale dependent) | 07/06/23 | | %X | Localized time representation, e.g. 18:40:20 or 6:40:20 PM (locale dependent) | 18:32:07 | | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 07/06/23 | | %F | Short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2023-07-06 | | %r | Localized 12-hour clock time (locale dependent) | 06:32:07 PM | | %R | Equivalent to "%H:%M" | 18:32 | | %T | Equivalent to "%H:%M:%S" (the ISO 8601 time format) | 18:32:07 | | %p | Localized a.m. or p.m. designation (locale dependent) | PM | | %z | Offset from UTC in the ISO 8601 format (e.g. -0430), or no characters if the time zone information is not available | +0800 | | %Z | Locale-dependent time zone name or abbreviation, or no characters if the time zone information is not available | Z AWST | **Example** ``` xml <logger> <level>trace</level> <log>/var/log/clickhouse-server/clickhouse-server-%F-%T.log</log> <errorlog>/var/log/clickhouse-server/clickhouse-server-%F-%T.err.log</errorlog> <size>1000M</size> <count>10</count> <stream_compress>true</stream_compress> </logger> ``` diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 5430469ea18..421df3fe3eb 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -575,14 +575,60 @@ ClickHouse поддерживает динамическое изменение - `errorlog` - Файл лога ошибок. - `size` - Размер файла. Действует для `log` и `errorlog`. Как только файл достиг размера `size`, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога. - `count` - Количество заархивированных файлов логов, которые сохраняет ClickHouse. +- `stream_compress` – Сжимать `log` и `errorlog` с помощью алгоритма `lz4`. Чтобы активировать, установите значение `1` или `true`. + +Имена файлов `log` и `errorlog` (только имя файла, а не директорий) поддерживают спецификаторы шаблонов даты и времени. + +**Спецификаторы форматирования** +С помощью следующих спецификаторов можно определить шаблон для формирования имени файла. Столбец “Пример” показывает возможные значения на момент времени `2023-07-06 18:32:07`.
+ +| Спецификатор | Описание | Пример | +|--------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| +| %% | Литерал % | % | +| %n | Символ новой строки | | +| %t | Символ горизонтальной табуляции | | +| %Y | Год как десятичное число, например, 2017 | 2023 | +| %y | Последние 2 цифры года в виде десятичного числа (диапазон [00,99]) | 23 | +| %C | Первые 2 цифры года в виде десятичного числа (диапазон [00,99]) | 20 | +| %G | Год по неделям согласно [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), то есть год, который содержит указанную неделю. Обычно используется вместе с %V. | 2023 | +| %g | Последние 2 цифры [года по неделям ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), т.е. года, содержащего указанную неделю (диапазон [00,99]). | 23 | +| %b | Сокращённое название месяца, например Oct (зависит от локали) | Jul | +| %h | Синоним %b | Jul | +| %B | Полное название месяца, например, October (зависит от локали) | July | +| %m | Месяц в виде десятичного числа (диапазон [01,12]) | 07 | +| %U | Неделя года в виде десятичного числа (воскресенье - первый день недели) (диапазон [00,53]) | 27 | +| %W | Неделя года в виде десятичного числа (понедельник - первый день недели) (диапазон [00,53]) | 27 | +| %V | Неделя года ISO 8601 (диапазон [01,53]) | 27 | +| %j | День года в виде десятичного числа (диапазон [001,366]) | 187 | +| %d | День месяца в виде десятичного числа (диапазон [01,31]) Перед одиночной цифрой ставится ноль. | 06 | +| %e | День месяца в виде десятичного числа (диапазон [1,31]). Перед одиночной цифрой ставится пробел. |   6 | +| %a | Сокращённое название дня недели, например, Fri (зависит от локали) | Thu | +| %A | Полный день недели, например, Friday (зависит от локали) | Thursday | +| %w | День недели в виде десятичного числа, где воскресенье равно 0 (диапазон [0-6]) | 4 | +| %u | День недели в виде десятичного числа, где понедельник равен 1 (формат ISO 8601) (диапазон [1-7]) | 4 | +| %H | Час в виде десятичного числа, 24-часовой формат (диапазон [00-23]) | 18 | +| %I | Час в виде десятичного числа, 12-часовой формат (диапазон [01,12]) | 06 | +| %M | Минуты в виде десятичного числа (диапазон [00,59]) | 32 | +| %S | Секунды как десятичное число (диапазон [00,60]) | 07 | +| %c | Стандартная строка даты и времени, например, Sun Oct 17 04:41:13 2010 (зависит от локали) | Thu Jul 6 18:32:07 2023 | +| %x | Локализованное представление даты (зависит от локали) | 07/06/23 | +| %X | Локализованное представление времени, например, 18:40:20 или 6:40:20 PM (зависит от локали) | 18:32:07 | +| %D | Эквивалентно "%m/%d/%y" | 07/06/23 | +| %F | Эквивалентно "%Y-%m-%d" (формат даты ISO 8601) | 2023-07-06 | +| %r | Локализованное 12-часовое время (зависит от локали) | 06:32:07 PM | +| %R | Эквивалентно "%H:%M" | 18:32 | +| %T | Эквивалентно "%H:%M:%S" (формат времени ISO 8601) | 18:32:07 | +| %p | Локализованное обозначение a.m. или p.m. 
(зависит от локали) | PM | | %z | Смещение от UTC в формате ISO 8601 (например, -0430), или без символов, если информация о часовом поясе недоступна | +0800 | | %Z | Зависящее от локали название или аббревиатура часового пояса, если информация о часовом поясе доступна | Z AWST | **Пример** ``` xml <logger> <level>trace</level> <log>/var/log/clickhouse-server/clickhouse-server-%F-%T.log</log> <errorlog>/var/log/clickhouse-server/clickhouse-server-%F-%T.err.log</errorlog> <size>1000M</size> <count>10</count> </logger> ``` From 63fbde41fee5fb8c0133dc5a576ed4e3caa5c3f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 7 Jul 2023 05:30:32 +0200 Subject: [PATCH 318/522] Fix error in subquery operators --- .../AggregateFunctionMinMaxAny.h | 48 +++++++++++-------- .../02812_subquery_operators.reference | 6 +++ .../0_stateless/02812_subquery_operators.sql | 6 +++ 3 files changed, 41 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02812_subquery_operators.reference create mode 100644 tests/queries/0_stateless/02812_subquery_operators.sql diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 5312df32459..6bfa6895a5c 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -51,7 +51,8 @@ private: T value = T{}; public: - static constexpr bool is_nullable = false; + static constexpr bool result_is_nullable = false; + static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; bool has() const @@ -501,7 +502,8 @@ private: char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero. public: - static constexpr bool is_nullable = false; + static constexpr bool result_is_nullable = false; + static constexpr bool should_skip_null_arguments = true; static constexpr bool is_any = false; bool has() const @@ -769,7 +771,7 @@ static_assert( /// For any other value types.
-template +template struct SingleValueDataGeneric { private: @@ -779,12 +781,13 @@ private: bool has_value = false; public: - static constexpr bool is_nullable = IS_NULLABLE; + static constexpr bool result_is_nullable = RESULT_IS_NULLABLE; + static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE; static constexpr bool is_any = false; bool has() const { - if constexpr (is_nullable) + if constexpr (result_is_nullable) return has_value; return !value.isNull(); } @@ -820,14 +823,14 @@ public: void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); - if constexpr (is_nullable) + if constexpr (result_is_nullable) has_value = true; } void change(const Self & to, Arena *) { value = to.value; - if constexpr (is_nullable) + if constexpr (result_is_nullable) has_value = true; } @@ -844,7 +847,7 @@ public: bool changeFirstTime(const Self & to, Arena * arena) { - if (!has() && (is_nullable || to.has())) + if (!has() && (result_is_nullable || to.has())) { change(to, arena); return true; @@ -879,7 +882,7 @@ public: } else { - if constexpr (is_nullable) + if constexpr (result_is_nullable) { Field new_value; column.get(row_num, new_value); @@ -910,7 +913,7 @@ public: { if (!to.has()) return false; - if constexpr (is_nullable) + if constexpr (result_is_nullable) { if (!has()) { @@ -945,7 +948,7 @@ public: } else { - if constexpr (is_nullable) + if constexpr (result_is_nullable) { Field new_value; column.get(row_num, new_value); @@ -975,7 +978,7 @@ public: { if (!to.has()) return false; - if constexpr (is_nullable) + if constexpr (result_is_nullable) { if (!value.isNull() && (to.value.isNull() || value < to.value)) { @@ -1138,13 +1141,20 @@ struct AggregateFunctionAnyLastData : Data #endif }; + +/** The aggregate function 'singleValueOrNull' is used to implement subquery operators, + * such as x = ALL (SELECT ...) + * It checks if there is only one unique non-NULL value in the data. + * If there is only one unique value - returns it. + * If there are zero or at least two distinct values - returns NULL. 
+ */ template struct AggregateFunctionSingleValueOrNullData : Data { - static constexpr bool is_nullable = true; - using Self = AggregateFunctionSingleValueOrNullData; + static constexpr bool result_is_nullable = true; + bool first_value = true; bool is_null = false; @@ -1166,7 +1176,7 @@ struct AggregateFunctionSingleValueOrNullData : Data if (!to.has()) return; - if (first_value) + if (first_value && !to.first_value) { first_value = false; this->change(to, arena); @@ -1311,7 +1321,7 @@ public: static DataTypePtr createResultType(const DataTypePtr & type_) { - if constexpr (Data::is_nullable) + if constexpr (Data::result_is_nullable) return makeNullable(type_); return type_; } @@ -1431,13 +1441,13 @@ public: } AggregateFunctionPtr getOwnNullAdapter( - const AggregateFunctionPtr & nested_function, + const AggregateFunctionPtr & original_function, const DataTypes & /*arguments*/, const Array & /*params*/, const AggregateFunctionProperties & /*properties*/) const override { - if (Data::is_nullable) - return nested_function; + if (Data::result_is_nullable && !Data::should_skip_null_arguments) + return original_function; return nullptr; } diff --git a/tests/queries/0_stateless/02812_subquery_operators.reference b/tests/queries/0_stateless/02812_subquery_operators.reference new file mode 100644 index 00000000000..aed0a046f99 --- /dev/null +++ b/tests/queries/0_stateless/02812_subquery_operators.reference @@ -0,0 +1,6 @@ + +Hello +Hello +123 +1 + ['\0'] [] \0 [''] diff --git a/tests/queries/0_stateless/02812_subquery_operators.sql b/tests/queries/0_stateless/02812_subquery_operators.sql new file mode 100644 index 00000000000..b0638b43e89 --- /dev/null +++ b/tests/queries/0_stateless/02812_subquery_operators.sql @@ -0,0 +1,6 @@ +SELECT singleValueOrNull(toNullable('')); +SELECT singleValueOrNull(toNullable('Hello')); +SELECT singleValueOrNull((SELECT 'Hello')); +SELECT singleValueOrNull(toNullable(123)); +SELECT '' = ALL (SELECT toNullable('')); +SELECT '', ['\0'], [], singleValueOrNull(( SELECT '\0' ) ), ['']; From 3edee4174c040b079015ce6524c0d4c56926e348 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 11:34:03 +0800 Subject: [PATCH 319/522] Add AWST time zone abbreviation to the ignore list --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 2802e52c288..6ddca6db538 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -12,6 +12,7 @@ ARMv ASLR ASOF ASan +AWST Actian ActionsMenu ActiveRecord From 87ea1b6667ed9a79272e3b77c529369f2acc4e4e Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 13:01:30 +0800 Subject: [PATCH 320/522] Recover the integration test --- .../test.py | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py index e69de29bb2d..9fa87056d2c 100644 --- a/tests/integration/test_render_log_file_name_templates/test.py +++ b/tests/integration/test_render_log_file_name_templates/test.py @@ -0,0 +1,58 @@ +import pytest +import logging +from helpers.cluster import ClickHouseCluster +from datetime import datetime + + +log_dir = "/var/log/clickhouse-server/" +cluster = ClickHouseCluster(__file__) + + +@pytest.fixture(scope="module") +def started_cluster(): + cluster.add_instance( + 
"file-names-from-config", + main_configs=["configs/config-file-template.xml"], + clickhouse_log_file=None, + clickhouse_error_log_file=None, + ) + cluster.add_instance( + "file-names-from-params", + clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log", + clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log", + ) + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_check_file_names(started_cluster): + now = datetime.now() + log_file = ( + log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" + ) + err_log_file = ( + log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" + ) + logging.debug(f"log_file {log_file} err_log_file {err_log_file}") + + for name, instance in started_cluster.instances.items(): + files = instance.exec_in_container( + ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True + ) + + logging.debug(f"check instance '{name}': {log_dir} contains: {files}") + + assert ( + instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True) + == log_file + "\n" + ) + + assert ( + instance.exec_in_container( + ["bash", "-c", f"ls {err_log_file}"], nothrow=True + ) + == err_log_file + "\n" + ) From 4c44c1f6ea422356bbed589aa5053fcd08139cb6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 06:32:42 +0000 Subject: [PATCH 321/522] Wait inside the function --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index c4b026c6aba..d8153967e9a 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -32,6 +32,8 @@ function insert3() ${MY_CLICKHOUSE_CLIENT} --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & sleep 0.05 done + + wait } function select1() From 95fedaedff3ad3e3cdb15d3cc2b06ab6d9ea1e9b Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 15:16:10 +0800 Subject: [PATCH 322/522] Refine the integration test code --- .../__init__.py | 58 ------------------- .../test.py | 8 +-- 2 files changed, 2 insertions(+), 64 deletions(-) diff --git a/tests/integration/test_render_log_file_name_templates/__init__.py b/tests/integration/test_render_log_file_name_templates/__init__.py index 9fa87056d2c..e69de29bb2d 100644 --- a/tests/integration/test_render_log_file_name_templates/__init__.py +++ b/tests/integration/test_render_log_file_name_templates/__init__.py @@ -1,58 +0,0 @@ -import pytest -import logging -from helpers.cluster import ClickHouseCluster -from datetime import datetime - - -log_dir = "/var/log/clickhouse-server/" -cluster = ClickHouseCluster(__file__) - - -@pytest.fixture(scope="module") -def started_cluster(): - cluster.add_instance( - "file-names-from-config", - main_configs=["configs/config-file-template.xml"], - clickhouse_log_file=None, - clickhouse_error_log_file=None, - ) - cluster.add_instance( - "file-names-from-params", - clickhouse_log_file=log_dir + "clickhouse-server-%Y-%m.log", - clickhouse_error_log_file=log_dir + "clickhouse-server-%Y-%m.err.log", - ) - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_check_file_names(started_cluster): - now = datetime.now() - log_file = ( - log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" - ) - err_log_file = ( - log_dir + 
f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" - ) - logging.debug(f"log_file {log_file} err_log_file {err_log_file}") - - for name, instance in started_cluster.instances.items(): - files = instance.exec_in_container( - ["bash", "-c", f"ls -lh {log_dir}"], nothrow=True - ) - - logging.debug(f"check instance '{name}': {log_dir} contains: {files}") - - assert ( - instance.exec_in_container(["bash", "-c", f"ls {log_file}"], nothrow=True) - == log_file + "\n" - ) - - assert ( - instance.exec_in_container( - ["bash", "-c", f"ls {err_log_file}"], nothrow=True - ) - == err_log_file + "\n" - ) diff --git a/tests/integration/test_render_log_file_name_templates/test.py b/tests/integration/test_render_log_file_name_templates/test.py index 9fa87056d2c..58df32b823e 100644 --- a/tests/integration/test_render_log_file_name_templates/test.py +++ b/tests/integration/test_render_log_file_name_templates/test.py @@ -30,12 +30,8 @@ def started_cluster(): def test_check_file_names(started_cluster): now = datetime.now() - log_file = ( - log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.log" - ) - err_log_file = ( - log_dir + f"clickhouse-server-{now.strftime('%Y')}-{now.strftime('%m')}.err.log" - ) + log_file = log_dir + f"clickhouse-server-{now.strftime('%Y-%m')}.log" + err_log_file = log_dir + f"clickhouse-server-{now.strftime('%Y-%m')}.err.log" logging.debug(f"log_file {log_file} err_log_file {err_log_file}") for name, instance in started_cluster.instances.items(): From fb2affcae31afa1558706592860cc8f32e44ecde Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 08:37:08 +0000 Subject: [PATCH 323/522] Dump all rules --- tests/integration/helpers/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 60b46926589..fe3a858b867 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -161,7 +161,7 @@ class _NetworkManager: self._exec_run(cmd, privileged=True) def dump_rules(self): - cmd = ["iptables", "-L", "DOCKER-USER"] + cmd = ["iptables", "-L"] return self._exec_run(cmd, privileged=True) @staticmethod From 5b102ce7d44d678a674d29e4140a60950c69f537 Mon Sep 17 00:00:00 2001 From: Victor Krasnov Date: Fri, 7 Jul 2023 17:21:47 +0800 Subject: [PATCH 324/522] Amend English version of settings.md --- .../server-configuration-parameters/settings.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 48361b0f157..82dac74e647 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1201,12 +1201,12 @@ Keys: - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. -Both log and error log file names (only file names, not directories) support date and time format placeholders. +Both log and error log file names (only file names, not directories) support date and time format specifiers. -**Placeholders** -Using the following placeholders, you can define a pattern for the resulting file name. “Example” column shows formatting result for `2023-07-06 18:32:07`. 
+**Format specifiers**
+Using the following format specifiers, you can define a pattern for the resulting file name. “Example” column shows possible results for `2023-07-06 18:32:07`.
 
-| Placeholder | Description                                                                                                           | Example                  |
+| Specifier   | Description                                                                                                           | Example                  |
 |-------------|---------------------------------------------------------------------------------------------------------------------|--------------------------|
 | %%          | Literal %                                                                                                             | %                        |
 | %n          | New-line character                                                                                                    |                          |

From 23bd23802fc160a34e09db83c87fda53ef645e19 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 7 Jul 2023 12:26:15 +0300
Subject: [PATCH 325/522] CacheDictionary request only unique keys from source

---
 src/Dictionaries/CacheDictionary.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index c5c88a9f142..e27e25ea7c4 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -552,13 +552,14 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr
Date: Fri, 7 Jul 2023 09:58:35 +0000
Subject: [PATCH 326/522] Skip parallel keepermap test

---
 tests/integration/parallel_skip.json | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json
index e9089fcde73..d060218456a 100644
--- a/tests/integration/parallel_skip.json
+++ b/tests/integration/parallel_skip.json
@@ -66,5 +66,7 @@
     "test_server_reload/test.py::test_remove_http_port",
     "test_server_reload/test.py::test_remove_mysql_port",
     "test_server_reload/test.py::test_remove_postgresql_port",
-    "test_server_reload/test.py::test_remove_tcp_port"
+    "test_server_reload/test.py::test_remove_tcp_port",
+
+    "test_keeper_map/test.py::test_keeper_map_without_zk"
 ]

From 2ada80aa109991f664f6a9495eddcc013215b94d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 7 Jul 2023 13:32:39 +0300
Subject: [PATCH 327/522] Update 02360_send_logs_level_colors.sh

---
 tests/queries/0_stateless/02360_send_logs_level_colors.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02360_send_logs_level_colors.sh b/tests/queries/0_stateless/02360_send_logs_level_colors.sh
index a9b7d4dd3c1..127c94c88e2 100755
--- a/tests/queries/0_stateless/02360_send_logs_level_colors.sh
+++ b/tests/queries/0_stateless/02360_send_logs_level_colors.sh
@@ -21,7 +21,7 @@ spawn bash -c "$command"
 expect 1
 EOF
 
-    rg -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text"
+    grep -F $'\x1b' "$file_name" && cat "$file_name" || echo "ASCII text"
 }
 
 run "$CLICKHOUSE_CLIENT -q 'SELECT 1' 2>$file_name"

From 602392bb6206590e0d24df05eabf69a970767756 Mon Sep 17 00:00:00 2001
From: Alexander Gololobov <440544+davenger@users.noreply.github.com>
Date: Fri, 7 Jul 2023 12:37:16 +0200
Subject: [PATCH 328/522] Print short fault info only from safe fields

---
 src/Daemon/BaseDaemon.cpp | 83 ++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 31 deletions(-)

diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 6d29523a354..f766880bd34 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -310,6 +310,56 @@ private:
     {
         ThreadStatus thread_status;
 
+        /// First log those fields that are safe to access and that should not cause new fault.
+        /// That way we will have some duplicated info in the log but we don't lose important info
+        /// in case of double fault.
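+        /// (The second pass further below reads the query string and query id out of ThreadStatus;
+        /// those fields may themselves be corrupted and can crash the handler a second time.)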
+ + std::string signal_description = "Unknown signal"; + + /// Some of these are not really signals, but our own indications on failure reason. + if (sig == StdTerminate) + signal_description = "std::terminate"; + else if (sig == SanitizerTrap) + signal_description = "sanitizer trap"; + else if (sig >= 0) + signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + + String error_message; + + if (sig != SanitizerTrap) + error_message = signalToErrorMessage(sig, info, *context); + else + error_message = "Sanitizer trap."; + + LOG_FATAL(log, "########## Short fault info ############"); + + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {} ({})", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, + thread_num, signal_description, sig); + + LOG_FATAL(log, fmt::runtime(error_message)); + + String bare_stacktrace_str; + if (stack_trace.getSize()) + { + /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. + /// NOTE: This still require memory allocations and mutex lock inside logger. + /// BTW we can also print it to stderr using write syscalls. + + WriteBufferFromOwnString bare_stacktrace; + writeString("Stack trace:", bare_stacktrace); + for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) + { + writeChar(' ', bare_stacktrace); + writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); + } + + LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + bare_stacktrace_str = bare_stacktrace.str(); + } + + /// Now try to access potentially unsafe data in thread_ptr. + String query_id; String query; @@ -326,16 +376,6 @@ private: } } - std::string signal_description = "Unknown signal"; - - /// Some of these are not really signals, but our own indications on failure reason. - if (sig == StdTerminate) - signal_description = "std::terminate"; - else if (sig == SanitizerTrap) - signal_description = "sanitizer trap"; - else if (sig >= 0) - signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context - LOG_FATAL(log, "########################################"); if (query_id.empty()) @@ -351,30 +391,11 @@ private: thread_num, query_id, query, signal_description, sig); } - String error_message; - - if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, *context); - else - error_message = "Sanitizer trap."; - LOG_FATAL(log, fmt::runtime(error_message)); - if (stack_trace.getSize()) + if (!bare_stacktrace_str.empty()) { - /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. - /// NOTE: This still require memory allocations and mutex lock inside logger. - /// BTW we can also print it to stderr using write syscalls. - - WriteBufferFromOwnString bare_stacktrace; - writeString("Stack trace:", bare_stacktrace); - for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - { - writeChar(' ', bare_stacktrace); - writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace); - } - - LOG_FATAL(log, fmt::runtime(bare_stacktrace.str())); + LOG_FATAL(log, fmt::runtime(bare_stacktrace_str)); } /// Write symbolized stack trace line by line for better grep-ability. 
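
The patch above applies a two-phase pattern: first record the crash facts that are safe to obtain, then attempt the richer (and riskier) report. A minimal standalone sketch of the same idea — illustrative only, not the ClickHouse handler, and simplified to plain POSIX calls — looks like this:

```cpp
#include <csignal>
#include <cstdio>
#include <cstring>
#include <unistd.h>

extern "C" void faultHandler(int sig)
{
    /// Phase 1: async-signal-safe output only (no malloc, no locks),
    /// so the basic facts survive even if phase 2 faults again.
    char buf[64];
    int n = 0;
    for (const char * p = "fault: signal "; *p; ++p)
        buf[n++] = *p;
    if (sig >= 10)
        buf[n++] = static_cast<char>('0' + sig / 10);
    buf[n++] = static_cast<char>('0' + sig % 10);
    buf[n++] = '\n';
    (void)write(STDERR_FILENO, buf, static_cast<size_t>(n));

    /// Phase 2: richer but unsafe reporting (allocates, may read broken state).
    /// If this crashes, the phase 1 line has already reached stderr.
    fprintf(stderr, "signal description: %s\n", strsignal(sig));

    signal(sig, SIG_DFL);
    raise(sig); /// re-raise to get the default action (core dump, exit code)
}

int main()
{
    signal(SIGSEGV, faultHandler);
    volatile int * p = nullptr;
    return *p; /// trigger SIGSEGV to demonstrate the handler
}
```

In the real handler the same split shows up as the "Short fault info" block above, emitted before anything from ThreadStatus is touched.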
From 50bda59a0d226b108ab1521ae6499d35bab01ad0 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Fri, 7 Jul 2023 11:05:42 +0000 Subject: [PATCH 329/522] Fix typo --- .../test_s3_zero_copy_replication/test.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index bc13c127610..2a4e0eece08 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -48,7 +48,7 @@ def get_large_objects_count(cluster, size=100, folder="data"): return counter -def check_objects_exisis(cluster, object_list, folder="data"): +def check_objects_exist(cluster, object_list, folder="data"): minio = cluster.minio_client for obj in object_list: if obj: @@ -466,7 +466,7 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): assert objects01 == objects02 - check_objects_exisis(cluster, objects01) + check_objects_exist(cluster, objects01) node1.query("TRUNCATE TABLE unfreeze_test") node2.query("SYSTEM SYNC REPLICA unfreeze_test", timeout=30) @@ -477,12 +477,12 @@ def s3_zero_copy_unfreeze_base(cluster, unfreeze_query_template): assert objects01 == objects11 assert objects01 == objects12 - check_objects_exisis(cluster, objects11) + check_objects_exist(cluster, objects11) node1.query(f"{unfreeze_query_template} 'freeze_backup1'") wait_mutations(node1, "unfreeze_test", 10) - check_objects_exisis(cluster, objects12) + check_objects_exist(cluster, objects12) node2.query(f"{unfreeze_query_template} 'freeze_backup2'") wait_mutations(node2, "unfreeze_test", 10) @@ -540,8 +540,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) - check_objects_exisis(cluster, objects2) + check_objects_exist(cluster, objects1) + check_objects_exist(cluster, objects2) node2.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", @@ -551,8 +551,8 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) - check_objects_exisis(cluster, objects2) + check_objects_exist(cluster, objects1) + check_objects_exist(cluster, objects2) node1.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '1'", @@ -562,7 +562,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) check_objects_not_exisis(cluster, objects_diff) node1.query( @@ -573,7 +573,7 @@ def s3_zero_copy_drop_detached(cluster, unfreeze_query_template): wait_mutations(node1, "drop_detached_test", 10) wait_mutations(node2, "drop_detached_test", 10) - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) node2.query( "ALTER TABLE drop_detached_test DROP DETACHED PARTITION '0'", @@ -682,7 +682,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): wait_for_active_parts(node2, 4, "zero_copy_mutation") objects1 = node1.get_table_objects("zero_copy_mutation") - check_objects_exisis(cluster, objects1) + check_objects_exist(cluster, objects1) node1.query( """ @@ -710,7 +710,7 @@ 
def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): nodeY = node2 objectsY = nodeY.get_table_objects("zero_copy_mutation") - check_objects_exisis(cluster, objectsY) + check_objects_exist(cluster, objectsY) nodeX.query( """ @@ -745,7 +745,7 @@ def test_s3_zero_copy_keeps_data_after_mutation(started_cluster): """ ) - check_objects_exisis(cluster, objectsY) + check_objects_exist(cluster, objectsY) nodeY.query( """ From d439db31397e8576a6e49e209bf069612ef9d2f5 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:10:55 +0200 Subject: [PATCH 330/522] Print just signal number first, and only then get its description --- src/Daemon/BaseDaemon.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index f766880bd34..422f6ffb63f 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -314,6 +314,11 @@ private: /// That way we will have some duplicated info in the log but we don't loose important info /// in case of double fault. + LOG_FATAL(log, "########## Short fault info ############"); + LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}", + VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, + thread_num, sig); + std::string signal_description = "Unknown signal"; /// Some of these are not really signals, but our own indications on failure reason. @@ -324,6 +329,8 @@ private: else if (sig >= 0) signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context + LOG_FATAL(log, "Signal description: {}", signal_description); + String error_message; if (sig != SanitizerTrap) @@ -331,12 +338,6 @@ private: else error_message = "Sanitizer trap."; - LOG_FATAL(log, "########## Short fault info ############"); - - LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {} ({})", - VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash, - thread_num, signal_description, sig); - LOG_FATAL(log, fmt::runtime(error_message)); String bare_stacktrace_str; From 05649c7b384cb412fa9e25150413460cc969893e Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Fri, 7 Jul 2023 13:22:52 +0200 Subject: [PATCH 331/522] Removed duplicate header --- src/Storages/System/attachSystemTables.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index a9873c821ce..84965b3196b 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -1,4 +1,3 @@ -#include "Storages/System/StorageSystemJemalloc.h" #include "config.h" #include From 6d798e0bde13416488409718fd2db6191dde1197 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 10:16:36 +0000 Subject: [PATCH 332/522] Better check for current_thread --- src/Common/ThreadStatus.cpp | 9 +++++---- src/Common/ThreadStatus.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 7a602afe7e7..b39ea7e8ea8 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -199,13 +199,14 @@ ThreadStatus::~ThreadStatus() if (deleter) deleter(); + chassert(!check_current_thread_on_destruction || current_thread == this); + /// Only change current_thread if it's currently being 
used by this ThreadStatus /// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread - if (check_current_thread_on_destruction) - { - assert(current_thread == this); + if (current_thread == this) current_thread = nullptr; - } + else if (check_current_thread_on_destruction) + LOG_ERROR(log, "current_thread contains invalid address"); } void ThreadStatus::updatePerformanceCounters() diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 7c8dbdb68bd..aa1e3eea6e5 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -224,7 +224,7 @@ private: Poco::Logger * log = nullptr; - bool check_current_thread_on_destruction; + [[maybe_unused]] bool check_current_thread_on_destruction; public: explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); From d9d0e9062a4f30775b1b0d32121fef3da1ea33bf Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 11:41:01 +0000 Subject: [PATCH 333/522] Remove maybe_unused --- src/Common/ThreadStatus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index aa1e3eea6e5..7c8dbdb68bd 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -224,7 +224,7 @@ private: Poco::Logger * log = nullptr; - [[maybe_unused]] bool check_current_thread_on_destruction; + bool check_current_thread_on_destruction; public: explicit ThreadStatus(bool check_current_thread_on_destruction_ = true); From 36e52efc3e7602e43628246562b2db70ca85e765 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Jul 2023 11:57:12 +0000 Subject: [PATCH 334/522] Remove timeout --- .../01164_detach_attach_partition_race.sh | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index 3aec4c3445d..e645cb5aae7 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -2,7 +2,6 @@ # Tags: race CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -15,16 +14,16 @@ $CLICKHOUSE_CLIENT -q "insert into mt values (3)" function thread_insert() { - while true; do - # It might be the case that the threads are terminated and exited, but some children didn't and they are still sending queries when we are dropping tables. - # That's why the "Table doesn't exist" error is allowed, while other errors don't. 
-        $CLICKHOUSE_CLIENT -q "insert into mt values (rand())" 2>&1 | tr -d '\n' | rg -v "Table .+ doesn't exist";
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
+        $CLICKHOUSE_CLIENT -q "insert into mt values (rand())";
     done
 }
 
 function thread_detach_attach()
 {
-    while true; do
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
         $CLICKHOUSE_CLIENT -q "alter table mt detach partition id 'all'";
         $CLICKHOUSE_CLIENT -q "alter table mt attach partition id 'all'";
     done
@@ -32,7 +31,8 @@ function thread_detach_attach()
 
 function thread_drop_detached()
 {
-    while true; do
+    local TIMELIMIT=$((SECONDS+$1))
+    while [ $SECONDS -lt "$TIMELIMIT" ]; do
         $CLICKHOUSE_CLIENT --allow_drop_detached 1 -q "alter table mt drop detached partition id 'all'";
     done
 }
@@ -43,10 +43,10 @@ export -f thread_drop_detached;
 
 TIMEOUT=10
 
-timeout $TIMEOUT bash -c thread_insert &
-timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null &
-timeout $TIMEOUT bash -c thread_detach_attach 2> /dev/null &
-timeout $TIMEOUT bash -c thread_drop_detached 2> /dev/null &
+thread_insert $TIMEOUT &
+thread_detach_attach $TIMEOUT 2> /dev/null &
+thread_detach_attach $TIMEOUT 2> /dev/null &
+thread_drop_detached $TIMEOUT 2> /dev/null &
 
 wait

From 1e0d97c282b1415aed77dd7198ab244a84c7aea9 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 7 Jul 2023 12:19:10 +0000
Subject: [PATCH 335/522] Do not remove inputs after ActionsDAG::merge

---
 src/Interpreters/ActionsDAG.cpp | 2 +-
 .../0_stateless/02812_bug_with_unused_join_columns.reference | 0
 .../queries/0_stateless/02812_bug_with_unused_join_columns.sql | 1 +
 3 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference
 create mode 100644 tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 906875dd314..46c14c503e4 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -1366,7 +1366,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
     first.mergeInplace(std::move(second));
 
     /// Drop unused inputs and, probably, some actions.
-    first.removeUnusedActions();
+    first.removeUnusedActions(false);
 
     return std::make_shared<ActionsDAG>(std::move(first));
 }
diff --git a/tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql
new file mode 100644
index 00000000000..6c801b5b73e
--- /dev/null
+++ b/tests/queries/0_stateless/02812_bug_with_unused_join_columns.sql
@@ -0,0 +1 @@
+SELECT concat(func.name, comb.name) AS x FROM system.functions AS func JOIN system.aggregate_function_combinators AS comb using name WHERE is_aggregate settings allow_experimental_analyzer=1;

From fa7fe5277c99c036ff488997aab46b36c6901610 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 7 Jul 2023 12:25:13 +0000
Subject: [PATCH 336/522] Better comment.
---
 src/Interpreters/ActionsDAG.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 46c14c503e4..2f9fc7e5746 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -1365,7 +1365,7 @@ ActionsDAGPtr ActionsDAG::merge(ActionsDAG && first, ActionsDAG && second)
 {
     first.mergeInplace(std::move(second));
 
-    /// Drop unused inputs and, probably, some actions.
+    /// Some actions could become unused. Do not drop inputs to preserve the header.
     first.removeUnusedActions(false);
 
     return std::make_shared<ActionsDAG>(std::move(first));

From ee33000fc24367166ebf56772b0be4ca0ee25192 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 7 Jul 2023 14:08:54 +0000
Subject: [PATCH 337/522] Fixing tests.

---
 src/Interpreters/ActionsDAG.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index 2f9fc7e5746..e68e2580231 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -465,8 +465,12 @@ void ActionsDAG::removeUnusedActions(const Names & required_names, bool allow_re
 void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_constant_folding)
 {
     std::unordered_set<const Node *> visited_nodes;
+    std::unordered_set<const Node *> used_inputs;
     std::stack<Node *> stack;
 
+    for (const auto * input : inputs)
+        used_inputs.insert(input);
+
     for (const auto * node : outputs)
     {
         visited_nodes.insert(node);
@@ -484,7 +488,7 @@ void ActionsDAG::removeUnusedActions(bool allow_remove_inputs, bool allow_consta
             stack.push(&node);
         }
 
-        if (node.type == ActionType::INPUT && !allow_remove_inputs)
+        if (node.type == ActionType::INPUT && !allow_remove_inputs && used_inputs.contains(&node))
             visited_nodes.insert(&node);
     }

From e08f140d62988cd0340ec75f441891a2c01539c3 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Fri, 7 Jul 2023 17:32:10 +0300
Subject: [PATCH 338/522] Update 02254_projection_broken_part.sh

---
 tests/queries/0_stateless/02254_projection_broken_part.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02254_projection_broken_part.sh b/tests/queries/0_stateless/02254_projection_broken_part.sh
index 6ba5093f234..3521d1d9d16 100755
--- a/tests/queries/0_stateless/02254_projection_broken_part.sh
+++ b/tests/queries/0_stateless/02254_projection_broken_part.sh
@@ -26,7 +26,7 @@ path=$($CLICKHOUSE_CLIENT -q "select path from system.parts where database='$CLI
 $CLICKHOUSE_CLIENT -q "select throwIf(substring('$path', 1, 1) != '/', 'Path is relative: $path')" || exit
 rm -f "$path/ab.proj/data.bin"
 
-$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a;" 2>/dev/null
+$CLICKHOUSE_CLIENT -q "select 3, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null
 
 num_tries=0
while ! $CLICKHOUSE_CLIENT -q "select 4, sum(b) from projection_broken_parts_1 group by a format Null;" 2>/dev/null; do

From 05b7da78130b21367b69a2cc22a319be11de8207 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Fri, 7 Jul 2023 10:32:44 -0400
Subject: [PATCH 339/522] add doc note for MongoDB Atlas

---
 docs/en/engines/table-engines/integrations/mongodb.md | 9 +++++++++
 docs/en/sql-reference/table-functions/mongodb.md      | 8 ++++++++
 2 files changed, 17 insertions(+)

diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 912f81573db..f87e8da8b5b 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -33,6 +33,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name
 
 - `options` — MongoDB connection string options (optional parameter).
 
+:::tip
+If you are using the MongoDB Atlas cloud offering please add these options:
+
+```
+'connectTimeoutMS=10000&ssl=true&authSource=admin'
+```
+
+:::
+
 ## Usage Example {#usage-example}
 
 Create a table in ClickHouse which allows to read data from MongoDB collection:
diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md
index aad60a7003c..a483414c0d4 100644
--- a/docs/en/sql-reference/table-functions/mongodb.md
+++ b/docs/en/sql-reference/table-functions/mongodb.md
@@ -30,6 +30,14 @@ mongodb(host:port, database, collection, user, password, structure [, options])
 
 - `options` - MongoDB connection string options (optional parameter).
 
+:::tip
+If you are using the MongoDB Atlas cloud offering please add these options:
+
+```
+'connectTimeoutMS=10000&ssl=true&authSource=admin'
+```
+
+:::
 
 **Returned Value**

From 0bd16d47be2c2040ab1d6787e953b0c4154ee0a1 Mon Sep 17 00:00:00 2001
From: Slach
Date: Fri, 7 Jul 2023 19:44:20 +0500
Subject: [PATCH 340/522] fix documentation inconsistency about
 additional_table_filters found while reproducing
 https://github.com/ClickHouse/ClickHouse/issues/51948

Signed-off-by: Slach
---
 docs/en/operations/settings/settings.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 5f6cf98646b..195a9e26b53 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -17,7 +17,8 @@ Default value: 0.
 **Example**
 
 ``` sql
-insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+SELECT * FROM table_1;
 ```
 ```response
 ┌─x─┬─y────┐
@@ -30,7 +31,7 @@ insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
 ```sql
 SELECT *
 FROM table_1
-SETTINGS additional_table_filters = (('table_1', 'x != 2'))
+SETTINGS additional_table_filters = {'table_1': 'x != 2'}
 ```
 ```response
 ┌─x─┬─y────┐
@@ -50,7 +51,8 @@ Default value: `''`.
 **Example**
 
 ``` sql
-insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+SELECT * FROM table_1;
 ```
 ```response
 ┌─x─┬─y────┐

From eed1ecb6ba7ba4fdebd1c572881d064c66a0a102 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Fri, 7 Jul 2023 17:47:09 +0300
Subject: [PATCH 341/522] Update 02439_merge_selecting_partitions.sql (#51862)

* Update 02439_merge_selecting_partitions.sql

* Update 02439_merge_selecting_partitions.reference

* Update 02439_merge_selecting_partitions.reference

* fix
---
 .../0_stateless/02439_merge_selecting_partitions.reference | 1 -
 .../0_stateless/02439_merge_selecting_partitions.sql       | 6 ++++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference
index e836994b3aa..e69de29bb2d 100644
--- a/tests/queries/0_stateless/02439_merge_selecting_partitions.reference
+++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.reference
@@ -1 +0,0 @@
-/test/02439/s1/default/block_numbers/123
diff --git a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql
index 88ce2834d6b..3d0c0af84d5 100644
--- a/tests/queries/0_stateless/02439_merge_selecting_partitions.sql
+++ b/tests/queries/0_stateless/02439_merge_selecting_partitions.sql
@@ -22,7 +22,9 @@ select sleepEachRow(3) as higher_probability_of_reproducing_the_issue format Nul
 
 system flush logs;
 
 -- it should not list unneeded partitions where we cannot merge anything
-select distinct path from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%'
-    and op_num in ('List', 'SimpleList', 'FilteredList') and path not like '%/block_numbers/1';
+select * from system.zookeeper_log where path like '/test/02439/s1/' || currentDatabase() || '/block_numbers/%'
+    and op_num in ('List', 'SimpleList', 'FilteredList')
+    and path not like '%/block_numbers/1' and path not like '%/block_numbers/123'
+    and event_time >= now() - interval 1 minute;
 
 drop table rmt;

From vdimir
Date: Fri, 7 Jul 2023 15:01:23 +0000
Subject: [PATCH 342/522] Revert "Remove parts in order for object storage
 always"

This reverts commit c35294317dbff31b8ff8b48f6256162d6d5dc02e.
---
 src/Storages/MergeTree/MergeTreeData.cpp | 34 +++++++++----------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 0ef71895999..fa9bfd38a23 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2137,20 +2137,20 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force)
 
     /// Please don't use "zero-copy replication" (a non-production feature) in production.
     /// It is not ready for production usage. Don't use it.
 
-    /// It also is disabled for any object storage, because it can lead to race conditions on blob removal.
-    /// (see comment at `clearPartsFromFilesystemImpl`).
-    bool need_remove_parts_in_order = false;
+    bool need_remove_parts_in_order = supportsReplication() && getSettings()->allow_remote_fs_zero_copy_replication;
 
-    if (supportsReplication())
+    if (need_remove_parts_in_order)
     {
+        bool has_zero_copy_disk = false;
         for (const auto & disk : getDisks())
         {
-            if (disk->isRemote())
+            if (disk->supportZeroCopyReplication())
             {
-                need_remove_parts_in_order = true;
+                has_zero_copy_disk = true;
                 break;
             }
         }
+        need_remove_parts_in_order = has_zero_copy_disk;
     }
 
     std::vector<DataPartIteratorByStateAndInfo> parts_to_delete;
@@ -2394,28 +2394,18 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
     std::mutex part_names_mutex;
     auto runner = threadPoolCallbackRunner<void>(getPartsCleaningThreadPool().get(), "PartsCleaning");
 
-    /** Straightforward concurrent parts removal can be applied for the case
-      * when we have parts on object storage disk + at least some of them were mutated
-      * (thus, can contains hardlinks to files in the previous parts).
-      * If we are deleting parts that contains hardlinks to the same file we may face into race condition
-      * and delete only local metadata files, but not the blobs on object storage.
-      * Given that, we remove in parallel only "independent" parts that don't have such hardlinks.
-      * Note that it also may be applicable for the regular MergeTree, fixed only for Replicated.
-      *
-      * To avoid this we need to fix race conditions on parts and blob removal.
-      */
+    /// This flag disallow straightforward concurrent parts removal. It's required only in case
+    /// when we have parts on zero-copy disk + at least some of them were mutated.
     bool remove_parts_in_order = false;
-    if (dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr)
+    if (settings->allow_remote_fs_zero_copy_replication && dynamic_cast<StorageReplicatedMergeTree *>(this) != nullptr)
    {
         remove_parts_in_order = std::any_of(
             parts_to_remove.begin(), parts_to_remove.end(),
-            [] (const auto & data_part)
-            {
-                return data_part->isStoredOnRemoteDisk() && data_part->info.getMutationVersion() > 0;
-            }
+            [] (const auto & data_part) { return data_part->isStoredOnRemoteDiskWithZeroCopySupport() && data_part->info.getMutationVersion() > 0; }
         );
     }
+
     if (!remove_parts_in_order)
     {
         /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool.
@@ -2451,7 +2441,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
 
         /// NOTE: Under heavy system load you may get "Cannot schedule a task" from ThreadPool.
         LOG_DEBUG(
-            log, "Removing {} parts from filesystem (concurrently in order): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", "));
+            log, "Removing {} parts from filesystem (concurrently): Parts: [{}]", parts_to_remove.size(), fmt::join(parts_to_remove, ", "));
 
         /// We have "zero copy replication" parts and we are going to remove them in parallel.
         /// The problem is that all parts in a mutation chain must be removed sequentially to avoid "key does not exist" issues.
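
The revert above restores the rule that a batch of parts is deleted sequentially (oldest first) as soon as any part in it sits on a zero-copy disk and has a mutation version, i.e. may hold hardlinks into an ancestor part. A compact sketch of that decision — with hypothetical names, not the real MergeTree interfaces — could look like:

```cpp
#include <algorithm>
#include <vector>

/// Hypothetical stand-in for a data part; the real code checks
/// isStoredOnRemoteDiskWithZeroCopySupport() and info.getMutationVersion().
struct PartInfo
{
    bool on_zero_copy_disk = false;
    unsigned mutation_version = 0; /// > 0: may hardlink files of an older part
};

/// Parts that may share blobs with their ancestors must be removed one by one,
/// oldest first; concurrent removal can race on the shared blob and either
/// leak it or delete it from under a surviving part.
bool mustRemoveSequentially(const std::vector<PartInfo> & batch)
{
    return std::any_of(batch.begin(), batch.end(), [](const PartInfo & part)
    {
        return part.on_zero_copy_disk && part.mutation_version > 0;
    });
}
```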
From 227e415d6d71ca49b486052513786c5f050a6279 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Fri, 7 Jul 2023 15:08:21 +0000
Subject: [PATCH 343/522] Check refcount in
 `RemoveManyObjectStorageOperation::finalize` instead of `execute`

---
 .../DiskObjectStorageTransaction.cpp          | 39 ++++++++++++-------
 src/Disks/ObjectStorages/IMetadataStorage.h   |  5 ++-
 .../MetadataStorageFromDisk.cpp               |  7 +++-
 .../ObjectStorages/MetadataStorageFromDisk.h  |  5 ++-
 ...taStorageFromDiskTransactionOperations.cpp |  5 +++
 ...dataStorageFromDiskTransactionOperations.h | 12 ++++++
 .../MetadataStorageFromPlainObjectStorage.cpp |  5 ++-
 .../MetadataStorageFromPlainObjectStorage.h   |  5 ++-
 8 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
index bd66ada492f..f3dbac445a5 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
@@ -6,6 +6,8 @@
 #include
 #include
 
+#include
+
 namespace DB
 {
 
@@ -150,7 +152,15 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
     RemoveBatchRequest remove_paths;
     bool keep_all_batch_data;
     NameSet file_names_remove_metadata_only;
-    StoredObjects objects_to_remove;
+
+    struct ObjectsToRemove
+    {
+        StoredObjects objects;
+        UnlinkMetadataFileOperationOutcomePtr unlink_outcome;
+    };
+
+    std::vector<ObjectsToRemove> objects_to_remove;
+
     bool remove_from_cache = false;
 
     RemoveManyObjectStorageOperation(
@@ -174,7 +184,6 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
     {
         for (const auto & [path, if_exists] : remove_paths)
         {
-
             if (!metadata_storage.exists(path))
             {
                 if (if_exists)
@@ -188,14 +197,12 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
 
             try
             {
-                uint32_t hardlink_count = metadata_storage.getHardlinkCount(path);
-                auto objects = metadata_storage.getStorageObjects(path);
-
-                tx->unlinkMetadata(path);
-
-                /// File is really redundant
-                if (hardlink_count == 0 && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename()))
-                    std::move(objects.begin(), objects.end(), std::back_inserter(objects_to_remove));
+                auto unlink_outcome = tx->unlinkMetadata(path);
+                if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename()))
+                {
+                    auto objects = metadata_storage.getStorageObjects(path);
+                    objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)});
+                }
             }
             catch (const Exception & e)
             {
@@ -215,15 +222,21 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
 
     void undo() override
     {
-
    }
 
     void finalize() override
     {
+        StoredObjects remove_from_remote;
+        for (auto && [objects, unlink_outcome] : objects_to_remove)
+        {
+            if (unlink_outcome->num_hardlinks == 0)
+                std::move(objects.begin(), objects.end(), std::back_inserter(remove_from_remote));
+        }
+
         /// Read comment inside RemoveObjectStorageOperation class
         /// TL;DR Don't pay any attention to 404 status code
-        if (!objects_to_remove.empty())
-            object_storage.removeObjectsIfExist(objects_to_remove);
+        if (!remove_from_remote.empty())
+            object_storage.removeObjectsIfExist(remove_from_remote);
     }
 };
 
diff --git a/src/Disks/ObjectStorages/IMetadataStorage.h b/src/Disks/ObjectStorages/IMetadataStorage.h
index 00150df9fa3..264c481ee08 100644
--- a/src/Disks/ObjectStorages/IMetadataStorage.h
+++ b/src/Disks/ObjectStorages/IMetadataStorage.h
@@ -22,6 +22,8 @@ namespace ErrorCodes
 }
 
 class IMetadataStorage;
+struct UnlinkMetadataFileOperationOutcome;
+using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
 
 /// Tries to provide some "transactions" interface, which allow
 /// to execute (commit) operations simultaneously. We don't provide
@@ -127,9 +129,10 @@ public:
 
     /// Unlink metadata file and do something special if required
     /// By default just remove file (unlink file).
-    virtual void unlinkMetadata(const std::string & path)
+    virtual UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path)
     {
         unlinkFile(path);
+        return nullptr;
     }
 
     virtual ~IMetadataTransaction() = default;
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
index 9461a82845f..53428c2f6e1 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp
@@ -340,9 +340,12 @@ void MetadataStorageFromDiskTransaction::addBlobToMetadata(const std::string & p
     addOperation(std::make_unique<AddBlobOperation>(path, blob_name, metadata_storage.object_storage_root_path, size_in_bytes, *metadata_storage.disk, metadata_storage));
 }
 
-void MetadataStorageFromDiskTransaction::unlinkMetadata(const std::string & path)
+UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromDiskTransaction::unlinkMetadata(const std::string & path)
 {
-    addOperation(std::make_unique<UnlinkMetadataFileOperation>(path, *metadata_storage.disk, metadata_storage));
+    auto operation = std::make_unique<UnlinkMetadataFileOperation>(path, *metadata_storage.getDisk(), metadata_storage);
+    auto result = operation->outcome;
+    addOperation(std::move(operation));
+    return result;
 }
 
 }
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h
index 5273f0b041e..b518f5e3622 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h
@@ -11,6 +11,9 @@
 namespace DB
 {
 
+struct UnlinkMetadataFileOperationOutcome;
+using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
+
 /// Store metadata on a separate disk
 /// (used for object storages, like S3 and related).
 class MetadataStorageFromDisk final : public IMetadataStorage
@@ -131,7 +134,7 @@ public:
 
     void replaceFile(const std::string & path_from, const std::string & path_to) override;
 
-    void unlinkMetadata(const std::string & path) override;
+    UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override;
 };
 
 
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
index 7463622cb06..78e8764f8fc 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp
@@ -319,6 +319,8 @@ void UnlinkMetadataFileOperation::execute(std::unique_lock<SharedMutex> & metada
         write_operation = std::make_unique<WriteFileOperation>(path, disk, metadata->serializeToString());
         write_operation->execute(metadata_lock);
     }
+    outcome->num_hardlinks = ref_count;
+
     unlink_operation = std::make_unique<UnlinkFileOperation>(path, disk);
     unlink_operation->execute(metadata_lock);
 }
@@ -334,6 +336,9 @@ void UnlinkMetadataFileOperation::undo()
 
     if (write_operation)
         write_operation->undo();
+
+    /// Update outcome to reflect the fact that we have restored the file.
+    outcome->num_hardlinks++;
 }
 
 void SetReadonlyFileOperation::execute(std::unique_lock<SharedMutex> & metadata_lock)
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h
index d8e4892a0a5..4662ebc3423 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h
@@ -3,6 +3,8 @@
 #include
 #include
 
+#include
+
 namespace DB
 {
 class MetadataStorageFromDisk;
@@ -242,9 +244,19 @@ private:
     std::unique_ptr<WriteFileOperation> write_operation;
 };
 
+/// Return the result of operation to the caller.
+/// It is used in `IDiskObjectStorageOperation::finalize` after metadata transaction executed to make decision on blob removal.
+struct UnlinkMetadataFileOperationOutcome
+{
+    UInt32 num_hardlinks = std::numeric_limits<UInt32>::max();
+};
+
+using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
 
 struct UnlinkMetadataFileOperation final : public IMetadataOperation
 {
+    const UnlinkMetadataFileOperationOutcomePtr outcome = std::make_shared<UnlinkMetadataFileOperationOutcome>();
+
     UnlinkMetadataFileOperation(
         const std::string & path_,
         IDisk & disk_,
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
index c119e9f3adc..3650c7eaac8 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp
@@ -135,9 +135,10 @@ void MetadataStorageFromPlainObjectStorageTransaction::addBlobToMetadata(
 {
     /// Noop, local metadata files is only one file, it is the metadata file itself.
 }
-void MetadataStorageFromPlainObjectStorageTransaction::unlinkMetadata(const std::string &)
+
+UnlinkMetadataFileOperationOutcomePtr MetadataStorageFromPlainObjectStorageTransaction::unlinkMetadata(const std::string &)
 {
-    /// Noop, no separate metadata.
+    return nullptr;
 }
 
 }
diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h
index fb5b6d0757c..bd068c1362f 100644
--- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h
+++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.h
@@ -9,6 +9,9 @@
 namespace DB
 {
 
+struct UnlinkMetadataFileOperationOutcome;
+using UnlinkMetadataFileOperationOutcomePtr = std::shared_ptr<UnlinkMetadataFileOperationOutcome>;
+
 /// Object storage is used as a filesystem, in a limited form:
 /// - no directory concept, files only
 /// - no stat/chmod/...
@@ -104,7 +107,7 @@ public:
 
     void unlinkFile(const std::string & path) override;
 
-    void unlinkMetadata(const std::string & path) override;
+    UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override;
 
     void commit() override
     {

From 8266067e1a650453968f278f64e20bd4addc7aa2 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 7 Jul 2023 19:09:55 +0300
Subject: [PATCH 344/522] Fixed style check

---
 src/Dictionaries/CacheDictionary.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index e27e25ea7c4..3011151ef00 100644
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@@ -549,12 +549,12 @@ void CacheDictionary::update(CacheDictionaryUpdateUnitPtr
Date: Fri, 7 Jul 2023 18:39:20 +0200
Subject: [PATCH 345/522] comments for the tests

---
 ...nal_block_structure_mismatch_bug.reference |  1 -
 ...791_final_block_structure_mismatch_bug.sql | 38 ++++++++-----------
 2 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference
index ca810c46a2d..a8401b1cae8 100644
--- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference
+++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.reference
@@ -7,4 +7,3 @@
 1
 2
 3
-2
diff --git a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql
index a82e43d81f4..394e3bff87b 100644
--- a/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql
+++ b/tests/queries/0_stateless/02791_final_block_structure_mismatch_bug.sql
@@ -17,10 +17,18 @@ INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12'));
 INSERT INTO test_block_mismatch VALUES (1, toDateTime('2023-02-02 12:12:12'));
 SELECT count(*) FROM test_block_mismatch FINAL;
 
+optimize table test_block_mismatch final;
+system stop merges test_block_mismatch;
+
 INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12'));
 INSERT INTO test_block_mismatch VALUES (2, toDateTime('2023-01-01 12:12:12'));
+-- one lonely part in 2023-02-02 partition and 3 parts in 2023-01-01 partition.
+-- lonely part will not be processed by PartsSplitter and 2023-01-01's parts will be - previously this led to the `Block structure mismatch in Pipe::unitePipes` exception.
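+-- (with the bug present, the count query below threw that exception instead of returning a result)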
 SELECT count(*) FROM test_block_mismatch FINAL;
+
+-- variations of the test above with slightly modified table definitions
+
 CREATE TABLE test_block_mismatch_sk1
 (
     a UInt32,
@@ -39,10 +47,14 @@ INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12')
 INSERT INTO test_block_mismatch_sk1 VALUES (1, toDateTime('2023-02-02 12:12:12'));
 SELECT count(*) FROM test_block_mismatch_sk1 FINAL;
 
+optimize table test_block_mismatch_sk1 final;
+system stop merges test_block_mismatch_sk1;
+
 INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12'));
 INSERT INTO test_block_mismatch_sk1 VALUES (2, toDateTime('2023-01-01 12:12:12'));
 SELECT count(*) FROM test_block_mismatch_sk1 FINAL;
 
+
 CREATE TABLE test_block_mismatch_sk2
 (
     a UInt32,
@@ -61,29 +73,9 @@ INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12')
 INSERT INTO test_block_mismatch_sk2 VALUES (1, toDateTime('2023-02-02 12:12:12'));
 SELECT count(*) FROM test_block_mismatch_sk2 FINAL;
 
+optimize table test_block_mismatch_sk2 final;
+system stop merges test_block_mismatch_sk2;
+
 INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12'));
 INSERT INTO test_block_mismatch_sk2 VALUES (2, toDateTime('2023-01-01 12:12:12'));
 SELECT count(*) FROM test_block_mismatch_sk2 FINAL;
-
-CREATE TABLE test_block_mismatch_magic_row_dist
-(
-    a UInt32,
-    b DateTime
-)
-ENGINE = ReplacingMergeTree
-PARTITION BY toYYYYMM(b)
-ORDER BY (toDate(b), a);
-
-INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
-INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
-INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
-INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-02-02 12:12:12'));
-
-optimize table test_block_mismatch_magic_row_dist final;
-
-system stop merges test_block_mismatch_magic_row_dist;
-
-INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12'));
-INSERT INTO test_block_mismatch_magic_row_dist VALUES (1, toDateTime('2023-01-01 12:12:12'));
-
-SELECT count(*) FROM test_block_mismatch_magic_row_dist FINAL;

From 93b76c93210bccfda6d6b2413bf07cf48c4f9fa3 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Fri, 7 Jul 2023 16:40:47 +0200
Subject: [PATCH 346/522] better logs on shutdown

---
 base/base/getThreadId.cpp            | 41 +++++++++++++++++-----------
 base/base/getThreadId.h              |  2 ++
 src/Daemon/BaseDaemon.cpp            |  1 +
 src/Interpreters/Context.cpp         | 41 ++++++++++++++++------------
 src/Interpreters/DatabaseCatalog.cpp |  2 ++
 5 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/base/base/getThreadId.cpp b/base/base/getThreadId.cpp
index b6c22bb8856..a42d79c5698 100644
--- a/base/base/getThreadId.cpp
+++ b/base/base/getThreadId.cpp
@@ -15,25 +15,34 @@
 static thread_local uint64_t current_tid = 0;
+
+static void setCurrentThreadId()
+{
+#if defined(OS_ANDROID)
+    current_tid = gettid();
+#elif defined(OS_LINUX)
+    current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
+#elif defined(OS_FREEBSD)
+    current_tid = pthread_getthreadid_np();
+#elif defined(OS_SUNOS)
+    // On Solaris-derived systems, this returns the ID of the LWP, analogous
+    // to a thread.
+    current_tid = static_cast<uint64_t>(pthread_self());
+#else
+    if (0 != pthread_threadid_np(nullptr, &current_tid))
+        throw std::logic_error("pthread_threadid_np returned error");
+#endif
+}
+
 uint64_t getThreadId()
 {
     if (!current_tid)
-    {
-#if defined(OS_ANDROID)
-        current_tid = gettid();
-#elif defined(OS_LINUX)
-        current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
-#elif defined(OS_FREEBSD)
-        current_tid = pthread_getthreadid_np();
-#elif defined(OS_SUNOS)
-        // On Solaris-derived systems, this returns the ID of the LWP, analogous
-        // to a thread.
-        current_tid = static_cast<uint64_t>(pthread_self());
-#else
-        if (0 != pthread_threadid_np(nullptr, &current_tid))
-            throw std::logic_error("pthread_threadid_np returned error");
-#endif
-    }
+        setCurrentThreadId();
 
     return current_tid;
 }
+
+void updateCurrentThreadIdAfterFork()
+{
+    setCurrentThreadId();
+}
diff --git a/base/base/getThreadId.h b/base/base/getThreadId.h
index a1b5ff5f3e8..f90c76029e1 100644
--- a/base/base/getThreadId.h
+++ b/base/base/getThreadId.h
@@ -3,3 +3,5 @@
 
 /// Obtain thread id from OS. The value is cached in thread local variable.
 uint64_t getThreadId();
+
+void updateCurrentThreadIdAfterFork();
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 6d29523a354..d63e9976437 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -1101,6 +1101,7 @@ void BaseDaemon::setupWatchdog()
 
         if (0 == pid)
         {
+            updateCurrentThreadIdAfterFork();
             logger().information("Forked a child process to watch");
 #if defined(OS_LINUX)
             if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 7482450d529..7b3d419cce4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -176,6 +176,15 @@ namespace ErrorCodes
     extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
 }
 
+#define SHUTDOWN(log, desc, ptr, method) do \
+{ \
+    if (ptr) \
+    { \
+        LOG_DEBUG(log, "Shutting down " desc); \
+        ptr->method; \
+    } \
+} while (false) \
+
 
 /** Set of known objects (environment), that could be used in query.
   * Shared (global) part. Order of members (especially, order of destruction) is very important.
@@ -479,35 +488,29 @@ struct ContextSharedPart : boost::noncopyable
         /// Stop periodic reloading of the configuration files.
         /// This must be done first because otherwise the reloading may pass a changed config
         /// to some destroyed parts of ContextSharedPart.
-        if (external_dictionaries_loader)
-            external_dictionaries_loader->enablePeriodicUpdates(false);
-        if (external_user_defined_executable_functions_loader)
-            external_user_defined_executable_functions_loader->enablePeriodicUpdates(false);
-        if (user_defined_sql_objects_loader)
-            user_defined_sql_objects_loader->stopWatching();
+        SHUTDOWN(log, "dictionaries loader", external_dictionaries_loader, enablePeriodicUpdates(false));
+        SHUTDOWN(log, "UDFs loader", external_user_defined_executable_functions_loader, enablePeriodicUpdates(false));
+        SHUTDOWN(log, "another UDFs loader", user_defined_sql_objects_loader, stopWatching());
+
+        LOG_TRACE(log, "Shutting down named sessions");
         Session::shutdownNamedSessions();
 
         /// Waiting for current backups/restores to be finished. This must be done before `DatabaseCatalog::shutdown()`.
-        if (backups_worker)
-            backups_worker->shutdown();
+        SHUTDOWN(log, "backups worker", backups_worker, shutdown());
 
         /** After system_logs have been shut down it is guaranteed that no system table gets created or written to.
* Note that part changes at shutdown won't be logged to part log. */ - if (system_logs) - system_logs->shutdown(); + SHUTDOWN(log, "system logs", system_logs, shutdown()); + LOG_TRACE(log, "Shutting down database catalog"); DatabaseCatalog::shutdown(); - if (merge_mutate_executor) - merge_mutate_executor->wait(); - if (fetch_executor) - fetch_executor->wait(); - if (moves_executor) - moves_executor->wait(); - if (common_executor) - common_executor->wait(); + SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); + SHUTDOWN(log, "fetches executor", fetch_executor, wait()); + SHUTDOWN(log, "moves executor", moves_executor, wait()); + SHUTDOWN(log, "common executor", common_executor, wait()); TransactionLog::shutdownIfAny(); @@ -533,10 +536,12 @@ struct ContextSharedPart : boost::noncopyable /// DDLWorker should be deleted without lock, cause its internal thread can /// take it as well, which will cause deadlock. + LOG_TRACE(log, "Shutting down DDLWorker"); delete_ddl_worker.reset(); /// Background operations in cache use background schedule pool. /// Deactivate them before destructing it. + LOG_TRACE(log, "Shutting down caches"); const auto & caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache] : caches) cache->cache->deactivateBackgroundOperations(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 4cb2f6e3b3d..271330bc64a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -227,9 +227,11 @@ void DatabaseCatalog::shutdownImpl() databases_with_delayed_shutdown.push_back(database.second); continue; } + LOG_TRACE(log, "Shutting down database {}", database.first); database.second->shutdown(); } + LOG_TRACE(log, "Shutting down system databases"); for (auto & database : databases_with_delayed_shutdown) { database->shutdown(); From 88911e1378900d6687e05f08c6cbe592b5d32001 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 7 Jul 2023 16:42:03 +0000 Subject: [PATCH 347/522] Check refcount in finalize for other RemoveObjectStorageOperations --- .../DiskObjectStorageTransaction.cpp | 50 +++++++++---------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index f3dbac445a5..0ae577602b1 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -65,11 +65,18 @@ struct PureMetadataObjectStorageOperation final : public IDiskObjectStorageOpera std::string getInfoForLog() const override { return fmt::format("PureMetadataObjectStorageOperation"); } }; + +struct ObjectsToRemove +{ + StoredObjects objects; + UnlinkMetadataFileOperationOutcomePtr unlink_outcome; +}; + struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; bool delete_metadata_only; - StoredObjects objects_to_remove; + ObjectsToRemove objects_to_remove; bool if_exists; bool remove_from_cache = false; @@ -105,15 +112,12 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation try { - uint32_t hardlink_count = metadata_storage.getHardlinkCount(path); auto objects = metadata_storage.getStorageObjects(path); - tx->unlinkMetadata(path); + auto unlink_outcome = tx->unlinkMetadata(path); - if (hardlink_count == 0) - { - objects_to_remove = std::move(objects); - } + if (unlink_outcome) + objects_to_remove = ObjectsToRemove{std::move(objects), 
std::move(unlink_outcome)}; } catch (const Exception & e) { @@ -142,8 +146,11 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation /// due to network error or similar. And when it will retry an operation it may receive /// a 404 HTTP code. We don't want to threat this code as a real error for deletion process /// (e.g. throwing some exceptions) and thus we just use method `removeObjectsIfExists` - if (!delete_metadata_only && !objects_to_remove.empty()) - object_storage.removeObjectsIfExist(objects_to_remove); + if (!delete_metadata_only && !objects_to_remove.objects.empty() + && objects_to_remove.unlink_outcome->num_hardlinks == 0) + { + object_storage.removeObjectsIfExist(objects_to_remove.objects); + } } }; @@ -153,12 +160,6 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati bool keep_all_batch_data; NameSet file_names_remove_metadata_only; - struct ObjectsToRemove - { - StoredObjects objects; - UnlinkMetadataFileOperationOutcomePtr unlink_outcome; - }; - std::vector objects_to_remove; bool remove_from_cache = false; @@ -197,10 +198,10 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati try { + auto objects = metadata_storage.getStorageObjects(path); auto unlink_outcome = tx->unlinkMetadata(path); if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename())) { - auto objects = metadata_storage.getStorageObjects(path); objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)}); } } @@ -244,10 +245,9 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; - std::unordered_map objects_to_remove; + std::unordered_map objects_to_remove_by_path; bool keep_all_batch_data; NameSet file_names_remove_metadata_only; - StoredObjects objects_to_remove_from_cache; RemoveRecursiveObjectStorageOperation( IObjectStorage & object_storage_, @@ -274,14 +274,11 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp { try { - uint32_t hardlink_count = metadata_storage.getHardlinkCount(path_to_remove); auto objects_paths = metadata_storage.getStorageObjects(path_to_remove); - - tx->unlinkMetadata(path_to_remove); - - if (hardlink_count == 0) + auto unlink_outcome = tx->unlinkMetadata(path_to_remove); + if (unlink_outcome) { - objects_to_remove[path_to_remove] = std::move(objects_paths); + objects_to_remove_by_path[path_to_remove] = ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)}; } } catch (const Exception & e) @@ -331,11 +328,12 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp if (!keep_all_batch_data) { StoredObjects remove_from_remote; - for (auto && [local_path, remote_paths] : objects_to_remove) + for (auto && [local_path, objects_to_remove] : objects_to_remove_by_path) { if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename())) { - std::move(remote_paths.begin(), remote_paths.end(), std::back_inserter(remove_from_remote)); + if (objects_to_remove.unlink_outcome->num_hardlinks == 0) + std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote)); } } /// Read comment inside RemoveObjectStorageOperation class From a96874850ec0faaf049cce01feee6c4a572d7961 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 00:55:57 
+0200 Subject: [PATCH 348/522] Revert "Merge pull request #48115 from save-my-heart/throw_non_parametric_function" This reverts commit 5f930aeb2619bda8f27f3cfc6ba01ffaf48c3d64, reversing changes made to 35572321a14d617cfd110a48d8d3416615bd75c9. --- .../UserDefined/UserDefinedSQLFunctionVisitor.cpp | 7 ------- src/Interpreters/ActionsVisitor.cpp | 7 ------- .../0_stateless/02701_non_parametric_function.reference | 0 .../0_stateless/02701_non_parametric_function.sql | 9 --------- 4 files changed, 23 deletions(-) delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.reference delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.sql diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp index 597e4efe35e..360d1cdf76c 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionVisitor.cpp @@ -20,7 +20,6 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } void UserDefinedSQLFunctionVisitor::visit(ASTPtr & ast) @@ -139,12 +138,6 @@ ASTPtr UserDefinedSQLFunctionVisitor::tryToReplaceFunction(const ASTFunction & f if (!user_defined_function) return nullptr; - /// All UDFs are not parametric for now. - if (function.parameters) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function.name); - } - const auto & function_arguments_list = function.children.at(0)->as(); auto & function_arguments = function_arguments_list->children; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 10502b7e66d..01f2d4cf22e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -78,7 +78,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } static NamesAndTypesList::iterator findColumn(const String & name, NamesAndTypesList & cols) @@ -1106,12 +1105,6 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } - /// Normal functions are not parametric for now. 
- if (node.parameters) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", node.name); - } - Names argument_names; DataTypes argument_types; bool arguments_present = true; diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql deleted file mode 100644 index 5261fa7b082..00000000000 --- a/tests/queries/0_stateless/02701_non_parametric_function.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Tags: no-parallel - -SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } - -CREATE FUNCTION IF NOT EXISTS sum_udf as (x, y) -> (x + y); - -SELECT sum_udf(1)(1, 2); -- { serverError 309 } - -DROP FUNCTION IF EXISTS sum_udf; From f4696d762cb3e15878b99c51bcad9ee15a8972c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 00:56:42 +0200 Subject: [PATCH 349/522] Revert "Merge pull request #49419 from ClickHouse/fix-function-parameter-exception" This reverts commit b921476a3be536b17b967391cefab3888c0c96b2, reversing changes made to 7896d307379bc813665fa5b11d08c202ea67f4fb. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 15 --------------- tests/analyzer_tech_debt.txt | 1 + 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 163092f1b7f..da8933aabaa 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -116,7 +116,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; - extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. @@ -4897,11 +4896,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_expression_untyped->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage()); - if (!parameters.empty()) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_node.formatASTForErrorMessage()); - } - auto lambda_expression_clone = lambda_expression_untyped->clone(); IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); @@ -5018,12 +5012,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); - bool is_executable_udf = false; if (!function) function = FunctionFactory::instance().tryGet(function_name, scope.context); - else - is_executable_udf = true; if (!function) { @@ -5074,12 +5065,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi return result_projection_names; } - /// Executable UDFs may have parameters. They are checked in UserDefinedExecutableFunctionFactory. - if (!parameters.empty() && !is_executable_udf) - { - throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_name); - } - /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function. 
* Then each lambda arguments are initialized with columns, where column source is lambda. * This information is important for later steps of query processing. diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index f7cc13dd2e2..f838a19940a 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -111,6 +111,7 @@ 00917_multiple_joins_denny_crane 00725_join_on_bug_1 00636_partition_key_parts_pruning +00261_storage_aliases_and_array_join 01825_type_json_multiple_files 01281_group_by_limit_memory_tracking 02723_zookeeper_name From 19072c9b475fef191dfd18929cc81c25e8115026 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 02:03:23 +0300 Subject: [PATCH 350/522] Corrent example about parametric executable UDFs. --- docs/en/sql-reference/functions/udf.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md index 9c6b1b0c66b..51734beed03 100644 --- a/docs/en/sql-reference/functions/udf.md +++ b/docs/en/sql-reference/functions/udf.md @@ -171,12 +171,13 @@ Result: └──────────────────────────────┘ ``` -Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). +Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). It also requires the `execute_direct` option (to ensure no shell argument expansion vulnerability). File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings). ```xml <type>executable</type> + <execute_direct>true</execute_direct> <name>test_function_parameter_python</name> <return_type>String</return_type> From 6990f078a0bf87f23d478e83c51001b7cb0d4b8a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Fri, 7 Jul 2023 19:19:30 -0400 Subject: [PATCH 351/522] cleaner way --- src/Daemon/BaseDaemon.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index a75aac7a08e..af2d355d335 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -154,10 +154,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) writePODBinary(*info, out); writePODBinary(signal_context, out); writePODBinary(stack_trace, out); - if (Exception::enable_job_stack_trace) - writeVectorBinary(Exception::thread_frame_pointers, out); - else - writeVarUInt(0, out); + writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out); writeBinary(static_cast<UInt32>(getThreadId()), out); writePODBinary(current_thread, out); From 9144a2dbb2a17af72304267edfe5a81ee7daa0b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 05:23:13 +0200 Subject: [PATCH 352/522] Fix unrelated messages from LSan in clickhouse-client --- tests/clickhouse-test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4860ce0fac9..95470f77987 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -57,6 +57,8 @@ MESSAGES_TO_RETRY = [ "ConnectionPoolWithFailover: Connection failed at try", "DB::Exception: New table appeared in database being dropped or detached.
Try again", "is already started to be removing by another replica right now", + # This is from LSan, and it indicates its own internal problem: + "Unable to get registers from thread", ] MAX_RETRIES = 3 From c828db572078bb68bbcd20c6850073030d4addac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 05:57:23 +0200 Subject: [PATCH 353/522] Allow OOM in AST Fuzzer with Sanitizers --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index d2c8de7a211..5cda0831a84 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -291,7 +291,7 @@ quit if [ "$server_died" == 1 ] then # The server has died. - if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*' server.log > description.txt + if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi From 1bdcd29da2bfc4cab02a0db5dedeb7d0515ac49c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:02:38 +0200 Subject: [PATCH 354/522] Disable one test under Analyzer --- tests/analyzer_tech_debt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index f7cc13dd2e2..a10f72e743a 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -128,3 +128,4 @@ 02784_parallel_replicas_automatic_disabling 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long +00992_system_parts_race_condition_zookeeper_long From adbd85b975aba4618ddf2a934422559410eeea48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:26:44 +0200 Subject: [PATCH 355/522] Fix Docker --- tests/integration/conftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 749f4aa1cde..5933883f7b0 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -42,6 +42,13 @@ def cleanup_environment(): logging.debug(f"Docker ps before start:{r.stdout}") else: logging.debug(f"No running containers") + + logging.debug("Pruning Docker networks") + run_and_check( + ["docker network prune"], + shell=True, + nothrow=True, + ) except Exception as e: logging.exception(f"cleanup_environment:{str(e)}") pass From cdbf279b65cca972ce63dd7fd835d2b46359f7f3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 06:46:28 +0200 Subject: [PATCH 356/522] Fix test 01825_type_json_from_map --- tests/queries/0_stateless/01825_type_json_from_map.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_type_json_from_map.sql b/tests/queries/0_stateless/01825_type_json_from_map.sql index 2480aca1667..51e60843a1a 100644 --- a/tests/queries/0_stateless/01825_type_json_from_map.sql +++ b/tests/queries/0_stateless/01825_type_json_from_map.sql @@ -1,4 +1,5 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-random-merge-tree-settings +-- For example, it is 4 times slower with --merge_max_block_size=5967 --index_granularity=55 
--min_bytes_for_wide_part=847510133 DROP TABLE IF EXISTS t_json; DROP TABLE IF EXISTS t_map; From 0b0caec9c435aaf0df3e01ef64bf06397d11f2ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 07:51:17 +0300 Subject: [PATCH 357/522] Update Context.cpp --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 7b3d419cce4..8df8723123f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -181,7 +181,7 @@ namespace ErrorCodes if (ptr) \ { \ LOG_DEBUG(log, "Shutting down " desc); \ - ptr->method; \ + (ptr)->method; \ } \ } while (false) \ From 4de02c243816f907643eefbbe4743861660b6d99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 07:04:33 +0200 Subject: [PATCH 358/522] Fix test 02354_distributed_with_external_aggregation_memory_usage --- ...distributed_with_external_aggregation_memory_usage.sql | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql index 548660e36b1..c8ec40bb0a7 100644 --- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql +++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql @@ -1,5 +1,7 @@ -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-s3-storage +DROP TABLE IF EXISTS t_2354_dist_with_external_aggr; + create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple(); insert into t_2354_dist_with_external_aggr select number, toString(number) as s, toFixedString(s, 100) from numbers_mt(5e7); @@ -15,8 +17,12 @@ set max_bytes_before_external_group_by = '2G', -- whole aggregation state of local aggregation uncompressed is 5.8G -- it is hard to provide an accurate estimation for memory usage, so 4G is just the actual value taken from the logs + delta +-- also avoid using localhost, so the queries will go over separate connections +-- (otherwise the memory usage for merge will be counted together with the localhost query) select a, b, c, sum(a) as s -from remote('127.0.0.{1,2}', currentDatabase(), t_2354_dist_with_external_aggr) +from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr) group by a, b, c format Null settings max_memory_usage = '4Gi'; + +DROP TABLE t_2354_dist_with_external_aggr; From df31034820c245030b16fddd7b9b3e06c07b0d51 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 7 Jul 2023 20:29:59 +0200 Subject: [PATCH 359/522] rollback merge tasks on exception --- src/Storages/MergeTree/IExecutableTask.h | 11 +++++++---- .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergeFromLogEntryTask.h | 2 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 19 ++++++++++++++++--- .../MergeTree/MergePlainMergeTreeTask.h | 5 +++-- .../MergeTree/MergeTreeBackgroundExecutor.cpp | 17 ++++++++++------- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.h | 2 +- .../MergeTree/MutatePlainMergeTreeTask.cpp | 4 ++-- .../MergeTree/MutatePlainMergeTreeTask.h | 5 +++-- src/Storages/MergeTree/MutateTask.cpp | 15 +++++++++------ .../ReplicatedMergeMutateTaskBase.cpp | 2 +- .../MergeTree/ReplicatedMergeMutateTaskBase.h | 3 ++- .../MergeTree/tests/gtest_executor.cpp | 10 ++++++---- src/Storages/StorageMergeTree.cpp | 2 
+- 15 files changed, 64 insertions(+), 37 deletions(-) diff --git a/src/Storages/MergeTree/IExecutableTask.h b/src/Storages/MergeTree/IExecutableTask.h index d0c2d4a840e..738056e0ea0 100644 --- a/src/Storages/MergeTree/IExecutableTask.h +++ b/src/Storages/MergeTree/IExecutableTask.h @@ -32,8 +32,9 @@ public: using TaskResultCallback = std::function<void(bool)>; virtual bool executeStep() = 0; virtual void onCompleted() = 0; - virtual StorageID getStorageID() = 0; - virtual Priority getPriority() = 0; + virtual StorageID getStorageID() const = 0; + virtual String getQueryId() const = 0; + virtual Priority getPriority() const = 0; virtual ~IExecutableTask() = default; }; @@ -63,12 +64,14 @@ public: } void onCompleted() override { job_result_callback(!res); } - StorageID getStorageID() override { return id; } - Priority getPriority() override + StorageID getStorageID() const override { return id; } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "getPriority() method is not supported by LambdaAdapter"); } + String getQueryId() const override { return id.getShortName() + "::lambda"; } + private: bool res = false; std::function<void()> job_to_execute; diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 17582e7df98..9f54c554c85 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -287,7 +287,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); - task_context->setCurrentQueryId(""); + task_context->setCurrentQueryId(getQueryId()); /// Add merge to list merge_mutate_entry = storage.getContext()->getMergeList().insert( diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.h b/src/Storages/MergeTree/MergeFromLogEntryTask.h index 62908f79fb4..16e69a568ba 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.h +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.h @@ -24,7 +24,7 @@ public: StorageReplicatedMergeTree & storage_, IExecutableTask::TaskResultCallback & task_result_callback_); - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } protected: /// Both return false if we can't execute merge.
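[Editor's note] The interface change above gives every background task a stable pseudo-query-id of the form `<table>::<part name>`, which the following hunks thread into task contexts and exception logging. A rough sketch of that attribution pattern (illustrative only — the types below are stand-ins, not the real `IExecutableTask`):

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

struct ExampleTask
{
    std::string table;
    std::string part;

    /// Counterpart of IExecutableTask::getQueryId() from the patch.
    std::string getQueryId() const { return table + "::" + part; }

    bool executeStep() { throw std::runtime_error("simulated merge failure"); }
};

int main()
{
    ExampleTask task{"default.hits", "202307_1_5_1"};
    try
    {
        task.executeStep();
    }
    catch (const std::exception & e)
    {
        /// Mirrors printExceptionWithRespectToAbort(log, query_id): the id makes
        /// the failing merge identifiable among many concurrent background tasks.
        std::cerr << "Exception while executing background task {" << task.getQueryId()
                  << "}: " << e.what() << '\n';
    }
    return 0;
}
```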
diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index 9302bdf11de..3f5753a0c95 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -3,8 +3,10 @@ #include #include #include +#include #include #include +#include namespace DB @@ -16,7 +18,7 @@ namespace ErrorCodes } -StorageID MergePlainMergeTreeTask::getStorageID() +StorageID MergePlainMergeTreeTask::getStorageID() const { return storage.getStorageID(); } @@ -77,7 +79,6 @@ bool MergePlainMergeTreeTask::executeStep() throw Exception(ErrorCodes::LOGICAL_ERROR, "Task with state SUCCESS mustn't be executed again"); } } - return false; } @@ -145,16 +146,28 @@ void MergePlainMergeTreeTask::finish() storage.merger_mutator.renameMergedTemporaryPart(new_part, future_part->parts, txn, transaction); transaction.commit(); + ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); + write_part_log({}); storage.incrementMergedPartsProfileEvent(new_part->getType()); transfer_profile_counters_to_initial_query(); + + if (auto txn_ = txn_holder.getTransaction()) + { + /// Explicitly commit the transaction if we own it (it's a background merge, not OPTIMIZE) + TransactionLog::instance().commitTransaction(txn_, /* throw_on_unknown_status */ false); + ThreadFuzzer::maybeInjectSleep(); + ThreadFuzzer::maybeInjectMemoryLimitException(); + } + } ContextMutablePtr MergePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); context->makeQueryContext(); - auto queryId = storage.getStorageID().getShortName() + "::" + future_part->name; + auto queryId = getQueryId(); context->setCurrentQueryId(queryId); return context; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.h b/src/Storages/MergeTree/MergePlainMergeTreeTask.h index 95df8c90c9b..5cc9c0e50d3 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h @@ -39,8 +39,9 @@ public: bool executeStep() override; void onCompleted() override; - StorageID getStorageID() override; - Priority getPriority() override { return priority; } + StorageID getStorageID() const override; + Priority getPriority() const override { return priority; } + String getQueryId() const override { return getStorageID().getShortName() + "::" + merge_mutate_entry->future_part->name; } void setCurrentTransaction(MergeTreeTransactionHolder && txn_holder_, MergeTreeTransactionPtr && txn_) { diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp index d4f8d1140a2..6eab4337162 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.cpp @@ -136,7 +136,7 @@ bool MergeTreeBackgroundExecutor::trySchedule(ExecutableTaskPtr task) return true; } -void printExceptionWithRespectToAbort(Poco::Logger * log) +void printExceptionWithRespectToAbort(Poco::Logger * log, const String & query_id) { std::exception_ptr ex = std::current_exception(); @@ -155,14 +155,14 @@ void printExceptionWithRespectToAbort(Poco::Logger * log) if (e.code() == ErrorCodes::ABORTED) LOG_DEBUG(log, getExceptionMessageAndPattern(e, /* with_stacktrace */ false)); else - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, "Exception while executing background task {" + query_id + "}"); }); } catch (...) 
{ NOEXCEPT_SCOPE({ ALLOW_ALLOCATIONS_IN_SCOPE; - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, "Exception while executing background task {" + query_id + "}"); }); } } @@ -239,7 +239,9 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) has_tasks.notify_one(); }; - auto release_task = [this, &erase_from_active, &on_task_done](TaskRuntimeDataPtr && item_) + String query_id; + + auto release_task = [this, &erase_from_active, &on_task_done, &query_id](TaskRuntimeDataPtr && item_) { std::lock_guard guard(mutex); @@ -256,7 +258,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); } on_task_done(std::move(item_)); @@ -267,11 +269,12 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) try { ALLOW_ALLOCATIONS_IN_SCOPE; + item->task->getQueryId(); need_execute_again = item->task->executeStep(); } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); /// Release the task with exception context. /// An exception context is needed to proper delete write buffers without finalization release_task(std::move(item)); @@ -298,7 +301,7 @@ void MergeTreeBackgroundExecutor::routine(TaskRuntimeDataPtr item) } catch (...) { - printExceptionWithRespectToAbort(log); + printExceptionWithRespectToAbort(log, query_id); on_task_done(std::move(item)); return; } diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index ba55fb400ca..6cb9d50436e 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -191,7 +191,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() task_context = Context::createCopy(storage.getContext()); task_context->makeQueryContext(); - task_context->setCurrentQueryId(""); + task_context->setCurrentQueryId(getQueryId()); merge_mutate_entry = storage.getContext()->getMergeList().insert( storage.getStorageID(), diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.h b/src/Storages/MergeTree/MutateFromLogEntryTask.h index b6d3f5d4b6b..42d8307e948 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.h +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.h @@ -31,7 +31,7 @@ public: {} - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } private: diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 3180431d31b..bf8e879e3d0 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes } -StorageID MutatePlainMergeTreeTask::getStorageID() +StorageID MutatePlainMergeTreeTask::getStorageID() const { return storage.getStorageID(); } @@ -137,7 +137,7 @@ ContextMutablePtr MutatePlainMergeTreeTask::createTaskContext() const { auto context = Context::createCopy(storage.getContext()); context->makeQueryContext(); - auto queryId = storage.getStorageID().getShortName() + "::" + future_part->name; + auto queryId = getQueryId(); context->setCurrentQueryId(queryId); return context; } diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h index bd03c276256..ef11780a873 100644 --- 
a/src/Storages/MergeTree/MutatePlainMergeTreeTask.h +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.h @@ -41,8 +41,9 @@ public: bool executeStep() override; void onCompleted() override; - StorageID getStorageID() override; - Priority getPriority() override { return priority; } + StorageID getStorageID() const override; + Priority getPriority() const override { return priority; } + String getQueryId() const override { return getStorageID().getShortName() + "::" + merge_mutate_entry->future_part->name; } private: diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..be512884756 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -884,8 +884,9 @@ public: } void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { @@ -1206,8 +1207,9 @@ public: explicit MutateAllPartColumnsTask(MutationContextPtr ctx_) : ctx(ctx_) {} void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { @@ -1434,8 +1436,9 @@ public: explicit MutateSomePartColumnsTask(MutationContextPtr ctx_) : ctx(ctx_) {} void onCompleted() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - StorageID getStorageID() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } - Priority getPriority() override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + StorageID getStorageID() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + Priority getPriority() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } + String getQueryId() const override { throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented"); } bool executeStep() override { diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp index 61356558e16..b4748ee77ea 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes extern const int PART_IS_TEMPORARILY_LOCKED; } -StorageID ReplicatedMergeMutateTaskBase::getStorageID() +StorageID ReplicatedMergeMutateTaskBase::getStorageID() const { return storage.getStorageID(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h 
b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h index 1e7f9834245..ba514f11f20 100644 --- a/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h +++ b/src/Storages/MergeTree/ReplicatedMergeMutateTaskBase.h @@ -33,7 +33,8 @@ public: ~ReplicatedMergeMutateTaskBase() override = default; void onCompleted() override; - StorageID getStorageID() override; + StorageID getStorageID() const override; + String getQueryId() const override { return getStorageID().getShortName() + "::" + selected_entry->log_entry->new_part_name; } bool executeStep() override; protected: diff --git a/src/Storages/MergeTree/tests/gtest_executor.cpp b/src/Storages/MergeTree/tests/gtest_executor.cpp index 5815b74284a..6f34eb4dfbd 100644 --- a/src/Storages/MergeTree/tests/gtest_executor.cpp +++ b/src/Storages/MergeTree/tests/gtest_executor.cpp @@ -39,7 +39,7 @@ public: return false; } - StorageID getStorageID() override + StorageID getStorageID() const override { return {"test", name}; } @@ -51,7 +51,8 @@ public: throw std::runtime_error("Unlucky..."); } - Priority getPriority() override { return {}; } + Priority getPriority() const override { return {}; } + String getQueryId() const override { return {}; } private: std::mt19937 generator; @@ -79,14 +80,15 @@ public: return --step_count; } - StorageID getStorageID() override + StorageID getStorageID() const override { return {"test", name}; } void onCompleted() override {} - Priority getPriority() override { return priority; } + Priority getPriority() const override { return priority; } + String getQueryId() const override { return "test::lambda"; } private: String name; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..add1d112c1a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1269,7 +1269,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign { /// TODO Transactions: avoid beginning transaction if there is nothing to merge. txn = TransactionLog::instance().beginTransaction(); - transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ true}; + transaction_for_merge = MergeTreeTransactionHolder{txn, /* autocommit = */ false}; } bool has_mutations = false; From 44ae3a0986c941f234a7cb63468e77b626d10713 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 8 Jul 2023 14:58:38 +0200 Subject: [PATCH 360/522] fix a bug in projections --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 13 ++++++++++++- src/Storages/MergeTree/IMergeTreeDataPart.h | 9 ++++++++- src/Storages/MergeTree/MergeTreeData.cpp | 14 +++++++++++++- src/Storages/MergeTree/MutateTask.cpp | 2 +- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 4 ++-- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 7 files changed, 38 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index b9591864869..9309f0d4df6 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -312,15 +312,20 @@ IMergeTreeDataPart::IMergeTreeDataPart( const IMergeTreeDataPart * parent_part_) : DataPartStorageHolder(data_part_storage_) , storage(storage_) - , name(name_) + , mutable_name(name_) + , name(mutable_name) , info(info_) , index_granularity_info(storage_, part_type_) , part_type(part_type_) , parent_part(parent_part_) + , parent_part_name(parent_part ? 
parent_part->name : "") , use_metadata_cache(storage.use_metadata_cache) { if (parent_part) + { + chassert(parent_part_name.starts_with(parent_part->info.partition_id)); /// Make sure there's no prefix state = MergeTreeDataPartState::Active; + } incrementStateMetric(state); incrementTypeMetric(part_type); @@ -337,6 +342,12 @@ IMergeTreeDataPart::~IMergeTreeDataPart() decrementTypeMetric(part_type); } +void IMergeTreeDataPart::setName(const String & new_name) +{ + mutable_name = new_name; + for (auto & proj_part : projection_parts) + proj_part.second->parent_part_name = new_name; +} String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) const { diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 92dbe084081..2c0cf37b3a5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -200,9 +200,14 @@ public: /// If token is not empty, block id is calculated based on it instead of block data String getZeroLevelPartBlockID(std::string_view token) const; + void setName(const String & new_name); + const MergeTreeData & storage; - String name; +private: + String mutable_name; +public: + const String & name; // const ref to private mutable_name MergeTreePartInfo info; /// Part unique identifier. @@ -386,6 +391,7 @@ public: bool isProjectionPart() const { return parent_part != nullptr; } const IMergeTreeDataPart * getParentPart() const { return parent_part; } + String getParentPartName() const { return parent_part_name; } const std::map> & getProjectionParts() const { return projection_parts; } @@ -519,6 +525,7 @@ protected: /// Not null when it's a projection part. const IMergeTreeDataPart * parent_part; + String parent_part_name; std::map> projection_parts; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b7fde55880e..f81726863b2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7455,7 +7455,19 @@ void MergeTreeData::reportBrokenPart(MergeTreeData::DataPartPtr data_part) const return; if (data_part->isProjectionPart()) - data_part = data_part->getParentPart()->shared_from_this(); + { + String parent_part_name = data_part->getParentPartName(); + auto parent_part = getPartIfExists(parent_part_name, {DataPartState::PreActive, DataPartState::Active, DataPartState::Outdated}); + + if (!parent_part) + { + LOG_WARNING(log, "Did not find parent part {} for potentially broken projection part {}", + parent_part_name, data_part->getDataPartStorage().getFullPath()); + return; + } + + data_part = parent_part; + } if (data_part->getDataPartStorage().isBroken()) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f4a071b8f27..41f767cc4de 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -917,7 +917,7 @@ public: { LOG_DEBUG(log, "Merged a projection part in level {}", current_level); selected_parts[0]->renameTo(projection.name + ".proj", true); - selected_parts[0]->name = projection.name; + selected_parts[0]->setName(projection.name); selected_parts[0]->is_temp = false; ctx->new_data_part->addProjectionPart(name, std::move(selected_parts[0])); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4128654a632..22e2ab945eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -788,7 +788,7 @@ std::pair<std::vector<String>, bool> ReplicatedMergeTreeSinkImpl<async_insert>:: part->info.level = 0; part->info.mutation = 0; - part->name = part->getNewName(part->info); + part->setName(part->getNewName(part->info)); StorageReplicatedMergeTree::LogEntry log_entry; @@ -914,7 +914,7 @@ std::pair<std::vector<String>, bool> ReplicatedMergeTreeSinkImpl<async_insert>:: /// Note that it may also appear on filesystem right now in PreActive state due to concurrent inserts of the same data. /// It will be checked when we will try to rename directory. - part->name = existing_part_name; + part->setName(existing_part_name); part->info = MergeTreePartInfo::fromPartName(existing_part_name, storage.format_version); /// Used only for exception messages. block_number = part->info.min_block; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 4c0c0c8e3fa..d427a857f07 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2260,7 +2260,7 @@ void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock { part->info.min_block = part->info.max_block = increment.get(); part->info.mutation = 0; - part->name = part->getNewName(part->info); + part->setName(part->getNewName(part->info)); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2da18f69baf..8a21da69460 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -9262,7 +9262,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } MergeTreeData::MutableDataPartPtr new_data_part = createEmptyPart(new_part_info, partition, lost_part_name, NO_TRANSACTION_PTR); - new_data_part->name = lost_part_name; + new_data_part->setName(lost_part_name); try { From 85531f32cfb5339c45dade1b84c2a20f0a694cfe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 19:32:44 +0300 Subject: [PATCH 361/522] Update 02804_clusterAllReplicas_insert.sql --- tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql index 05bda19eb9e..c39d9e7d78b 100644 --- a/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql +++ b/tests/queries/0_stateless/02804_clusterAllReplicas_insert.sql @@ -3,3 +3,4 @@ create table data (key Int) engine=Memory(); -- NOTE: internal_replication is false, so INSERT will be done only into one shard insert into function clusterAllReplicas(test_cluster_two_shards, currentDatabase(), data, rand()) values (2); select * from data order by key; +drop table data; From 2a8c7d0ea23e2b7a41d03d32b0fb44513fa309e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 8 Jul 2023 21:52:19 +0300 Subject: [PATCH 362/522] Update src/Parsers/ParserCreateQuery.cpp Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> --- src/Parsers/ParserCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index c4c02ab7417..415d3321eb5 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -304,7 +304,7 @@ bool ParserTablePropertiesDeclarationList::parseImpl(Pos & pos, ASTPtr & node, E for (const auto & elem : list->children) { - if (auto *cd = elem->as<ASTColumnDeclaration>()) + if (auto * cd = elem->as<ASTColumnDeclaration>()) { if (cd->primary_key_specifier) {
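[Editor's note] The one-character hunk above touches a common ClickHouse AST idiom: `IAST::as<T>()` returns a typed pointer when the node is of the requested type and `nullptr` otherwise, so `if (auto * cd = ...)` both filters and binds in one step. A simplified sketch of the pattern (the AST classes here are mock-ups, not ClickHouse's):

```cpp
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct IAST
{
    virtual ~IAST() = default;

    /// Checked downcast in the spirit of IAST::as<T>(): nullptr on type mismatch.
    template <typename T>
    T * as() { return dynamic_cast<T *>(this); }
};

struct ASTColumnDeclaration : IAST
{
    std::string name;
    bool primary_key_specifier = false; /// set when the column is declared "... PRIMARY KEY"
};

struct ASTIndexDeclaration : IAST {};

int main()
{
    std::vector<std::unique_ptr<IAST>> children;
    auto col = std::make_unique<ASTColumnDeclaration>();
    col->name = "id";
    col->primary_key_specifier = true;
    children.push_back(std::move(col));
    children.push_back(std::make_unique<ASTIndexDeclaration>());

    for (auto & elem : children)
        if (auto * cd = elem->as<ASTColumnDeclaration>())  /// same shape as the hunk above
            if (cd->primary_key_specifier)
                std::cout << cd->name << " carries an inline PRIMARY KEY\n";
    return 0;
}
```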
From a10aa9ad50db5bd3b95a7ebe4ccce4bf10c8e1f6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 3 May 2023 10:09:11 +0200 Subject: [PATCH 363/522] Force libunwind usage (removes gcc_eh support) libunwind is reentrant and signal safe, and works faster than gcc_eh (plus it has some custom patches for problems that have been found during its usage in ClickHouse). gcc_eh may be missing in the system (if gcc was not installed), and even if it exists clickhouse uses -nodefaultlibs, so some care should be taken to make it work. Also this library is tiny and there shouldn't be any problem to require it always (there is already a tendency to require some contrib libraries, i.e. poco). Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 1 - cmake/darwin/default_libs.cmake | 1 + cmake/target.cmake | 1 - cmake/unwind.cmake | 14 +------------- contrib/jemalloc-cmake/CMakeLists.txt | 17 +++++++---------- contrib/libcxx-cmake/CMakeLists.txt | 4 +--- contrib/libcxxabi-cmake/CMakeLists.txt | 6 ++---- docker/test/fasttest/run.sh | 1 - docs/en/development/build-cross-riscv.md | 2 +- programs/server/Server.cpp | 6 +----- src/Common/QueryProfiler.cpp | 8 ++++---- src/Common/QueryProfiler.h | 4 ++-- src/Common/StackTrace.cpp | 9 +-------- src/Common/config.h.in | 1 - .../System/StorageSystemBuildOptions.cpp.in | 1 - 15 files changed, 21 insertions(+), 55 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 06ee98b5ee1..45c3c422d7a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,6 @@ if (ENABLE_FUZZING) set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) set (ENABLE_LIBRARIES 0) set (ENABLE_SSL 1) - set (USE_UNWIND ON) set (ENABLE_EMBEDDED_COMPILER 0) set (ENABLE_EXAMPLES 0) set (ENABLE_UTILS 0) diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 812847e6201..42b8473cb75 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -15,6 +15,7 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) include (cmake/cxx.cmake) link_libraries(global-group) diff --git a/cmake/target.cmake b/cmake/target.cmake index 0791da87bf0..ffab08f1103 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -40,7 +40,6 @@ if (CMAKE_CROSSCOMPILING) set (OPENSSL_NO_ASM ON CACHE INTERNAL "") set (ENABLE_JEMALLOC ON CACHE INTERNAL "") set (ENABLE_PARQUET OFF CACHE INTERNAL "") - set (USE_UNWIND OFF CACHE INTERNAL "") set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_HDFS OFF CACHE INTERNAL "") set (ENABLE_MYSQL OFF CACHE INTERNAL "") diff --git a/cmake/unwind.cmake b/cmake/unwind.cmake index c9f5f30a5d6..84e4f01b752 100644 --- a/cmake/unwind.cmake +++ b/cmake/unwind.cmake @@ -1,13 +1 @@ -option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) - -if (USE_UNWIND) - add_subdirectory(contrib/libunwind-cmake) - set (UNWIND_LIBRARIES unwind) - set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) - - message (STATUS "Using libunwind: ${UNWIND_LIBRARIES}") -else () - set (EXCEPTION_HANDLING_LIBRARY gcc_eh) -endif () - -message (STATUS "Using exception handler: ${EXCEPTION_HANDLING_LIBRARY}") +add_subdirectory(contrib/libunwind-cmake) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 97f723bb540..20025dfc63e 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -170,16 +170,13 @@ endif () target_compile_definitions(_jemalloc PRIVATE
-DJEMALLOC_PROF=1) -if (USE_UNWIND) - # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. - # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. - # At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. - - # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). - - target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) - target_link_libraries (_jemalloc PRIVATE unwind) -endif () +# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++. +# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`. +# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing. +# +# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1). +target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1) +target_link_libraries (_jemalloc PRIVATE unwind) # for RTLD_NEXT target_compile_options(_jemalloc PRIVATE -D_GNU_SOURCE) diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index a13e4f0f60a..b7e59e2c9a3 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -61,9 +61,7 @@ target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$ target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) if (USE_MUSL) target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0473527912e..c7ee34e6e28 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -35,12 +35,10 @@ target_include_directories(cxxabi SYSTEM BEFORE ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. -target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY}) +target_link_libraries(cxxabi PUBLIC unwind) # Enable capturing stack traces for all exceptions. 
-if (USE_UNWIND) - target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) install( TARGETS cxxabi diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 989ed9d2fbb..828c73e6781 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -166,7 +166,6 @@ function run_cmake "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" - "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index e3550a046c7..c21353f7f73 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7fbbcd39446..071f7d3177e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1627,7 +1627,7 @@ try /// Init trace collector only after trace_log system table was created /// Disable it if we collect test coverage information, because it will work extremely slow. -#if USE_UNWIND && !WITH_COVERAGE +#if !WITH_COVERAGE /// Profilers cannot work reliably with any other libunwind or without PHDR cache. if (hasPHDRCache()) { @@ -1650,10 +1650,6 @@ try /// Describe multiple reasons when query profiler cannot work. -#if !USE_UNWIND - LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they cannot work without bundled unwind (stack unwinding) library."); -#endif - #if WITH_COVERAGE LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage."); #endif diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 313d4b77739..dc9f3610513 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -91,7 +91,7 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -#if USE_UNWIND +#ifndef __APPLE__ Timer::Timer() : log(&Poco::Logger::get("Timer")) {} @@ -209,13 +209,13 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t UNUSED(pause_signal); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); -#elif !USE_UNWIND +#elif defined(__APPLE__) UNUSED(thread_id); UNUSED(clock_type); UNUSED(period); UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work with stock libunwind"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); #else /// Sanity check. 
if (!hasPHDRCache()) @@ -264,7 +264,7 @@ QueryProfilerBase::~QueryProfilerBase() template void QueryProfilerBase::cleanup() { -#if USE_UNWIND +#ifndef __APPLE__ timer.stop(); signal_handler_disarmed = true; #endif diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index 6a9ed10e315..87432a4b699 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -28,7 +28,7 @@ namespace DB * Note that signal handler implementation is defined by template parameter. See QueryProfilerReal and QueryProfilerCPU. */ -#if USE_UNWIND +#ifndef __APPLE__ class Timer { public: @@ -60,7 +60,7 @@ private: Poco::Logger * log; -#if USE_UNWIND +#ifndef __APPLE__ inline static thread_local Timer timer = Timer(); #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index aea0f854fe1..c13b63854e4 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -20,13 +20,10 @@ #include #include #include +#include #include "config.h" -#if USE_UNWIND -# include -#endif - namespace { /// Currently this variable is set up once on server startup. @@ -287,12 +284,8 @@ StackTrace::StackTrace(const ucontext_t & signal_context) void StackTrace::tryCapture() { -#if USE_UNWIND size = unw_backtrace(frame_pointers.data(), capacity); __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0])); -#else - size = 0; -#endif } /// ClickHouse uses bundled libc++ so type names will be the same on every system thus it's safe to hardcode them diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 71b4e098c8f..1cb13d3ae3e 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,7 +9,6 @@ #cmakedefine01 USE_AWS_S3 #cmakedefine01 USE_AZURE_BLOB_STORAGE #cmakedefine01 USE_BROTLI -#cmakedefine01 USE_UNWIND #cmakedefine01 USE_CASSANDRA #cmakedefine01 USE_SENTRY #cmakedefine01 USE_GRPC diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index c2d35c96ce5..4e7a25d7726 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -23,7 +23,6 @@ const char * auto_config_build[] "USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@", "USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@", "USE_JEMALLOC", "@ENABLE_JEMALLOC@", - "USE_UNWIND", "@USE_UNWIND@", "USE_ICU", "@USE_ICU@", "USE_H3", "@USE_H3@", "USE_MYSQL", "@USE_MYSQL@", From 45d36b736a8d6b207fb9cf88f8f0ba8f2a7e0ce6 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Jul 2023 23:14:02 +0000 Subject: [PATCH 364/522] Update version_date.tsv and changelogs after v23.6.2.18-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.6.2.18-stable.md | 25 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.6.2.18-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f13fcdc14d6..8a6324aef88 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
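[Editor's note on [PATCH 363/522] above] Once libunwind is mandatory, stack capture always goes through `unw_backtrace()`, the extension that the commit message mentions ClickHouse's bundled libunwind provides. A minimal usage sketch (assumes a libunwind that exports `unw_backtrace`, e.g. the patched LLVM libunwind bundled by ClickHouse or the nongnu implementation; link with `-lunwind`):

```cpp
#define UNW_LOCAL_ONLY /// local unwinding is enough for self-backtraces
#include <libunwind.h>
#include <cstdio>

int main()
{
    void * frames[64];
    /// Fills `frames` with return addresses of the current call stack; after
    /// the patch, StackTrace::tryCapture() relies on this call unconditionally.
    int size = unw_backtrace(frames, 64);
    for (int i = 0; i < size; ++i)
        std::printf("#%d %p\n", i, frames[i]);
    return 0;
}
```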
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e5be3f6d73..7f453627601 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8693193455f..1fa7b83ae16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.6.2.18-stable.md b/docs/changelogs/v23.6.2.18-stable.md new file mode 100644 index 00000000000..1f872a190ba --- /dev/null +++ b/docs/changelogs/v23.6.2.18-stable.md @@ -0,0 +1,25 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.2.18-stable (89f39a7ccfe) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Build/Testing/Packaging Improvement +* Backported in [#51888](https://github.com/ClickHouse/ClickHouse/issues/51888): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2a098d8c1da..dd46f6103d0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 @@ -55,6 +56,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 v22.8.17.17-lts 2023-04-22 From c968fe808fc1b7693e53bb3d4f9adc03f41c7066 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 8 Jul 2023 23:17:41 +0000 Subject: [PATCH 365/522] Update version_date.tsv and changelogs after v22.8.20.11-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v22.8.20.11-lts.md | 20 ++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 5 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v22.8.20.11-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index f13fcdc14d6..8a6324aef88 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 5e5be3f6d73..7f453627601 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 8693193455f..1fa7b83ae16 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.1.1524" +ARG VERSION="23.6.2.18" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v22.8.20.11-lts.md b/docs/changelogs/v22.8.20.11-lts.md new file mode 100644 index 00000000000..bd45ce9319a --- /dev/null +++ b/docs/changelogs/v22.8.20.11-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.20.11-lts (c9ca79e24e8) FIXME as compared to v22.8.19.10-lts (989bc2fe8b0) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). 
+* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 2a098d8c1da..dd46f6103d0 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 v23.5.3.24-stable 2023-06-17 @@ -55,6 +56,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.20.11-lts 2023-07-09 v22.8.19.10-lts 2023-06-17 v22.8.18.31-lts 2023-06-12 v22.8.17.17-lts 2023-04-22 From 8d9e1d41c5d0dc8220b97c68ebe6a21c10042b2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Jul 2023 03:39:05 +0200 Subject: [PATCH 366/522] Move a test to the right place --- .../00178_query_datetime64_index.reference | 0 .../{1_stateful => 0_stateless}/00178_query_datetime64_index.sql | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/{1_stateful => 0_stateless}/00178_query_datetime64_index.reference (100%) rename tests/queries/{1_stateful => 0_stateless}/00178_query_datetime64_index.sql (100%) diff --git a/tests/queries/1_stateful/00178_query_datetime64_index.reference b/tests/queries/0_stateless/00178_query_datetime64_index.reference similarity index 100% rename from tests/queries/1_stateful/00178_query_datetime64_index.reference rename to tests/queries/0_stateless/00178_query_datetime64_index.reference diff --git a/tests/queries/1_stateful/00178_query_datetime64_index.sql b/tests/queries/0_stateless/00178_query_datetime64_index.sql similarity index 100% rename from tests/queries/1_stateful/00178_query_datetime64_index.sql rename to tests/queries/0_stateless/00178_query_datetime64_index.sql From 29f625e7bb9f8553b3e42850a9bcf9e8411a700e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Jul 2023 03:43:18 +0200 Subject: [PATCH 367/522] Add a check to validate that the stateful tests are stateful --- utils/check-style/check-style | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index e7c06fefee2..0b3b86b4772 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -407,3 +407,6 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep # If a user is doing dynamic or typeid cast with a pointer, and immediately dereferencing it, it is unsafe. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep --line-number -P '(dynamic|typeid)_cast<[^>]+\*>\([^\(\)]+\)->' | grep -P '.' && echo "It's suspicious when you are doing a dynamic_cast or typeid_cast with a pointer and immediately dereferencing it. Use references instead of pointers or check a pointer to nullptr." 
+ +# The stateful directory should only contain the tests that depend on the test dataset (hits or visits). +find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."' From de0837fe21d3f7330fc58712c887b1cc570af05a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 9 Jul 2023 03:55:25 +0200 Subject: [PATCH 368/522] Check that functional tests cleanup their tables --- tests/clickhouse-test | 15 ++++++++++++++- .../02788_fix_logical_error_in_sorting.sql | 6 ++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 95470f77987..46ec19b041d 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1212,7 +1212,20 @@ class TestCase: seconds_left = max( args.timeout - (datetime.now() - start_time).total_seconds(), 20 ) - drop_database_query = "DROP DATABASE IF EXISTS " + database + + leftover_tables = clickhouse_execute( + args, + f"SHOW TABLES FROM {database}", + timeout=seconds_left, + settings={ + "log_comment": args.testcase_basename, + }, + ).decode().replace("\n", ", "); + + if 0 != len(leftover_tables): + raise Exception(f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally.") + + drop_database_query = f"DROP DATABASE IF EXISTS {database}" if args.replicated_database: drop_database_query += " ON CLUSTER test_cluster_database_replicated" diff --git a/tests/queries/0_stateless/02788_fix_logical_error_in_sorting.sql b/tests/queries/0_stateless/02788_fix_logical_error_in_sorting.sql index 1ff68191800..60905e2634f 100644 --- a/tests/queries/0_stateless/02788_fix_logical_error_in_sorting.sql +++ b/tests/queries/0_stateless/02788_fix_logical_error_in_sorting.sql @@ -1,3 +1,6 @@ +DROP TABLE IF EXISTS session_events; +DROP TABLE IF EXISTS event_types; + CREATE TABLE session_events ( clientId UInt64, @@ -75,3 +78,6 @@ FROM WHERE runningDifference(timestamp) >= 500 ORDER BY timestamp ASC FORMAT Null; + +DROP TABLE session_events; +DROP TABLE event_types; From a61bc7cfa593a510f96c670ef987129a12dc9b40 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 9 Jul 2023 02:03:47 +0000 Subject: [PATCH 369/522] Automatic style fix --- tests/clickhouse-test | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 46ec19b041d..1145dfa9358 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1213,17 +1213,23 @@ class TestCase: args.timeout - (datetime.now() - start_time).total_seconds(), 20 ) - leftover_tables = clickhouse_execute( - args, - f"SHOW TABLES FROM {database}", - timeout=seconds_left, - settings={ - "log_comment": args.testcase_basename, - }, - ).decode().replace("\n", ", "); + leftover_tables = ( + clickhouse_execute( + args, + f"SHOW TABLES FROM {database}", + timeout=seconds_left, + settings={ + "log_comment": args.testcase_basename, + }, + ) + .decode() + .replace("\n", ", ") + ) if 0 != len(leftover_tables): - raise Exception(f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally.") + raise Exception( + f"The test should cleanup its tables ({leftover_tables}), otherwise it is inconvenient for running it locally." 
+            )

         drop_database_query = f"DROP DATABASE IF EXISTS {database}"
         if args.replicated_database:
             drop_database_query += " ON CLUSTER test_cluster_database_replicated"

From 7311469c32f448a95e046c013f3ef34a2bd880b7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 9 Jul 2023 04:17:15 +0200
Subject: [PATCH 370/522] Fix test_extreme_deduplication

---
 .../test_extreme_deduplication/test.py        | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tests/integration/test_extreme_deduplication/test.py b/tests/integration/test_extreme_deduplication/test.py
index 71f783d37c9..9ce3582a826 100644
--- a/tests/integration/test_extreme_deduplication/test.py
+++ b/tests/integration/test_extreme_deduplication/test.py
@@ -49,20 +49,25 @@ def test_deduplication_window_in_seconds(started_cluster):
     node.query("INSERT INTO simple VALUES (0, 1)")
     assert TSV(node.query("SELECT count() FROM simple")) == TSV("2\n")

-    # wait clean thread
-    time.sleep(2)
+    # Wait for the cleanup thread.
+    for i in range(100):
+        time.sleep(1)
+
+        if (
+            TSV.toMat(
+                node.query(
+                    "SELECT count() FROM system.zookeeper WHERE path = '/clickhouse/tables/0/simple/blocks'"
+                )
+            )[0][0]
+            <= "1"
+        ):
+            break
+    else:
+        raise Exception("The blocks from Keeper were not removed in time")

-    assert (
-        TSV.toMat(
-            node.query(
-                "SELECT count() FROM system.zookeeper WHERE path='/clickhouse/tables/0/simple/blocks'"
-            )
-        )[0][0]
-        == "1"
-    )
     node.query(
         "INSERT INTO simple VALUES (0, 0)"
-    )  # deduplication doesn't works here, the first hash node was deleted
+    )  # Deduplication doesn't work here as the first hash node was deleted
     assert TSV.toMat(node.query("SELECT count() FROM simple"))[0][0] == "3"

     node1.query("""DROP TABLE simple ON CLUSTER test_cluster""")

From 52632af9b38e052050e6e33d2b10614a376461be Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 9 Jul 2023 06:18:10 +0300
Subject: [PATCH 371/522] Update connection.py

---
 tests/sqllogic/connection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/sqllogic/connection.py b/tests/sqllogic/connection.py
index d71cc005d09..ca03839fc35 100644
--- a/tests/sqllogic/connection.py
+++ b/tests/sqllogic/connection.py
@@ -62,7 +62,7 @@ def default_clickhouse_odbc_conn_str():
     return str(
         OdbcConnectingArgs.create_from_kw(
             dsn="ClickHouse DSN (ANSI)",
-            Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree",
+            Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1",
         )
     )

From 00cf66ab7a60025c3722044ee6bf10235e15333e Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Mon, 26 Jun 2023 21:25:43 +0200
Subject: [PATCH 372/522] Add ability to disable native copy for BACKUP/RESTORE

Native copy uses CopyObject for S3/GCS, but in GCS the CopyObject is
buggy - it does not always work.
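
For illustration, a minimal sketch of how the new setting is meant to be used, mirroring the test added below (the `s3_conn` connection name and backup paths come from that test; note that a later commit in this series renames the setting to `allow_s3_native_copy`):

```sql
-- Fall back to read+write copying instead of server-side CopyObject:
BACKUP TABLE data TO S3(s3_conn, 'backups/default/data_no_native_copy')
SETTINGS native_copy = false;

-- Native copy stays enabled by default, or can be requested explicitly:
RESTORE TABLE data AS data_restored
FROM S3(s3_conn, 'backups/default/data_native_copy')
SETTINGS native_copy = true;
```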
Signed-off-by: Azat Khuzhin --- src/Backups/BackupFactory.h | 1 + src/Backups/BackupIO_S3.cpp | 10 +++-- src/Backups/BackupIO_S3.h | 6 ++- src/Backups/BackupSettings.cpp | 1 + src/Backups/BackupSettings.h | 3 ++ src/Backups/BackupsWorker.cpp | 2 + src/Backups/RestoreSettings.cpp | 1 + src/Backups/RestoreSettings.h | 3 ++ src/Backups/registerBackupEngineS3.cpp | 4 +- tests/config/config.d/storage_conf.xml | 8 ++++ .../02801_backup_native_copy.reference | 4 ++ .../0_stateless/02801_backup_native_copy.sh | 43 +++++++++++++++++++ 12 files changed, 78 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/02801_backup_native_copy.reference create mode 100755 tests/queries/0_stateless/02801_backup_native_copy.sh diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 92a5e16533c..642f5cb07b9 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -35,6 +35,7 @@ public: std::shared_ptr backup_coordination; std::optional backup_uuid; bool deduplicate_files = true; + bool native_copy = true; }; static BackupFactory & instance(); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 9a2a457e13e..01e6bc78949 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -101,12 +101,13 @@ namespace BackupReaderS3::BackupReaderS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_) : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} + , native_copy(native_copy_) { request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint } @@ -138,7 +139,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible. /// We don't check for `has_throttling` here because the native copy almost doesn't use network. auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if (destination_data_source_description.sameKind(data_source_description) + if (native_copy && destination_data_source_description.sameKind(data_source_description) && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { /// Use native copy, the more optimal way. 
@@ -177,12 +178,13 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupWriterS3::BackupWriterS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} + , native_copy(native_copy_) { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint @@ -194,7 +196,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible. /// We don't check for `has_throttling` here because the native copy almost doesn't use network. auto source_data_source_description = src_disk->getDataSourceDescription(); - if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) + if (native_copy && source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in S3 bucket. /// In this case we can't use the native copy. 
diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index cca56bae6bc..d02e45370f9 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -17,7 +17,7 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_); + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -32,13 +32,14 @@ private: const std::shared_ptr client; S3Settings::RequestSettings request_settings; const DataSourceDescription data_source_description; + const bool native_copy; }; class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_); + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; @@ -61,6 +62,7 @@ private: S3Settings::RequestSettings request_settings; std::optional supports_batch_delete; const DataSourceDescription data_source_description; + const bool native_copy; }; } diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 882342467fe..8e9fe7956f9 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -25,6 +25,7 @@ namespace ErrorCodes M(Bool, async) \ M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ + M(Bool, native_copy) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index 2c899687e6e..e21b70ee25f 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -38,6 +38,9 @@ struct BackupSettings /// Whether the BACKUP will omit similar files (within one backup only). bool deduplicate_files = true; + /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) + bool native_copy = true; + /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. size_t shard_num = 0; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 53a076705c4..fddd4f34bb6 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -348,6 +348,7 @@ void BackupsWorker::doBackup( backup_create_params.backup_coordination = backup_coordination; backup_create_params.backup_uuid = backup_settings.backup_uuid; backup_create_params.deduplicate_files = backup_settings.deduplicate_files; + backup_create_params.native_copy = backup_settings.native_copy; BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params); /// Write the backup. 
@@ -647,6 +648,7 @@ void BackupsWorker::doRestore( backup_open_params.backup_info = backup_info; backup_open_params.base_backup_info = restore_settings.base_backup_info; backup_open_params.password = restore_settings.password; + backup_open_params.native_copy = restore_settings.native_copy; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index d12da704b2d..4dd75911a91 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -161,6 +161,7 @@ namespace M(RestoreAccessCreationMode, create_access) \ M(Bool, allow_unresolved_access_dependencies) \ M(RestoreUDFCreationMode, create_function) \ + M(Bool, native_copy) \ M(Bool, internal) \ M(String, host_id) \ M(OptionalUUID, restore_uuid) diff --git a/src/Backups/RestoreSettings.h b/src/Backups/RestoreSettings.h index 3bce8698620..59d73c83d69 100644 --- a/src/Backups/RestoreSettings.h +++ b/src/Backups/RestoreSettings.h @@ -107,6 +107,9 @@ struct RestoreSettings /// How the RESTORE command will handle if a user-defined function which it's going to restore already exists. RestoreUDFCreationMode create_function = RestoreUDFCreationMode::kCreateIfNotExists; + /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) + bool native_copy = true; + /// Internal, should not be specified by user. bool internal = false; diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index 8387b4627d5..ef8ced94590 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -107,12 +107,12 @@ void registerBackupEngineS3(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context); + auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.native_copy, params.context); return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context); } else { - auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context); + auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.native_copy, params.context); return std::make_unique( backup_name_for_logging, archive_params, diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index deee71bd812..af04024d528 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -2,6 +2,14 @@ + + s3 + s3_common_disk/ + http://localhost:11111/test/common/ + clickhouse + clickhouse + 20000 + s3 s3_disk/ diff --git a/tests/queries/0_stateless/02801_backup_native_copy.reference b/tests/queries/0_stateless/02801_backup_native_copy.reference new file mode 100644 index 00000000000..659df5e9b25 --- /dev/null +++ b/tests/queries/0_stateless/02801_backup_native_copy.reference @@ -0,0 +1,4 @@ +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 +RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 +RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, \'backups/default/data_no_native_copy\') 
SETTINGS native_copy = 0	0
diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh
new file mode 100755
index 00000000000..966d7ae9ce8
--- /dev/null
+++ b/tests/queries/0_stateless/02801_backup_native_copy.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Tag: no-fasttest - requires S3
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+set -e
+
+$CLICKHOUSE_CLIENT -nm -q "
+    drop table if exists data;
+    create table data (key Int) engine=MergeTree() order by tuple() settings disk='s3_common_disk';
+    insert into data select * from numbers(10);
+"
+
+query_id=$(random_str 10)
+$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true"
+$CLICKHOUSE_CLIENT -nm -q "
+    SYSTEM FLUSH LOGS;
+    SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id'
+"
+
+query_id=$(random_str 10)
+$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false"
+$CLICKHOUSE_CLIENT -nm -q "
+    SYSTEM FLUSH LOGS;
+    SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id'
+"
+
+query_id=$(random_str 10)
+$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true"
+$CLICKHOUSE_CLIENT -nm -q "
+    SYSTEM FLUSH LOGS;
+    SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id'
+"
+
+query_id=$(random_str 10)
+$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false"
+$CLICKHOUSE_CLIENT -nm -q "
+    SYSTEM FLUSH LOGS;
+    SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id'
+"

From 5835e72fd6d5dd0225a0dda2f81887d6f61015fb Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 27 Jun 2023 16:20:27 +0200
Subject: [PATCH 373/522] More generic approach to disable native copy

Previous patch implements this only for BACKUP/RESTORE, but it can be
useful for regular disks as well, so add allow_native_copy for disks.

Note that there is no s3_allow_native_copy query setting, since it would
be redundant: the setting makes sense only for S3 disks, and not on a
per-query basis.
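
For illustration, a hypothetical per-endpoint configuration using the new knob; the endpoint name and GCS URL are placeholders, and only the allow_native_copy key itself comes from this patch (see the StorageS3Settings change below):

```xml
<clickhouse>
    <s3>
        <gcs> <!-- hypothetical endpoint name -->
            <endpoint>https://storage.googleapis.com/some-bucket/</endpoint>
            <!-- Fall back to read+write copying instead of CopyObject for this endpoint -->
            <allow_native_copy>false</allow_native_copy>
        </gcs>
    </s3>
</clickhouse>
```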
Signed-off-by: Azat Khuzhin --- src/Backups/BackupIO_S3.cpp | 34 +++++++++++++------ src/Backups/BackupIO_S3.h | 6 ++-- .../ObjectStorages/S3/S3ObjectStorage.cpp | 14 ++++++-- src/IO/S3/copyS3File.cpp | 22 +++++++++++- src/IO/S3/copyS3File.h | 20 +++++++++-- src/Storages/StorageS3Settings.cpp | 2 ++ src/Storages/StorageS3Settings.h | 1 + 7 files changed, 80 insertions(+), 19 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 01e6bc78949..6531948c872 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -101,15 +101,16 @@ namespace BackupReaderS3::BackupReaderS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} - , native_copy(native_copy_) { + request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint + request_settings.allow_native_copy = native_copy; } BackupReaderS3::~BackupReaderS3() = default; @@ -139,11 +140,10 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible. /// We don't check for `has_throttling` here because the native copy almost doesn't use network. auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if (native_copy && destination_data_source_description.sameKind(data_source_description) + if (destination_data_source_description.sameKind(data_source_description) && (destination_data_source_description.is_encrypted == encrypted_in_backup)) { - /// Use native copy, the more optimal way. - LOG_TRACE(log, "Copying {} from S3 to disk {} using native copy", path_in_backup, destination_disk->getName()); + LOG_TRACE(log, "Copying {} from S3 to disk {}", path_in_backup, destination_disk->getName()); auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional & object_attributes) -> size_t { /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files. 
@@ -152,7 +152,13 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s "Blob writing function called with unexpected blob_path.size={} or mode={}", blob_path.size(), mode); + auto create_read_buffer = [this, path_in_backup] + { + return readFile(path_in_backup); + }; + copyS3File( + create_read_buffer, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, @@ -178,16 +184,16 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupWriterS3::BackupWriterS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) , request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings) , data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false} - , native_copy(native_copy_) { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint + request_settings.allow_native_copy = native_copy; } void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, @@ -196,15 +202,23 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src /// Use the native copy as a more optimal way to copy a file from S3 to S3 if it's possible. /// We don't check for `has_throttling` here because the native copy almost doesn't use network. auto source_data_source_description = src_disk->getDataSourceDescription(); - if (native_copy && source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) + if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in S3 bucket. /// In this case we can't use the native copy. if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { - /// Use native copy, the more optimal way. 
- LOG_TRACE(log, "Copying file {} from disk {} to S3 using native copy", src_path, src_disk->getName()); + auto create_read_buffer = [src_disk, src_path, copy_encrypted, settings = read_settings.adjustBufferSize(start_pos + length)] + { + if (copy_encrypted) + return src_disk->readEncryptedFile(src_path, settings); + else + return src_disk->readFile(src_path, settings); + }; + + LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( + create_read_buffer, client, /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index d02e45370f9..16b2abfea3d 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -17,7 +17,7 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_); + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -32,14 +32,13 @@ private: const std::shared_ptr client; S3Settings::RequestSettings request_settings; const DataSourceDescription data_source_description; - const bool native_copy; }; class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy_, const ContextPtr & context_); + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; @@ -62,7 +61,6 @@ private: S3Settings::RequestSettings request_settings; std::optional supports_batch_delete; const DataSourceDescription data_source_description; - const bool native_copy; }; } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e46ca3d0828..3c19af188dc 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -435,7 +435,12 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, + auto create_read_buffer = [this, object_from] + { + return readObject(object_from); + }; + + copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } else @@ -451,7 +456,12 @@ void S3ObjectStorage::copyObject( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, + auto create_read_buffer = [this, 
object_from] + { + return readObject(object_from); + }; + + copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 7886b84cd00..3f18d3b2145 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -809,7 +809,7 @@ void copyDataToS3File( } -void copyS3File( +void copyS3FileNative( const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -826,6 +826,26 @@ void copyS3File( helper.performCopy(); } +void copyS3File( + const CreateReadBuffer & create_read_buffer, + const std::shared_ptr & s3_client, + const String & src_bucket, + const String & src_key, + size_t src_offset, + size_t src_size, + const String & dest_bucket, + const String & dest_key, + const S3Settings::RequestSettings & settings, + const std::optional> & object_metadata, + ThreadPoolCallbackRunner schedule, + bool for_disk_s3) +{ + if (settings.allow_native_copy) + copyS3FileNative(s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + else + copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); +} + } #endif diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 618ef419a9b..d41f34c103c 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -19,9 +19,9 @@ using CreateReadBuffer = std::function()>; /// Copies a file from S3 to S3. /// The same functionality can be done by using the function copyData() and the classes ReadBufferFromS3 and WriteBufferFromS3 -/// however copyS3File() is faster and spends less network traffic and memory. +/// however copyS3FileNative() is faster and spends less network traffic and memory. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. 
-void copyS3File( +void copyS3FileNative( const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -51,6 +51,22 @@ void copyDataToS3File( ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_s3 = false); +/// Tries to copy file using native copy (copyS3FileNative()), if this is not +/// possible it will fallback to read-write copy (copyDataToS3File()) +void copyS3File( + const CreateReadBuffer & create_read_buffer, + const std::shared_ptr & s3_client, + const String & src_bucket, + const String & src_key, + size_t src_offset, + size_t src_size, + const String & dest_bucket, + const String & dest_key, + const S3Settings::RequestSettings & settings, + const std::optional> & object_metadata = std::nullopt, + ThreadPoolCallbackRunner schedule_ = {}, + bool for_disk_s3 = false); + } #endif diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 89e6ee46b4d..0dc8d8d897b 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -182,6 +182,7 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); max_connections = collection.getOrDefault("max_connections", max_connections); list_object_keys_size = collection.getOrDefault("list_object_keys_size", list_object_keys_size); + allow_native_copy = collection.getOrDefault("allow_native_copy", allow_native_copy); throw_on_zero_files_match = collection.getOrDefault("throw_on_zero_files_match", throw_on_zero_files_match); } @@ -197,6 +198,7 @@ S3Settings::RequestSettings::RequestSettings( max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); + allow_native_copy = config.getBool(key + "allow_native_copy", allow_native_copy); throw_on_zero_files_match = config.getBool(key + "throw_on_zero_files_match", settings.s3_throw_on_zero_files_match); retry_attempts = config.getUInt64(key + "retry_attempts", settings.s3_retry_attempts); request_timeout_ms = config.getUInt64(key + "request_timeout_ms", settings.s3_request_timeout_ms); diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 991e323acb6..581665a7dc5 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -71,6 +71,7 @@ struct S3Settings size_t retry_attempts = 10; size_t request_timeout_ms = 3000; size_t long_request_timeout_ms = 30000; // TODO: Take this from config like request_timeout_ms + bool allow_native_copy = true; bool throw_on_zero_files_match = false; From 5d63b8be0d317af7b2ee1fdfd7dc76daeeec3afd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 27 Jun 2023 16:48:30 +0200 Subject: [PATCH 374/522] Add a test for allow_native_copy using clickhouse-disks (first ever) Signed-off-by: Azat Khuzhin --- .../02802_clickhouse_disks_s3_copy.reference | 4 +++ .../02802_clickhouse_disks_s3_copy.sh | 26 +++++++++++++++++++ .../02802_clickhouse_disks_s3_copy.xml | 21 +++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference create mode 100755 tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh create mode 100644 
tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml

diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference
new file mode 100644
index 00000000000..96860a2f90a
--- /dev/null
+++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.reference
@@ -0,0 +1,4 @@
+s3_plain_native_copy
+Single operation copy has completed.
+s3_plain_no_native_copy
+Single part upload has completed.
diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh
new file mode 100755
index 00000000000..f879b7a5621
--- /dev/null
+++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+# Tag no-fasttest: requires S3
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+config="${BASH_SOURCE[0]/.sh/.xml}"
+
+function run_test_for_disk()
+{
+    local disk=$1 && shift
+
+    echo "$disk"
+
+    clickhouse-disks -C "$config" --disk "$disk" write --input "$config" $CLICKHOUSE_DATABASE/test
+    clickhouse-disks -C "$config" --log-level test --disk "$disk" copy $CLICKHOUSE_DATABASE/test $CLICKHOUSE_DATABASE/test.copy |& {
+        grep -o -e "Single part upload has completed." -e "Single operation copy has completed."
+    }
+    clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test
+    clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy
+}
+
+run_test_for_disk s3_plain_native_copy
+run_test_for_disk s3_plain_no_native_copy
diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml
new file mode 100644
index 00000000000..d4235a70903
--- /dev/null
+++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml
@@ -0,0 +1,21 @@
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <s3_plain_native_copy>
+                <type>s3_plain</type>
+                <endpoint>http://localhost:11111/test/clickhouse-disks/</endpoint>
+                <access_key_id>clickhouse</access_key_id>
+                <secret_access_key>clickhouse</secret_access_key>
+                <allow_native_copy>true</allow_native_copy>
+            </s3_plain_native_copy>
+
+            <s3_plain_no_native_copy>
+                <type>s3_plain</type>
+                <endpoint>http://localhost:11111/test/clickhouse-disks/</endpoint>
+                <access_key_id>clickhouse</access_key_id>
+                <secret_access_key>clickhouse</secret_access_key>
+                <allow_native_copy>false</allow_native_copy>
+            </s3_plain_no_native_copy>
+        </disks>
+    </storage_configuration>
+</clickhouse>

From 84c720b33e9ffe44c79658af57f5985b38b8a728 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Wed, 28 Jun 2023 10:52:00 +0200
Subject: [PATCH 375/522] Beautify test_backup_restore_s3 (using per-query profile events)

Signed-off-by: Azat Khuzhin

---
 .../test_backup_restore_s3/test.py            | 148 +++++++++---------
 1 file changed, 75 insertions(+), 73 deletions(-)

diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py
index 0285500d044..bb14fa4824b 100644
--- a/tests/integration/test_backup_restore_s3/test.py
+++ b/tests/integration/test_backup_restore_s3/test.py
@@ -2,6 +2,7 @@ from typing import Dict, Iterable
 import pytest
 from helpers.cluster import ClickHouseCluster
 from helpers.test_tools import TSV
+import uuid

 cluster = ClickHouseCluster(__file__)

@@ -37,32 +38,31 @@ def new_backup_name():
     return f"backup{backup_id_counter}"


-def get_events(events_names: Iterable[str]) -> Dict[str, int]:
-    _events = TSV(
+def get_events_for_query(query_id: str) -> Dict[str, int]:
+    events = TSV(
         node.query(
-            f"SELECT event, value FROM system.events WHERE event in {events_names} SETTINGS system_events_show_zero_values = 1;"
+            f"""
+            SYSTEM FLUSH LOGS;
+
+            WITH arrayJoin(ProfileEvents) as pe
+            SELECT pe.1, pe.2
+            FROM system.query_log
+            WHERE query_id = '{query_id}'
+            """
         )
     )
     return {
         event: int(value)
-        for event, value in
[line.split("\t") for line in _events.lines] + for event, value in [line.split("\t") for line in events.lines] } def check_backup_and_restore( - storage_policy, backup_destination, size=1000, backup_name=None, check_events=False + storage_policy, + backup_destination, + size=1000, + backup_name=None, ): - s3_backup_events = ( - "WriteBufferFromS3Microseconds", - "WriteBufferFromS3Bytes", - "WriteBufferFromS3RequestsErrors", - ) - s3_restore_events = ( - "ReadBufferFromS3Microseconds", - "ReadBufferFromS3Bytes", - "ReadBufferFromS3RequestsErrors", - ) - node.query( f""" DROP TABLE IF EXISTS data SYNC; @@ -72,16 +72,17 @@ def check_backup_and_restore( """ ) try: - events_before_backups = get_events(s3_backup_events) - node.query(f"BACKUP TABLE data TO {backup_destination}") - events_after_backups = get_events(s3_backup_events) - events_before_restore = get_events(s3_restore_events) + backup_query_id = uuid.uuid4().hex + node.query( + f"BACKUP TABLE data TO {backup_destination}", query_id=backup_query_id + ) + restore_query_id = uuid.uuid4().hex node.query( f""" RESTORE TABLE data AS data_restored FROM {backup_destination}; - """ + """, + query_id=restore_query_id, ) - events_after_restore = get_events(s3_restore_events) node.query( """ SELECT throwIf( @@ -91,55 +92,10 @@ def check_backup_and_restore( ); """ ) - if check_events and backup_name: - objects = node.cluster.minio_client.list_objects( - "root", f"data/backups/multipart/{backup_name}/" - ) - backup_meta_size = 0 - for obj in objects: - if ".backup" in obj.object_name: - backup_meta_size = obj.size - break - backup_total_size = int( - node.query( - f"SELECT sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'" - ).strip() - ) - restore_total_size = int( - node.query( - f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'" - ).strip() - ) - # backup - # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test - assert ( - abs( - backup_total_size - - ( - events_after_backups["WriteBufferFromS3Bytes"] - - events_before_backups["WriteBufferFromS3Bytes"] - - backup_meta_size - ) - ) - < 100 - ) - assert ( - events_after_backups["WriteBufferFromS3Microseconds"] - > events_before_backups["WriteBufferFromS3Microseconds"] - ) - assert events_after_backups["WriteBufferFromS3RequestsErrors"] == 0 - # restore - assert ( - events_after_restore["ReadBufferFromS3Bytes"] - - events_before_restore["ReadBufferFromS3Bytes"] - - backup_meta_size - == restore_total_size - ) - assert ( - events_after_restore["ReadBufferFromS3Microseconds"] - > events_before_restore["ReadBufferFromS3Microseconds"] - ) - assert events_after_restore["ReadBufferFromS3RequestsErrors"] == 0 + return [ + get_events_for_query(backup_query_id), + get_events_for_query(restore_query_id), + ] finally: node.query( """ @@ -224,17 +180,63 @@ def test_backup_to_s3_multipart(): storage_policy = "default" backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" - check_backup_and_restore( + (backup_events, restore_events) = check_backup_and_restore( storage_policy, backup_destination, size=1000000, backup_name=backup_name, - check_events=True, ) assert node.contains_in_log( f"copyDataToS3File: Multipart upload has completed. 
Bucket: root, Key: data/backups/multipart/{backup_name}" ) + s3_backup_events = ( + "WriteBufferFromS3Microseconds", + "WriteBufferFromS3Bytes", + "WriteBufferFromS3RequestsErrors", + ) + s3_restore_events = ( + "ReadBufferFromS3Microseconds", + "ReadBufferFromS3Bytes", + "ReadBufferFromS3RequestsErrors", + ) + + objects = node.cluster.minio_client.list_objects( + "root", f"data/backups/multipart/{backup_name}/" + ) + backup_meta_size = 0 + for obj in objects: + if ".backup" in obj.object_name: + backup_meta_size = obj.size + break + backup_total_size = int( + node.query( + f"SELECT sum(total_size) FROM system.backups WHERE status = 'BACKUP_CREATED' AND name like '%{backup_name}%'" + ).strip() + ) + restore_total_size = int( + node.query( + f"SELECT sum(total_size) FROM system.backups WHERE status = 'RESTORED' AND name like '%{backup_name}%'" + ).strip() + ) + # backup + # NOTE: ~35 bytes is used by .lock file, so set up 100 bytes to avoid flaky test + assert ( + abs( + backup_total_size + - (backup_events["WriteBufferFromS3Bytes"] - backup_meta_size) + ) + < 100 + ) + assert backup_events["WriteBufferFromS3Microseconds"] > 0 + assert "WriteBufferFromS3RequestsErrors" not in backup_events + # restore + assert ( + restore_events["ReadBufferFromS3Bytes"] - backup_meta_size == restore_total_size + ) + assert restore_events["ReadBufferFromS3Microseconds"] > 0 + assert "ReadBufferFromS3RequestsErrors" not in restore_events + def test_backup_to_s3_native_copy(): storage_policy = "policy_s3" From 29dc9abfcab495f66689826fdbb8ee7a81ab4c7d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 10:58:23 +0200 Subject: [PATCH 376/522] Fix test_backup_restore_s3 after logging for native copying changed Check profile events instead of some odd logs. Signed-off-by: Azat Khuzhin --- .../test_backup_restore_s3/test.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index bb14fa4824b..8701bf0d832 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -244,9 +244,12 @@ def test_backup_to_s3_native_copy(): backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) - check_backup_and_restore(storage_policy, backup_destination) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 assert node.contains_in_log( f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" ) @@ -258,9 +261,12 @@ def test_backup_to_s3_native_copy_other_bucket(): backup_destination = ( f"S3('http://minio1:9001/root/data/backups/{backup_name}', 'minio', 'minio123')" ) - check_backup_and_restore(storage_policy, backup_destination) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination + ) + # single part upload + assert backup_events["S3CopyObject"] > 0 + assert restore_events["S3CopyObject"] > 0 assert node.contains_in_log( f"copyS3File: Single operation copy has completed. 
Bucket: root, Key: data/backups/{backup_name}" ) @@ -270,9 +276,12 @@ def test_backup_to_s3_native_copy_multipart(): storage_policy = "policy_s3" backup_name = new_backup_name() backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" - check_backup_and_restore(storage_policy, backup_destination, size=1000000) - assert node.contains_in_log("BackupWriterS3.*using native copy") - assert node.contains_in_log("BackupReaderS3.*using native copy") + (backup_events, restore_events) = check_backup_and_restore( + storage_policy, backup_destination, size=1000000 + ) + # multi part upload + assert backup_events["S3CreateMultipartUpload"] > 0 + assert restore_events["S3CreateMultipartUpload"] > 0 assert node.contains_in_log( f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" ) From 1590ffa3b1eee26d66ae3aec3ac32c63acdea153 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:22:57 +0200 Subject: [PATCH 377/522] Remove copyS3FileNative() Signed-off-by: Azat Khuzhin --- src/IO/S3/copyS3File.cpp | 22 ++++------------------ src/IO/S3/copyS3File.h | 25 +++++++------------------ 2 files changed, 11 insertions(+), 36 deletions(-) diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 3f18d3b2145..2c6557d97e7 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -809,23 +809,6 @@ void copyDataToS3File( } -void copyS3FileNative( - const std::shared_ptr & s3_client, - const String & src_bucket, - const String & src_key, - size_t src_offset, - size_t src_size, - const String & dest_bucket, - const String & dest_key, - const S3Settings::RequestSettings & settings, - const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule, - bool for_disk_s3) -{ - CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; - helper.performCopy(); -} - void copyS3File( const CreateReadBuffer & create_read_buffer, const std::shared_ptr & s3_client, @@ -841,7 +824,10 @@ void copyS3File( bool for_disk_s3) { if (settings.allow_native_copy) - copyS3FileNative(s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + { + CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; + helper.performCopy(); + } else copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index d41f34c103c..2c848076e9b 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -19,9 +19,14 @@ using CreateReadBuffer = std::function()>; /// Copies a file from S3 to S3. /// The same functionality can be done by using the function copyData() and the classes ReadBufferFromS3 and WriteBufferFromS3 -/// however copyS3FileNative() is faster and spends less network traffic and memory. +/// however copyS3File() is faster and spends less network traffic and memory. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. -void copyS3FileNative( +/// +/// Note, that it tries to copy file using native copy (CopyObject), but if it +/// has been disabled (with settings.allow_native_copy) it is fallbacks to +/// read-write copy (copyDataToS3File()). 
+void copyS3File( + const CreateReadBuffer & create_read_buffer, const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -51,22 +56,6 @@ void copyDataToS3File( ThreadPoolCallbackRunner schedule_ = {}, bool for_disk_s3 = false); -/// Tries to copy file using native copy (copyS3FileNative()), if this is not -/// possible it will fallback to read-write copy (copyDataToS3File()) -void copyS3File( - const CreateReadBuffer & create_read_buffer, - const std::shared_ptr & s3_client, - const String & src_bucket, - const String & src_key, - size_t src_offset, - size_t src_size, - const String & dest_bucket, - const String & dest_key, - const S3Settings::RequestSettings & settings, - const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}, - bool for_disk_s3 = false); - } #endif From 559d3281782c22fa380e85e188d2a15e404a4c19 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 17:16:02 +0200 Subject: [PATCH 378/522] Rename BACKUP setting native_copy to allow_s3_native_copy Signed-off-by: Azat Khuzhin --- src/Backups/BackupFactory.h | 2 +- src/Backups/BackupIO_S3.cpp | 8 ++++---- src/Backups/BackupIO_S3.h | 4 ++-- src/Backups/BackupSettings.cpp | 2 +- src/Backups/BackupSettings.h | 2 +- src/Backups/BackupsWorker.cpp | 4 ++-- src/Backups/RestoreSettings.cpp | 2 +- src/Backups/RestoreSettings.h | 2 +- src/Backups/registerBackupEngineS3.cpp | 4 ++-- .../0_stateless/02801_backup_native_copy.reference | 8 ++++---- tests/queries/0_stateless/02801_backup_native_copy.sh | 8 ++++---- 11 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 642f5cb07b9..e95aeddb086 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -35,7 +35,7 @@ public: std::shared_ptr backup_coordination; std::optional backup_uuid; bool deduplicate_files = true; - bool native_copy = true; + bool allow_s3_native_copy = true; }; static BackupFactory & instance(); diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 6531948c872..60fea9e2008 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -101,7 +101,7 @@ namespace BackupReaderS3::BackupReaderS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_) : BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) @@ -110,7 +110,7 @@ BackupReaderS3::BackupReaderS3( { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint - request_settings.allow_native_copy = native_copy; + request_settings.allow_native_copy = allow_s3_native_copy; } BackupReaderS3::~BackupReaderS3() = default; @@ -184,7 +184,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s BackupWriterS3::BackupWriterS3( - const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_) + const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & 
context_) : BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_) , s3_uri(s3_uri_) , client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_)) @@ -193,7 +193,7 @@ BackupWriterS3::BackupWriterS3( { request_settings.updateFromSettings(context_->getSettingsRef()); request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint - request_settings.allow_native_copy = native_copy; + request_settings.allow_native_copy = allow_s3_native_copy; } void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 16b2abfea3d..a93d6119786 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -17,7 +17,7 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); + BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -38,7 +38,7 @@ private: class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool native_copy, const ContextPtr & context_); + BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 8e9fe7956f9..b6d776d0347 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes M(Bool, async) \ M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ - M(Bool, native_copy) \ + M(Bool, allow_s3_native_copy) \ M(UInt64, shard_num) \ M(UInt64, replica_num) \ M(Bool, internal) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index e21b70ee25f..7cec2d9693d 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -39,7 +39,7 @@ struct BackupSettings bool deduplicate_files = true; /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) - bool native_copy = true; + bool allow_s3_native_copy = true; /// 1-based shard index to store in the backup. 0 means all shards. /// Can only be used with BACKUP ON CLUSTER. diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index fddd4f34bb6..c08b110075e 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -348,7 +348,7 @@ void BackupsWorker::doBackup( backup_create_params.backup_coordination = backup_coordination; backup_create_params.backup_uuid = backup_settings.backup_uuid; backup_create_params.deduplicate_files = backup_settings.deduplicate_files; - backup_create_params.native_copy = backup_settings.native_copy; + backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy; BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params); /// Write the backup. 
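The rename above is a one-list change because backup settings are declared through a single list macro — the `M(Bool, allow_s3_native_copy)` entries in BackupSettings.cpp and RestoreSettings.cpp. A minimal self-contained sketch of that X-macro pattern; all names below are illustrative stand-ins, not ClickHouse's actual helpers:

``` cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical single source of truth for setting names, types and defaults.
// Renaming an entry here updates the struct field, the string lookup and the
// default value at once, which is what keeps such renames mechanical.
#define LIST_OF_DEMO_SETTINGS(M) \
    M(bool, allow_s3_native_copy, true) \
    M(bool, deduplicate_files, true)

struct DemoSettings
{
    // Declare one member per list entry.
#define DECLARE(TYPE, NAME, DEFAULT) TYPE NAME = DEFAULT;
    LIST_OF_DEMO_SETTINGS(DECLARE)
#undef DECLARE

    // Apply a textual key to the matching member, as a settings parser would.
    void set(const std::string & key, bool value)
    {
#define TRY_APPLY(TYPE, NAME, DEFAULT) \
        if (key == #NAME) { NAME = value; return; }
        LIST_OF_DEMO_SETTINGS(TRY_APPLY)
#undef TRY_APPLY
        throw std::runtime_error("unknown setting: " + key);
    }
};

int main()
{
    DemoSettings settings;
    settings.set("allow_s3_native_copy", false);
    std::cout << settings.allow_s3_native_copy << '\n'; // prints 0
}
```

With this shape a rename touches the list entry plus the call sites that use the field by name, and nothing else — essentially the set of files in this patch.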
@@ -648,7 +648,7 @@ void BackupsWorker::doRestore( backup_open_params.backup_info = backup_info; backup_open_params.base_backup_info = restore_settings.base_backup_info; backup_open_params.password = restore_settings.password; - backup_open_params.native_copy = restore_settings.native_copy; + backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index 4dd75911a91..2009ca4c1ff 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -161,7 +161,7 @@ namespace M(RestoreAccessCreationMode, create_access) \ M(Bool, allow_unresolved_access_dependencies) \ M(RestoreUDFCreationMode, create_function) \ - M(Bool, native_copy) \ + M(Bool, allow_s3_native_copy) \ M(Bool, internal) \ M(String, host_id) \ M(OptionalUUID, restore_uuid) diff --git a/src/Backups/RestoreSettings.h b/src/Backups/RestoreSettings.h index 59d73c83d69..1861e219dba 100644 --- a/src/Backups/RestoreSettings.h +++ b/src/Backups/RestoreSettings.h @@ -108,7 +108,7 @@ struct RestoreSettings RestoreUDFCreationMode create_function = RestoreUDFCreationMode::kCreateIfNotExists; /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) - bool native_copy = true; + bool allow_s3_native_copy = true; /// Internal, should not be specified by user. bool internal = false; diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index ef8ced94590..bd705e4d70f 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -107,12 +107,12 @@ void registerBackupEngineS3(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.native_copy, params.context); + auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context); return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context); } else { - auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.native_copy, params.context); + auto writer = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context); return std::make_unique( backup_name_for_logging, archive_params, diff --git a/tests/queries/0_stateless/02801_backup_native_copy.reference b/tests/queries/0_stateless/02801_backup_native_copy.reference index 659df5e9b25..f9b008cde2e 100644 --- a/tests/queries/0_stateless/02801_backup_native_copy.reference +++ b/tests/queries/0_stateless/02801_backup_native_copy.reference @@ -1,4 +1,4 @@ -BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 -BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 -RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS native_copy = 1 1 -RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS native_copy = 0 0 +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS allow_s3_native_copy = 1 1 +BACKUP TABLE data TO S3(s3_conn, \'backups/default/data_no_native_copy\') 
SETTINGS allow_s3_native_copy = 0 0 +RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, \'backups/default/data_native_copy\') SETTINGS allow_s3_native_copy = 1 1 +RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, \'backups/default/data_no_native_copy\') SETTINGS allow_s3_native_copy = 0 0 diff --git a/tests/queries/0_stateless/02801_backup_native_copy.sh b/tests/queries/0_stateless/02801_backup_native_copy.sh index 966d7ae9ce8..015dcb19b82 100755 --- a/tests/queries/0_stateless/02801_backup_native_copy.sh +++ b/tests/queries/0_stateless/02801_backup_native_copy.sh @@ -15,28 +15,28 @@ $CLICKHOUSE_CLIENT -nm -q " " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false" +$CLICKHOUSE_CLIENT --format Null --query_id $query_id -q "BACKUP TABLE data TO S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS native_copy=true" +$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_native_copy') SETTINGS allow_s3_native_copy=true" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' " query_id=$(random_str 10) -$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS native_copy=false" +$CLICKHOUSE_CLIENT --send_logs_level=error --format Null --query_id $query_id -q "RESTORE TABLE data AS data_no_native_copy FROM S3(s3_conn, 'backups/$CLICKHOUSE_DATABASE/data_no_native_copy') SETTINGS allow_s3_native_copy=false" $CLICKHOUSE_CLIENT -nm -q " SYSTEM FLUSH LOGS; SELECT query, ProfileEvents['S3CopyObject']>0 FROM system.query_log WHERE type = 'QueryFinish' AND event_date >= yesterday() AND current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' From 1844ac37d76ac1a660681acb6b79af8af860d5ff Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 28 Jun 2023 19:12:52 +0200 Subject: [PATCH 379/522] Remove create_read_buffer argument for copyS3File() Signed-off-by: Azat Khuzhin --- 
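The point of the diff below: callers previously had to supply a `create_read_buffer` factory even though the native-copy branch never invokes it, so the construction of the reader now lives inside `copyS3File()` itself. A minimal sketch of that deferred-construction pattern, with all types and names hypothetical rather than the real S3 API:

``` cpp
#include <functional>
#include <iostream>
#include <memory>
#include <string>

// Illustrative stand-in for a read buffer over a remote object.
struct DemoReader
{
    explicit DemoReader(const std::string & key) { std::cout << "opening reader for " << key << '\n'; }
};

using CreateDemoReader = std::function<std::unique_ptr<DemoReader>()>;

// The streaming fallback still takes a factory, so a reader is constructed
// only if this path is actually reached.
void demoStreamingCopy(const CreateDemoReader & create_reader)
{
    auto reader = create_reader();
    std::cout << "streaming read-write copy\n";
}

// After the change, callers pass only the object coordinates; the factory
// is built here, right next to the one branch that needs it.
void demoCopy(const std::string & src_key, bool allow_native_copy)
{
    if (allow_native_copy)
    {
        std::cout << "server-side CopyObject, no reader constructed\n";
        return;
    }
    demoStreamingCopy([&] { return std::make_unique<DemoReader>(src_key); });
}

int main()
{
    demoCopy("data/backups/part", /*allow_native_copy=*/ true);
    demoCopy("data/backups/part", /*allow_native_copy=*/ false);
}
```

When native copy succeeds, no read buffer is ever created — the laziness the removed argument used to provide at every call site.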
src/Backups/BackupIO_S3.cpp | 15 --------------- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 14 ++------------ src/IO/S3/copyS3File.cpp | 7 ++++++- src/IO/S3/copyS3File.h | 6 +++--- 4 files changed, 11 insertions(+), 31 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 60fea9e2008..d487ec6e80e 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -152,13 +152,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s "Blob writing function called with unexpected blob_path.size={} or mode={}", blob_path.size(), mode); - auto create_read_buffer = [this, path_in_backup] - { - return readFile(path_in_backup); - }; - copyS3File( - create_read_buffer, client, s3_uri.bucket, fs::path(s3_uri.key) / path_in_backup, @@ -208,17 +202,8 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src /// In this case we can't use the native copy. if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2) { - auto create_read_buffer = [src_disk, src_path, copy_encrypted, settings = read_settings.adjustBufferSize(start_pos + length)] - { - if (copy_encrypted) - return src_disk->readEncryptedFile(src_path, settings); - else - return src_disk->readFile(src_path, settings); - }; - LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName()); copyS3File( - create_read_buffer, client, /* src_bucket */ blob_path[1], /* src_key= */ blob_path[0], diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 3c19af188dc..e46ca3d0828 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -435,12 +435,7 @@ void S3ObjectStorage::copyObjectToAnotherObjectStorage( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - auto create_read_buffer = [this, object_from] - { - return readObject(object_from); - }; - - copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, + copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, dest_s3->bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } else @@ -456,12 +451,7 @@ void S3ObjectStorage::copyObject( // NOLINT auto settings_ptr = s3_settings.get(); auto size = S3::getObjectSize(*client_ptr, bucket, object_from.remote_path, {}, settings_ptr->request_settings, /* for_disk_s3= */ true); auto scheduler = threadPoolCallbackRunner(getThreadPoolWriter(), "S3ObjStor_copy"); - auto create_read_buffer = [this, object_from] - { - return readObject(object_from); - }; - - copyS3File(create_read_buffer, client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, + copyS3File(client_ptr, bucket, object_from.remote_path, 0, size, bucket, object_to.remote_path, settings_ptr->request_settings, object_to_attributes, scheduler, /* for_disk_s3= */ true); } diff --git a/src/IO/S3/copyS3File.cpp b/src/IO/S3/copyS3File.cpp index 2c6557d97e7..2de2ccd0f9f 100644 --- a/src/IO/S3/copyS3File.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -810,7 +810,6 @@ void copyDataToS3File( void copyS3File( - const CreateReadBuffer & create_read_buffer, const std::shared_ptr 
& s3_client, const String & src_bucket, const String & src_key, @@ -829,7 +828,13 @@ void copyS3File( helper.performCopy(); } else + { + auto create_read_buffer = [&] + { + return std::make_unique<ReadBufferFromS3>(s3_client, src_bucket, src_key, "", settings, Context::getGlobalContextInstance()->getReadSettings()); + }; copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3); + } } } diff --git a/src/IO/S3/copyS3File.h b/src/IO/S3/copyS3File.h index 2c848076e9b..5d35e5ebe2d 100644 --- a/src/IO/S3/copyS3File.h +++ b/src/IO/S3/copyS3File.h @@ -23,10 +23,10 @@ using CreateReadBuffer = std::function<std::unique_ptr<SeekableReadBuffer>()>; /// The parameters `src_offset` and `src_size` specify a part in the source to copy. /// /// Note that it tries to copy the file using native copy (CopyObject), but if it -/// has been disabled (with settings.allow_native_copy) it falls back to -/// read-write copy (copyDataToS3File()). +/// has been disabled (with settings.allow_native_copy) or the request failed +/// because of a known issue, it falls back to read-write copy +/// (copyDataToS3File()). void copyS3File( - const CreateReadBuffer & create_read_buffer, const std::shared_ptr<const S3::Client> & s3_client, const String & src_bucket, const String & src_key, From b95836363085160a20bddfceaaf0709a0e721870 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 8 Jul 2023 14:32:34 +0200 Subject: [PATCH 380/522] tests: temporarily fix the 02802_clickhouse_disks_s3_copy In #51135 the behavior of `clickhouse-disks copy` was changed; let's temporarily update the test (and continue the discussion about this change in that PR). Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh index f879b7a5621..33321607728 100755 --- a/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh +++ b/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.sh @@ -19,6 +19,8 @@ function run_test_for_disk() grep -o -e "Single part upload has completed." -e "Single operation copy has completed."
} clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test + # NOTE: this is because "copy" works like "cp -R from to/" instead of "cp from to" + clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy/test clickhouse-disks -C "$config" --disk "$disk" remove $CLICKHOUSE_DATABASE/test.copy } From ac972661f9718b9d15e5bb49c63b2dff7d296fe3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Jul 2023 08:21:18 +0200 Subject: [PATCH 381/522] Add exclusion for "API mode: {}" from S3 in 00002_log_and_exception_messages_formatting Signed-off-by: Azat Khuzhin --- .../0_stateless/00002_log_and_exception_messages_formatting.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index acb6117f937..86fe01dc0e3 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -36,7 +36,7 @@ create temporary table known_short_messages (s String) as select * from (select 'Database {} doesn''t exist', 'Dictionary ({}) not found', 'Unknown table function {}', 'Unknown format {}', 'Unknown explain kind ''{}''', 'Unknown setting {}', 'Unknown input format {}', 'Unknown identifier: ''{}''', 'User name is empty', 'Expected function, got: {}', -'Attempt to read after eof', 'String size is too big ({}), maximum: {}' +'Attempt to read after eof', 'String size is too big ({}), maximum: {}', 'API mode: {}' ] as arr) array join arr; -- Check that we don't have too many short meaningless message patterns. From d52041345401bdd1a02c2482546da2d5c21793cb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Jul 2023 09:20:03 +0200 Subject: [PATCH 382/522] Cleanup SymbolIndex after reload got removed Remove MultiVersion for the SymbolIndex structure, since after #51873 it is useless. Follow-up for: #51873 Signed-off-by: Azat Khuzhin --- src/Common/StackTrace.cpp | 6 ++---- src/Common/SymbolIndex.cpp | 11 +++-------- src/Common/SymbolIndex.h | 9 +++------ src/Common/examples/symbol_index.cpp | 3 +-- src/Common/getResource.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 2 +- src/Daemon/SentryWriter.cpp | 2 +- src/Functions/addressToLine.h | 3 +-- src/Functions/addressToSymbol.cpp | 3 +-- src/Functions/serverConstants.cpp | 2 +- src/Interpreters/CrashLog.cpp | 2 +- 11 files changed, 16 insertions(+), 29 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index c13b63854e4..b323f1e4363 100--- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -208,8 +208,7 @@ void StackTrace::symbolize( const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset, size_t size, StackTrace::Frames & frames) { #if defined(__ELF__) && !defined(OS_FREEBSD) - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; + const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); std::unordered_map dwarfs; for (size_t i = 0; i < offset; ++i) @@ -341,8 +340,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s using enum DB::Dwarf::LocationInfoMode; const auto mode = fatal ?
FULL_WITH_INLINE : FAST; - auto symbol_index_ptr = DB::SymbolIndex::instance(); - const DB::SymbolIndex & symbol_index = *symbol_index_ptr; + const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); std::unordered_map dwarfs; for (size_t i = stack_trace.offset; i < stack_trace.size; ++i) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 4c7f3827125..cb02bb3ff75 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -509,7 +509,7 @@ const T * find(const void * address, const std::vector & vec) } -void SymbolIndex::update() +void SymbolIndex::load() { dl_iterate_phdr(collectSymbols, &data); @@ -549,17 +549,12 @@ String SymbolIndex::getBuildIDHex() const return build_id_hex; } -MultiVersion & SymbolIndex::instanceImpl() +const SymbolIndex & SymbolIndex::instance() { - static MultiVersion instance(std::unique_ptr(new SymbolIndex)); + static SymbolIndex instance; return instance; } -MultiVersion::Version SymbolIndex::instance() -{ - return instanceImpl().get(); -} - } #endif diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 773f59b7914..4fd108434d5 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -8,8 +8,6 @@ #include #include -#include - namespace DB { @@ -20,10 +18,10 @@ namespace DB class SymbolIndex : private boost::noncopyable { protected: - SymbolIndex() { update(); } + SymbolIndex() { load(); } public: - static MultiVersion::Version instance(); + static const SymbolIndex & instance(); struct Symbol { @@ -89,8 +87,7 @@ public: private: Data data; - void update(); - static MultiVersion & instanceImpl(); + void load(); }; } diff --git a/src/Common/examples/symbol_index.cpp b/src/Common/examples/symbol_index.cpp index 13a49fd65ad..ca9c26f27d6 100644 --- a/src/Common/examples/symbol_index.cpp +++ b/src/Common/examples/symbol_index.cpp @@ -22,8 +22,7 @@ int main(int argc, char ** argv) return 1; } - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; + const SymbolIndex & symbol_index = SymbolIndex::instance(); for (const auto & elem : symbol_index.symbols()) std::cout << elem.name << ": " << elem.address_begin << " ... " << elem.address_end << "\n"; diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp index fe603fcc550..72ba24c2f44 100644 --- a/src/Common/getResource.cpp +++ b/src/Common/getResource.cpp @@ -16,7 +16,7 @@ std::string_view getResource(std::string_view name) #if defined USE_MUSL /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. 
- return DB::SymbolIndex::instance()->getResource(name_replaced); + return DB::SymbolIndex::instance().getResource(name_replaced); #else // In most `dlsym(3)` APIs, one passes the symbol name as it appears via diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index bf6c3b4cdcf..319d2bc8b5b 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -986,7 +986,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) - String build_id_hex = SymbolIndex::instance()->getBuildIDHex(); + String build_id_hex = SymbolIndex::instance().getBuildIDHex(); if (build_id_hex.empty()) build_id = ""; else diff --git a/src/Daemon/SentryWriter.cpp b/src/Daemon/SentryWriter.cpp index 041d3292841..e38d339d088 100644 --- a/src/Daemon/SentryWriter.cpp +++ b/src/Daemon/SentryWriter.cpp @@ -150,7 +150,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta sentry_set_extra("signal_number", sentry_value_new_int32(sig)); #if defined(__ELF__) && !defined(OS_FREEBSD) - const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex(); + const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex(); sentry_set_tag("build_id", build_id_hex.c_str()); #endif diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index 1410e55d9a9..5c1611fe173 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -90,8 +90,7 @@ protected: ResultT impl(uintptr_t addr) const { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; + const SymbolIndex & symbol_index = SymbolIndex::instance(); if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) { diff --git a/src/Functions/addressToSymbol.cpp b/src/Functions/addressToSymbol.cpp index 95d57f6d296..cc5ad4c4fdf 100644 --- a/src/Functions/addressToSymbol.cpp +++ b/src/Functions/addressToSymbol.cpp @@ -68,8 +68,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; + const SymbolIndex & symbol_index = SymbolIndex::instance(); const ColumnPtr & column = arguments[0].column; const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 0fda53414de..4294f97d771 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -27,7 +27,7 @@ namespace public: static constexpr auto name = "buildId"; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance()->getBuildIDHex(), context->isDistributed()) {} + explicit FunctionBuildId(ContextPtr context) : FunctionConstantBase(SymbolIndex::instance().getBuildIDHex(), context->isDistributed()) {} }; #endif diff --git a/src/Interpreters/CrashLog.cpp b/src/Interpreters/CrashLog.cpp index f1f0ffb6f60..08c08ffecd1 100644 --- a/src/Interpreters/CrashLog.cpp +++ b/src/Interpreters/CrashLog.cpp @@ -52,7 +52,7 @@ void CrashLogElement::appendToBlock(MutableColumns & columns) const String build_id_hex; #if defined(__ELF__) && !defined(OS_FREEBSD) - build_id_hex = SymbolIndex::instance()->getBuildIDHex(); + build_id_hex = 
SymbolIndex::instance().getBuildIDHex(); #endif columns[i++]->insert(build_id_hex); } From 3c18a181c997f1f43e759d72eeadcc5d4f35142d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 9 Jul 2023 13:54:18 +0200 Subject: [PATCH 383/522] Fix using of pools from the main thread Otherwise it is not possible to use clickhouse-disks with S3: $ clickhouse-disks -C /src/ch/clickhouse/tests/queries/0_stateless/02802_clickhouse_disks_s3_copy.xml --log-level test --disk s3_plain_native_copy copy default/test default/test.copy Failed to make request to: http://localhost:11111/test?list-type=2&max-keys=1&prefix=clickhouse-disks%2Fdefault%2Ftest.copy: Code: 49. DB::Exception: current_thread is not initialized. (LOGICAL_ERROR), Stack trace (when copying this message, always include the lines below): 0. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/exception:134: Poco::Exception::Exception(String const&, int) @ 0x000000001ad7c872 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 1. ./.cmake-llvm16/./src/Common/Exception.cpp:94: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x0000000011e2c4b7 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 2. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/string:1499: DB::Exception::Exception(int, char const (&) [34]) @ 0x000000000d341e58 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 3. ./.cmake-llvm16/./src/Common/MemoryTrackerSwitcher.h:19: DB::(anonymous namespace)::SingleEndpointHTTPSessionPool::allocObject() @ 0x0000000012010e5a in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 4. ./.cmake-llvm16/./src/Common/PoolBase.h:174: PoolBase::get(long) @ 0x0000000012011a6f in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 5. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/variant:797: DB::makePooledHTTPSession(Poco::URI const&, Poco::URI const&, DB::ConnectionTimeouts const&, unsigned long, bool, bool) @ 0x000000001200ec69 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 6. ./.cmake-llvm16/./src/IO/HTTPCommon.cpp:0: DB::makePooledHTTPSession(Poco::URI const&, DB::ConnectionTimeouts const&, unsigned long, bool, bool) @ 0x000000001200d909 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 7. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/variant:797: void DB::S3::PocoHTTPClient::makeRequestInternalImpl(Aws::Http::HttpRequest&, DB::S3::ClientConfigurationPerRequest const&, std::shared_ptr&, Aws::Utils::RateLimits::RateLimiterInterface*, Aws::Utils::RateLimits::RateLimiterInterface*) const @ 0x00000000163f5157 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 8. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/string:1499: DB::S3::PocoHTTPClient::makeRequestInternal(Aws::Http::HttpRequest&, std::shared_ptr&, Aws::Utils::RateLimits::RateLimiterInterface*, Aws::Utils::RateLimits::RateLimiterInterface*) const @ 0x00000000163f465d in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 9. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:622: DB::S3::PocoHTTPClient::MakeRequest(std::shared_ptr const&, Aws::Utils::RateLimits::RateLimiterInterface*, Aws::Utils::RateLimits::RateLimiterInterface*) const @ 0x00000000163f4454 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 10. 
./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/source/client/AWSClient.cpp:506: Aws::Client::AWSClient::AttemptOneRequest(std::shared_ptr const&, Aws::AmazonWebServiceRequest const&, char const*, char const*, char const*) const @ 0x000000001ae2a922 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 11. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/__memory/shared_ptr.h:612: Aws::Client::AWSClient::AttemptExhaustively(Aws::Http::URI const&, Aws::AmazonWebServiceRequest const&, Aws::Http::HttpMethod, char const*, char const*, char const*) const @ 0x000000001ae28299 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 12. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/include/aws/core/utils/Outcome.h:160: Aws::Client::AWSXMLClient::MakeRequest(Aws::Http::URI const&, Aws::AmazonWebServiceRequest const&, Aws::Http::HttpMethod, char const*, char const*, char const*) const @ 0x000000001ae3c9ed in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 13. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/source/client/AWSXmlClient.cpp:66: Aws::Client::AWSXMLClient::MakeRequest(Aws::AmazonWebServiceRequest const&, Aws::Endpoint::AWSEndpoint const&, Aws::Http::HttpMethod, char const*, char const*, char const*) const @ 0x000000001ae3c995 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 14. ./.cmake-llvm16/./contrib/aws/generated/src/aws-cpp-sdk-s3/source/S3Client.cpp:0: Aws::S3::S3Client::ListObjectsV2(Aws::S3::Model::ListObjectsV2Request const&) const @ 0x000000001aee6666 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 15. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/include/aws/core/utils/Outcome.h:160: DB::S3::Client::ListObjectsV2(DB::S3::ExtendedRequest const&) const @ 0x00000000163cee42 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 16. ./.cmake-llvm16/./contrib/aws/src/aws-cpp-sdk-core/include/aws/core/utils/Outcome.h:120: DB::S3ObjectStorage::listObjects(String const&, std::vector>&, int) const @ 0x0000000016b582e2 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 17. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/vector:543: DB::IObjectStorage::existsOrHasAnyChild(String const&) const @ 0x000000001644ebe9 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 18. ./.cmake-llvm16/./src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp:0: DB::MetadataStorageFromPlainObjectStorage::exists(String const&) const @ 0x0000000016b54a64 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 19. ./.cmake-llvm16/./src/Disks/IDisk.cpp:145: DB::IDisk::copyDirectoryContent(String const&, std::shared_ptr const&, String const&) @ 0x0000000016b38fa0 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 20. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/string:1499: DB::CommandCopy::execute(std::vector> const&, std::shared_ptr&, Poco::Util::LayeredConfiguration&) @ 0x0000000012050403 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 21. ./.cmake-llvm16/./contrib/llvm-project/libcxx/include/vector:434: DB::DisksApp::main(std::vector> const&) @ 0x000000001204bf02 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 22. ./.cmake-llvm16/./base/poco/Util/src/Application.cpp:0: Poco::Util::Application::run() @ 0x000000001ac7a666 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 23. ./.cmake-llvm16/./programs/disks/DisksApp.cpp:0: mainEntryClickHouseDisks(int, char**) @ 0x000000001204c550 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 24. 
./.cmake-llvm16/./programs/main.cpp:0: main @ 0x000000000cfbadc4 in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse 25. ? @ 0x00007ffff7dc9850 in ? 26. __libc_start_main @ 0x00007ffff7dc990a in ? 27. _start @ 0x000000000cfba1ee in /src/ch/clickhouse/.cmake-llvm16/programs/clickhouse (version 23.7.1.1) AWSXmlClient: HTTP response code: -1 Resolved remote host IP address: Request ID: Exception name: Error message: Code: 49. DB::Exception: current_thread is not initialized. (LOGICAL_ERROR) (version 23.7.1.1) 0 response headers: If the signature check failed. This could be because of a time skew. Attempting to adjust the signer. Request failed, now waiting 1600 ms before attempting again. Signed-off-by: Azat Khuzhin --- src/Common/MemoryTrackerSwitcher.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Common/MemoryTrackerSwitcher.h b/src/Common/MemoryTrackerSwitcher.h index 0fefcbb280a..3c99fd12353 100644 --- a/src/Common/MemoryTrackerSwitcher.h +++ b/src/Common/MemoryTrackerSwitcher.h @@ -6,17 +6,13 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - struct MemoryTrackerSwitcher { explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker) { + /// current_thread is not initialized for the main thread, so simply do not switch anything if (!current_thread) - throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized"); + return; auto * thread_tracker = CurrentThread::getMemoryTracker(); prev_untracked_memory = current_thread->untracked_memory; @@ -28,6 +24,10 @@ struct MemoryTrackerSwitcher ~MemoryTrackerSwitcher() { + /// current_thread is not initialized for the main thread, so simply do not switch anything + if (!current_thread) + return; + CurrentThread::flushUntrackedMemory(); auto * thread_tracker = CurrentThread::getMemoryTracker(); @@ -35,6 +35,7 @@ struct MemoryTrackerSwitcher thread_tracker->setParent(prev_memory_tracker_parent); } +private: MemoryTracker * prev_memory_tracker_parent = nullptr; Int64 prev_untracked_memory = 0; }; From 3b954a2952477bee203a5e00c2cbb9f6a50ae274 Mon Sep 17 00:00:00 2001 From: Konstantin Ilchenko Date: Sun, 9 Jul 2023 14:38:16 +0200 Subject: [PATCH 384/522] [DOCS] Add REMOVE SAMPLE BY to docs --- .../statements/alter/sample-by.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/sample-by.md b/docs/en/sql-reference/statements/alter/sample-by.md index b20f3c7b5d3..ccad792f853 100644 --- a/docs/en/sql-reference/statements/alter/sample-by.md +++ b/docs/en/sql-reference/statements/alter/sample-by.md @@ -5,15 +5,28 @@ sidebar_label: SAMPLE BY title: "Manipulating Sampling-Key Expressions" --- -Syntax: +# Manipulating SAMPLE BY expression + +The following operations are available: + +## MODIFY ``` sql ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression ``` -The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). +The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). The primary key must contain the new sample key. -The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key. 
+## REMOVE + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] REMOVE SAMPLE BY +``` + +The command removes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table. + + +The commands `MODIFY` and `REMOVE` are lightweight in the sense that they only change metadata or remove files. :::note It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). From 2db092f9d82537e7bac4f31568a0d1c21dbc5799 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 8 Jul 2023 21:06:00 +0200 Subject: [PATCH 385/522] Cleanup remote_servers in dist config.xml Originally the dist config did not contain this many clusters; they were added whenever someone needed a new cluster for tests. So let's move them to clusters.xml, which is deployed only for tests, and leave only the default cluster. Also clean up some configs in the repo that had been copied from the dist config (for the test_config_* integration tests this should be OK, since more_clusters.xml covers the additional cases as well). Signed-off-by: Azat Khuzhin --- .../internal/platform/data/file_test.go | 4 +- .../testdata/configs/xml/config.xml | 67 -------- .../testdata/configs/yaml/config.yaml | 40 ----- .../testdata/configs/yandex_xml/config.xml | 67 -------- programs/server/config.d/more_clusters.xml | 49 ------ programs/server/config.xml | 155 +---------------- programs/server/config.yaml.example | 44 +---- tests/config/config.d/clusters.xml | 157 ++++++++++++++++++ .../configs/config.xml | 38 ----- .../test_config_xml_full/configs/config.xml | 85 ---------- .../test_config_xml_main/configs/config.xml | 67 -------- .../configs/config.xml | 67 -------- .../test_config_yaml_full/configs/config.yaml | 46 ----- .../test_config_yaml_main/configs/config.yaml | 40 ----- .../configs/disable_lazy_load.xml | 12 +- .../configs/overrides.xml | 12 ++ .../test_dictionaries_dependency/test.py | 8 +- ...torage_configuration.xml => overrides.xml} | 17 ++ .../test.py | 2 +- .../configs/macros.xml | 1 - .../test_https_replication/configs/config.xml | 25 --- .../{named_collections.xml => overrides.xml} | 12 ++ .../test_mask_sensitive_info/test.py | 2 +- .../configs/config.d/remote_servers.xml | 14 ++ .../test_storage_hdfs/configs/cluster.xml | 15 ++ .../test_storage_url/configs/conf.xml | 34 ++++ utils/clickhouse-diagnostics/README.md | 75 --------- 27 files changed, 289 insertions(+), 866 deletions(-) delete mode 100644 programs/server/config.d/more_clusters.xml create mode 100644 tests/integration/test_dictionaries_dependency/configs/overrides.xml rename tests/integration/test_distributed_storage_configuration/configs/config.d/{storage_configuration.xml => overrides.xml} (54%) rename tests/integration/test_mask_sensitive_info/configs/{named_collections.xml => overrides.xml} (65%) diff --git a/programs/diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go index 938c34281f1..5df1f8cc359 100644 --- a/programs/diagnostics/internal/platform/data/file_test.go +++ b/programs/diagnostics/internal/platform/data/file_test.go @@ -135,7 +135,7 @@ func TestConfigFileFrameCopy(t *testing.T) { sizes := map[string]int64{ "users.xml": int64(2017), "default-password.xml": int64(188), - "config.xml": int64(61662), + "config.xml": int64(59506), "server-include.xml": int64(168), "user-include.xml": int64(559), } @@ -189,7 +189,7 @@ func
TestConfigFileFrameCopy(t *testing.T) { sizes := map[string]int64{ "users.yaml": int64(1023), "default-password.yaml": int64(132), - "config.yaml": int64(42512), + "config.yaml": int64(41633), "server-include.yaml": int64(21), "user-include.yaml": int64(120), } diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml index 21a0821f89d..c08b0b2970f 100644 --- a/programs/diagnostics/testdata/configs/xml/config.xml +++ b/programs/diagnostics/testdata/configs/xml/config.xml @@ -649,73 +649,6 @@ - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - - true - - 127.0.0.1 - 9000 - - - - true - - 127.0.0.2 - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - + + + - - - - false - - 127.0.0.1 - 9000 - - - 127.0.0.2 - 9000 - - - 127.0.0.3 - 9000 - - - - - - - false - - 127.0.0.1 - 9000 - - - 127.0.0.2 - 9000 - - - 127.0.0.3 - 9000 - - - 127.0.0.4 - 9000 - - - 127.0.0.5 - 9000 - - - 127.0.0.6 - 9000 - - - 127.0.0.7 - 9000 - - - 127.0.0.8 - 9000 - - - 127.0.0.9 - 9000 - - - 127.0.0.10 - 9000 - - - - 127.0.0.11 - 1234 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - - true - - 127.0.0.1 - 9000 - - - - true - - 127.0.0.2 - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - + + + 127.0.0.11 + 1234 + + + + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + 127.0.0.3 + 9000 + + + + + + + + localhost + 9000 + + + + + localhost + 9000 + + + + + + true + + 127.0.0.1 + 9000 + + + + true + + 127.0.0.2 + 9000 + + + + + + + localhost + 9440 + 1 + + + + + + + localhost + 9000 + + + + + localhost + 1 + + + diff --git a/tests/integration/test_config_corresponding_root/configs/config.xml b/tests/integration/test_config_corresponding_root/configs/config.xml index 72014646161..9a38d02a036 100644 --- a/tests/integration/test_config_corresponding_root/configs/config.xml +++ b/tests/integration/test_config_corresponding_root/configs/config.xml @@ -136,7 +136,6 @@ https://clickhouse.com/docs/en/table_engines/distributed/ --> - @@ -145,43 +144,6 @@ - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - diff --git a/tests/integration/test_config_xml_full/configs/config.xml b/tests/integration/test_config_xml_full/configs/config.xml index 4e3d1def5fc..d142df18af8 100644 --- a/tests/integration/test_config_xml_full/configs/config.xml +++ b/tests/integration/test_config_xml_full/configs/config.xml @@ -565,91 +565,6 @@ - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - 127.0.0.1 - 9000 - - - - - 127.0.0.2 - 9000 - - - - - - true - - 127.0.0.1 - 9000 - - - - true - - 127.0.0.2 - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9440 - - - - - - - - localhost - 9440 - - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - -