From 9ca533a033657deb1bcae93d0f27ef2ca41a90f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 23 Jul 2024 11:47:01 +0000 Subject: [PATCH 001/121] Update autogenerated version to 24.7.1.1 and contributors --- cmake/autogenerated_versions.txt | 4 +-- .../StorageSystemContributors.generated.cpp | 27 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index bb776fa9506..7b1f4054560 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54488) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9) -SET(VERSION_DESCRIBE v24.7.1.1-testing) +SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) +SET(VERSION_DESCRIBE v24.7.1.1-stable) SET(VERSION_STRING 24.7.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index 9201eef185f..35b9c0008c6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,5 +1,6 @@ // autogenerated by tests/ci/version_helper.py const char * auto_contributors[] { + "0x01f", "0xflotus", "13DaGGeR", "1lann", @@ -167,6 +168,7 @@ const char * auto_contributors[] { "AnneClickHouse", "Anselmo D. Adams", "Anthony N. 
Simon", + "AntiTopQuark", "Anton Ivashkin", "Anton Kobzev", "Anton Kozlov", @@ -299,6 +301,7 @@ const char * auto_contributors[] { "Dan Wu", "DanRoscigno", "Dani Pozo", + "Daniel Anugerah", "Daniel Bershatsky", "Daniel Byta", "Daniel Dao", @@ -370,6 +373,7 @@ const char * auto_contributors[] { "Elena", "Elena Baskakova", "Elena Torró", + "Elena Torró Martínez", "Elghazal Ahmed", "Eliot Hautefeuille", "Elizaveta Mironyuk", @@ -415,6 +419,7 @@ const char * auto_contributors[] { "FgoDt", "Filatenkov Artur", "Filipe Caixeta", + "Filipp Bakanov", "Filipp Ozinov", "Filippov Denis", "Fille", @@ -451,6 +456,7 @@ const char * auto_contributors[] { "Gleb Novikov", "Gleb-Tretyakov", "GoGoWen2021", + "Gosha Letov", "Gregory", "Grigorii Sokolik", "Grigory", @@ -461,6 +467,7 @@ const char * auto_contributors[] { "Guillaume Tassery", "Guo Wangyang", "Guo Wei (William)", + "Guspan Tanadi", "Haavard Kvaalen", "Habibullah Oladepo", "HaiBo Li", @@ -474,6 +481,7 @@ const char * auto_contributors[] { "HarryLeeIBM", "Hasitha Kanchana", "Hasnat", + "Haydn", "Heena Bansal", "HeenaBansal2009", "Hendrik M", @@ -606,6 +614,7 @@ const char * auto_contributors[] { "Kevin Chiang", "Kevin Michel", "Kevin Mingtarja", + "Kevin Song", "Kevin Zhang", "KevinyhZou", "KinderRiven", @@ -661,6 +670,7 @@ const char * auto_contributors[] { "Lewinma", "Li Shuai", "Li Yin", + "Linh Giang", "Lino Uruñuela", "Lirikl", "Liu Cong", @@ -690,6 +700,7 @@ const char * auto_contributors[] { "Maksim Alekseev", "Maksim Buren", "Maksim Fedotov", + "Maksim Galkin", "Maksim Kita", "Maksym Sobolyev", "Mal Curtis", @@ -724,6 +735,7 @@ const char * auto_contributors[] { "Max Akhmedov", "Max Bruce", "Max K", + "Max K.", "Max Kainov", "Max Vetrov", "MaxTheHuman", @@ -811,6 +823,7 @@ const char * auto_contributors[] { "Nataly Merezhuk", "Natalya Chizhonkova", "Natasha Murashkina", + "Nathan Clevenger", "NeZeD [Mac Pro]", "Neeke Gao", "Neng Liu", @@ -946,6 +959,7 @@ const char * auto_contributors[] { "Robert Coelho", "Robert 
Hodges", "Robert Schulze", + "Rodolphe Dugé de Bernonville", "RogerYK", "Rohit Agarwal", "Romain Neutron", @@ -1107,6 +1121,7 @@ const char * auto_contributors[] { "Timur Solodovnikov", "TiunovNN", "Tobias Adamson", + "Tobias Florek", "Tobias Lins", "Tom Bombadil", "Tom Risse", @@ -1231,11 +1246,13 @@ const char * auto_contributors[] { "Yingchun Lai", "Yingfan Chen", "Yinzheng-Sun", + "Yinzuo Jiang", "Yiğit Konur", "Yohann Jardin", "Yong Wang", "Yong-Hao Zou", "Youenn Lebras", + "Your Name", "Yu, Peng", "Yuko Takagi", "Yuntao Wu", @@ -1250,6 +1267,7 @@ const char * auto_contributors[] { "Yury Stankevich", "Yusuke Tanaka", "Zach Naimon", + "Zawa-II", "Zheng Miao", "ZhiHong Zhang", "ZhiYong Wang", @@ -1380,6 +1398,7 @@ const char * auto_contributors[] { "conicliu", "copperybean", "coraxster", + "cw5121", "cwkyaoyao", "d.v.semenov", "dalei2019", @@ -1460,12 +1479,14 @@ const char * auto_contributors[] { "fuzzERot", "fyu", "g-arslan", + "gabrielmcg44", "ggerogery", "giordyb", "glockbender", "glushkovds", "grantovsky", "gulige", + "gun9nir", "guoleiyi", "guomaolin", "guov100", @@ -1527,6 +1548,7 @@ const char * auto_contributors[] { "jferroal", "jiahui-97", "jianmei zhang", + "jiaosenvip", "jinjunzh", "jiyoungyoooo", "jktng", @@ -1541,6 +1563,7 @@ const char * auto_contributors[] { "jun won", "jus1096", "justindeguzman", + "jwoodhead", "jyz0309", "karnevil13", "kashwy", @@ -1633,10 +1656,12 @@ const char * auto_contributors[] { "mateng0915", "mateng915", "mauidude", + "max-vostrikov", "maxim", "maxim-babenko", "maxkuzn", "maxulan", + "maxvostrikov", "mayamika", "mehanizm", "melin", @@ -1677,6 +1702,7 @@ const char * auto_contributors[] { "nathanbegbie", "nauta", "nautaa", + "nauu", "ndchikin", "nellicus", "nemonlou", @@ -1975,6 +2001,7 @@ const char * auto_contributors[] { "张健", "张风啸", "徐炘", + "忒休斯~Theseus", "曲正鹏", "木木夕120", "未来星___费", From dc37a7d697bbd1eed10b79be9b8ab74e8d72a2e5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 23 Jul 2024 17:07:02 +0000 
Subject: [PATCH 002/121] Backport #66955 to 24.7: CI: Fixes docker server build for release branches --- tests/ci/docker_server.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 21fc02ce02a..413c35cbebe 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -21,7 +21,7 @@ from env_helper import ( TEMP_PATH, ) from git_helper import Git -from pr_info import PRInfo, EventType +from pr_info import PRInfo from report import FAILURE, SUCCESS, JobReport, TestResult, TestResults from stopwatch import Stopwatch from tee_popen import TeePopen @@ -375,25 +375,23 @@ def main(): tags = gen_tags(args.version, args.release_type) repo_urls = {} direct_urls: Dict[str, List[str]] = {} - if pr_info.event_type == EventType.PULL_REQUEST: - release_or_pr = str(pr_info.number) - sha = pr_info.sha - elif pr_info.event_type == EventType.PUSH and pr_info.is_master: - release_or_pr = str(0) - sha = pr_info.sha - else: - release_or_pr = f"{args.version.major}.{args.version.minor}" - sha = args.sha - assert sha for arch, build_name in zip(ARCH, ("package_release", "package_aarch64")): - if not args.bucket_prefix: + if args.bucket_prefix: + assert not args.allow_build_reuse + repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" + elif args.sha: + # CreateRelease workflow only. TODO + version = args.version repo_urls[arch] = ( f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/" - f"{release_or_pr}/{sha}/{build_name}" + f"{version.major}.{version.minor}/{args.sha}/{build_name}" ) else: - repo_urls[arch] = f"{args.bucket_prefix}/{build_name}" + # In all other cases urls must be fetched from build reports. 
TODO: script needs refactoring + repo_urls[arch] = "" + assert args.allow_build_reuse + if args.allow_build_reuse: # read s3 urls from pre-downloaded build reports if "clickhouse-server" in image_repo: @@ -431,7 +429,6 @@ def main(): ) if test_results[-1].status != "OK": status = FAILURE - pr_info = pr_info or PRInfo() description = f"Processed tags: {', '.join(tags)}" JobReport( From a299dad400d68ca2119a8e8fabbe529e9eb9f0ec Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Jul 2024 11:09:06 +0000 Subject: [PATCH 003/121] Backport #66898 to 24.7: [CI Fest] Better processing of broken parts and their projections (fixes rare cases of lost forever) --- src/Storages/MergeTree/DataPartsExchange.cpp | 9 +++++-- .../MergeTree/MergeTreeSequentialSource.cpp | 9 ++----- src/Storages/MergeTree/checkDataPart.cpp | 27 ++++++++++++------- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 8e73021d3e7..061ee356203 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -224,14 +225,18 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write } catch (const Exception & e) { - if (e.code() != ErrorCodes::ABORTED && e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM) + if (e.code() != ErrorCodes::CANNOT_WRITE_TO_OSTREAM + && !isRetryableException(std::current_exception())) + { report_broken_part(); + } throw; } catch (...) 
{ - report_broken_part(); + if (!isRetryableException(std::current_exception())) + report_broken_part(); throw; } } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 07476e8b2e9..311720728e7 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -15,16 +15,11 @@ #include #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int MEMORY_LIMIT_EXCEEDED; -} - - /// Lightweight (in terms of logic) stream for reading single part from /// MergeTree, used for merges and mutations. /// @@ -281,7 +276,7 @@ try catch (...) { /// Suspicion of the broken part. A part is added to the queue for verification. - if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED) + if (!isRetryableException(std::current_exception())) storage.reportBrokenPart(data_part); throw; } diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 774fd95ebc6..fb86d9e7603 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -36,11 +36,13 @@ namespace ErrorCodes extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_MUNMAP; extern const int CANNOT_MREMAP; + extern const int CANNOT_SCHEDULE_TASK; extern const int UNEXPECTED_FILE_IN_DATA_PART; extern const int NO_FILE_IN_DATA_PART; extern const int NETWORK_ERROR; extern const int SOCKET_TIMEOUT; extern const int BROKEN_PROJECTION; + extern const int ABORTED; } @@ -85,7 +87,9 @@ bool isRetryableException(std::exception_ptr exception_ptr) { return isNotEnoughMemoryErrorCode(e.code()) || e.code() == ErrorCodes::NETWORK_ERROR - || e.code() == ErrorCodes::SOCKET_TIMEOUT; + || e.code() == ErrorCodes::SOCKET_TIMEOUT + || e.code() == ErrorCodes::CANNOT_SCHEDULE_TASK + || e.code() == ErrorCodes::ABORTED; } catch (const Poco::Net::NetException &) { @@ -329,16 +333,21 @@ static 
IMergeTreeDataPart::Checksums checkDataPart( projections_on_disk.erase(projection_file); } - if (throw_on_broken_projection && !broken_projections_message.empty()) + if (throw_on_broken_projection) { - throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); - } + if (!broken_projections_message.empty()) + { + throw Exception(ErrorCodes::BROKEN_PROJECTION, "{}", broken_projections_message); + } - if (require_checksums && !projections_on_disk.empty()) - { - throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, - "Found unexpected projection directories: {}", - fmt::join(projections_on_disk, ",")); + /// This one is actually not broken, just redundant files on disk which + /// MergeTree will never use. + if (require_checksums && !projections_on_disk.empty()) + { + throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, + "Found unexpected projection directories: {}", + fmt::join(projections_on_disk, ",")); + } } if (is_cancelled()) From b5afddad3a90d047f33138fcf5b68281bd61c3cd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Jul 2024 12:09:56 +0000 Subject: [PATCH 004/121] Backport #66984 to 24.7: [CI Fest] Fix use-of-uninitialized-value in JSONExtract* numeric functions --- src/Functions/FunctionsJSON.cpp | 3 ++- .../03209_functions_json_msan_fuzzer_issue.reference | 1 + .../0_stateless/03209_functions_json_msan_fuzzer_issue.sql | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference create mode 100644 tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql diff --git a/src/Functions/FunctionsJSON.cpp b/src/Functions/FunctionsJSON.cpp index 47040545677..848856c500f 100644 --- a/src/Functions/FunctionsJSON.cpp +++ b/src/Functions/FunctionsJSON.cpp @@ -739,7 +739,8 @@ public: { NumberType value; - tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error); + if 
(!tryGetNumericValueFromJSONElement(value, element, convert_bool_to_integer, error)) + return false; auto & col_vec = assert_cast &>(dest); col_vec.insertValue(value); return true; diff --git a/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference new file mode 100644 index 00000000000..e02f3666d40 --- /dev/null +++ b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.reference @@ -0,0 +1 @@ +0 0 0 1.1 diff --git a/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql new file mode 100644 index 00000000000..a05b07d5971 --- /dev/null +++ b/tests/queries/0_stateless/03209_functions_json_msan_fuzzer_issue.sql @@ -0,0 +1,2 @@ +WITH '{ "v":1.1}' AS raw SELECT JSONExtract(raw, 'float') AS float32_1, JSONExtract(concat(tuple('1970-01-05', 10, materialize(10), 10, 10, 10, toUInt256(10), 10, toNullable(10), 10, 10), materialize(toUInt256(0)), ', ', 2, 2, toLowCardinality(toLowCardinality(2))), raw, toLowCardinality('v'), 'Float32') AS float32_2, JSONExtractFloat(raw) AS float64_1, JSONExtract(raw, 'v', 'double') AS float64_2; + From 9e4f84a7d591b495706eccb65ce32016d731e353 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Jul 2024 14:10:03 +0000 Subject: [PATCH 005/121] Backport #67000 to 24.7: CI: Fix for workflow results parsing --- .github/workflows/backport_branches.yml | 2 +- .github/workflows/master.yml | 2 +- .github/workflows/merge_queue.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/pull_request.yml | 2 +- .github/workflows/release_branches.yml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index c602a46d23c..322946ac77b 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -269,7 +269,7 @@ 
jobs: - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 7c319da6045..acd7511d520 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -135,7 +135,7 @@ jobs: - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/merge_queue.yml b/.github/workflows/merge_queue.yml index 4b186241a0e..64083668719 100644 --- a/.github/workflows/merge_queue.yml +++ b/.github/workflows/merge_queue.yml @@ -108,7 +108,7 @@ jobs: - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 84db3338065..ea9c125db70 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -54,7 +54,7 @@ jobs: - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c7d7b28af38..63b2bd87dc9 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -168,7 +168,7 @@ jobs: - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat >> 
"$WORKFLOW_RESULT_FILE" << 'EOF' + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF python3 ./tests/ci/ci_buddy.py --check-wf-status diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index bca9ff33cd0..b79208b03a6 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -489,7 +489,7 @@ jobs: - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" - cat >> "$WORKFLOW_RESULT_FILE" << 'EOF' + cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF From 139bdeeb7c381b2983004787bb88842a8231468e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Jul 2024 14:12:13 +0000 Subject: [PATCH 006/121] Backport #66761 to 24.7: Small improvement for background pool in Keeper --- src/Coordination/KeeperServer.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperServer.cpp b/src/Coordination/KeeperServer.cpp index d40e5ef2e50..f09ea56391a 100644 --- a/src/Coordination/KeeperServer.cpp +++ b/src/Coordination/KeeperServer.cpp @@ -383,7 +383,10 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co LockMemoryExceptionInThread::removeUniqueLock(); }; - asio_opts.thread_pool_size_ = getNumberOfPhysicalCPUCores(); + /// At least 16 threads for network communication in asio. 
+ /// asio is async framework, so even with 1 thread it should be ok, but + /// still as safeguard it's better to have some redundant capacity here + asio_opts.thread_pool_size_ = std::max(16U, getNumberOfPhysicalCPUCores()); if (state_manager->isSecure()) { From af1f5ed39199f360b55f72d9070636ded79c9f1f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 24 Jul 2024 22:08:01 +0000 Subject: [PATCH 007/121] Backport #67040 to 24.7: Revert "FuzzQuery table function" --- .../table-functions/fuzzQuery.md | 36 ---- programs/client/Client.h | 5 +- src/Client/ClientBase.h | 2 +- src/{Common => Client}/QueryFuzzer.cpp | 50 ++---- src/{Common => Client}/QueryFuzzer.h | 35 ++-- src/Storages/StorageFuzzQuery.cpp | 169 ------------------ src/Storages/StorageFuzzQuery.h | 88 --------- src/Storages/registerStorages.cpp | 2 - src/TableFunctions/TableFunctionFuzzQuery.cpp | 54 ------ src/TableFunctions/TableFunctionFuzzQuery.h | 42 ----- src/TableFunctions/registerTableFunctions.cpp | 1 - src/TableFunctions/registerTableFunctions.h | 1 - .../03031_table_function_fuzzquery.reference | 2 - .../03031_table_function_fuzzquery.sql | 18 -- 14 files changed, 31 insertions(+), 474 deletions(-) delete mode 100644 docs/en/sql-reference/table-functions/fuzzQuery.md rename src/{Common => Client}/QueryFuzzer.cpp (97%) rename src/{Common => Client}/QueryFuzzer.h (91%) delete mode 100644 src/Storages/StorageFuzzQuery.cpp delete mode 100644 src/Storages/StorageFuzzQuery.h delete mode 100644 src/TableFunctions/TableFunctionFuzzQuery.cpp delete mode 100644 src/TableFunctions/TableFunctionFuzzQuery.h delete mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.reference delete mode 100644 tests/queries/0_stateless/03031_table_function_fuzzquery.sql diff --git a/docs/en/sql-reference/table-functions/fuzzQuery.md b/docs/en/sql-reference/table-functions/fuzzQuery.md deleted file mode 100644 index e15f8a40156..00000000000 --- a/docs/en/sql-reference/table-functions/fuzzQuery.md +++ 
/dev/null @@ -1,36 +0,0 @@ ---- -slug: /en/sql-reference/table-functions/fuzzQuery -sidebar_position: 75 -sidebar_label: fuzzQuery ---- - -# fuzzQuery - -Perturbs the given query string with random variations. - -``` sql -fuzzQuery(query[, max_query_length[, random_seed]]) -``` - -**Arguments** - -- `query` (String) - The source query to perform the fuzzing on. -- `max_query_length` (UInt64) - A maximum length the query can get during the fuzzing process. -- `random_seed` (UInt64) - A random seed for producing stable results. - -**Returned Value** - -A table object with a single column containing perturbed query strings. - -## Usage Example - -``` sql -SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2; -``` - -``` - ┌─query──────────────────────────────────────────────────────────┐ -1. │ SELECT 'a' AS key GROUP BY key │ -2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │ - └────────────────────────────────────────────────────────────────┘ -``` diff --git a/programs/client/Client.h b/programs/client/Client.h index 6d57a6ea648..229608f787d 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -9,10 +9,7 @@ namespace DB class Client : public ClientBase { public: - Client() - { - fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr); - } + Client() = default; void initialize(Poco::Util::Application & self) override; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 4f500a4c45d..986990aecaa 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -17,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Common/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp similarity index 97% rename from src/Common/QueryFuzzer.cpp rename to src/Client/QueryFuzzer.cpp index 161c38f20e0..f5b700ea529 100644 --- a/src/Common/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp 
@@ -68,21 +68,22 @@ Field QueryFuzzer::getRandomField(int type) { case 0: { - return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)]; + return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) + / sizeof(*bad_int64_values))]; } case 1: { static constexpr double values[] = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999, 1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20, - FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)]; + FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))]; } case 2: { static constexpr UInt64 scales[] = {0, 1, 2, 10}; return DecimalField( - bad_int64_values[fuzz_rand() % std::size(bad_int64_values)], - static_cast(scales[fuzz_rand() % std::size(scales)]) + bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))], + static_cast(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))]) ); } default: @@ -164,8 +165,7 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - if (debug_stream) - *debug_stream << "erased\n"; + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -174,14 +174,12 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - if (debug_stream) - *debug_stream << fmt::format("inserted (pos {})\n", pos); + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - if (debug_stream) - *debug_stream << "inserted (0)\n"; + std::cerr << "inserted (0)\n"; } } @@ -199,9 +197,7 @@ Field QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.erase(arr.begin() + pos); - - if (debug_stream) - *debug_stream << "erased\n"; + std::cerr << "erased\n"; } if (fuzz_rand() % 5 == 0) @@ -210,16 +206,12 @@ Field 
QueryFuzzer::fuzzField(Field field) { size_t pos = fuzz_rand() % arr.size(); arr.insert(arr.begin() + pos, fuzzField(arr[pos])); - - if (debug_stream) - *debug_stream << fmt::format("inserted (pos {})\n", pos); + std::cerr << fmt::format("inserted (pos {})\n", pos); } else { arr.insert(arr.begin(), getRandomField(0)); - - if (debug_stream) - *debug_stream << "inserted (0)\n"; + std::cerr << "inserted (0)\n"; } } @@ -352,8 +344,7 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast) } else { - if (debug_stream) - *debug_stream << "No random column.\n"; + std::cerr << "No random column.\n"; } } @@ -387,8 +378,7 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) if (col) impl->children.insert(pos, col); else - if (debug_stream) - *debug_stream << "No random column.\n"; + std::cerr << "No random column.\n"; } // We don't have to recurse here to fuzz the children, this is handled by @@ -1371,15 +1361,11 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast) collectFuzzInfoMain(ast); fuzz(ast); - if (out_stream) - { - *out_stream << std::endl; - - WriteBufferFromOStream ast_buf(*out_stream, 4096); - formatAST(*ast, ast_buf, false /*highlight*/); - ast_buf.finalize(); - *out_stream << std::endl << std::endl; - } + std::cout << std::endl; + WriteBufferFromOStream ast_buf(std::cout, 4096); + formatAST(*ast, ast_buf, false /*highlight*/); + ast_buf.finalize(); + std::cout << std::endl << std::endl; } } diff --git a/src/Common/QueryFuzzer.h b/src/Client/QueryFuzzer.h similarity index 91% rename from src/Common/QueryFuzzer.h rename to src/Client/QueryFuzzer.h index 35d088809f2..6165e589cae 100644 --- a/src/Common/QueryFuzzer.h +++ b/src/Client/QueryFuzzer.h @@ -35,31 +35,9 @@ struct ASTWindowDefinition; * queries, so you want to feed it a lot of queries to get some interesting mix * of them. Normally we feed SQL regression tests to it. 
*/ -class QueryFuzzer +struct QueryFuzzer { -public: - explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr) - : fuzz_rand(fuzz_rand_) - , out_stream(out_stream_) - , debug_stream(debug_stream_) - { - } - - // This is the only function you have to call -- it will modify the passed - // ASTPtr to point to new AST with some random changes. - void fuzzMain(ASTPtr & ast); - - ASTs getInsertQueriesForFuzzedTables(const String & full_query); - ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); - void notifyQueryFailed(ASTPtr ast); - - static bool isSuitableForFuzzing(const ASTCreateQuery & create); - -private: - pcg64 fuzz_rand; - - std::ostream * out_stream = nullptr; - std::ostream * debug_stream = nullptr; + pcg64 fuzz_rand{randomSeed()}; // We add elements to expression lists with fixed probability. Some elements // are so large, that the expected number of elements we add to them is @@ -88,6 +66,10 @@ private: std::unordered_map index_of_fuzzed_table; std::set created_tables_hashes; + // This is the only function you have to call -- it will modify the passed + // ASTPtr to point to new AST with some random changes. + void fuzzMain(ASTPtr & ast); + // Various helper functions follow, normally you shouldn't have to call them. 
Field getRandomField(int type); Field fuzzField(Field field); @@ -95,6 +77,9 @@ private: ASTPtr getRandomExpressionList(); DataTypePtr fuzzDataType(DataTypePtr type); DataTypePtr getRandomType(); + ASTs getInsertQueriesForFuzzedTables(const String & full_query); + ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query); + void notifyQueryFailed(ASTPtr ast); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); void fuzzOrderByElement(ASTOrderByElement * elem); @@ -117,6 +102,8 @@ private: void addTableLike(ASTPtr ast); void addColumnLike(ASTPtr ast); void collectFuzzInfoRecurse(ASTPtr ast); + + static bool isSuitableForFuzzing(const ASTCreateQuery & create); }; } diff --git a/src/Storages/StorageFuzzQuery.cpp b/src/Storages/StorageFuzzQuery.cpp deleted file mode 100644 index 6e8f425f8dc..00000000000 --- a/src/Storages/StorageFuzzQuery.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -ColumnPtr FuzzQuerySource::createColumn() -{ - auto column = ColumnString::create(); - ColumnString::Chars & data_to = column->getChars(); - ColumnString::Offsets & offsets_to = column->getOffsets(); - - offsets_to.resize(block_size); - IColumn::Offset offset = 0; - - auto fuzz_base = query; - size_t row_num = 0; - - while (row_num < block_size) - { - ASTPtr new_query = fuzz_base->clone(); - - auto base_before_fuzz = fuzz_base->formatForErrorMessage(); - fuzzer.fuzzMain(new_query); - auto fuzzed_text = new_query->formatForErrorMessage(); - - if (base_before_fuzz == fuzzed_text) - continue; - - /// AST is too long, will start from the original query. 
- if (config.max_query_length > 500) - { - fuzz_base = query; - continue; - } - - IColumn::Offset next_offset = offset + fuzzed_text.size() + 1; - data_to.resize(next_offset); - - std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]); - - data_to[offset + fuzzed_text.size()] = 0; - offsets_to[row_num] = next_offset; - - offset = next_offset; - fuzz_base = new_query; - ++row_num; - } - - return column; -} - -StorageFuzzQuery::StorageFuzzQuery( - const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_) - : IStorage(table_id_), config(config_) -{ - StorageInMemoryMetadata storage_metadata; - storage_metadata.setColumns(columns_); - storage_metadata.setComment(comment_); - setInMemoryMetadata(storage_metadata); -} - -Pipe StorageFuzzQuery::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & /*query_info*/, - ContextPtr /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - size_t num_streams) -{ - storage_snapshot->check(column_names); - - Pipes pipes; - pipes.reserve(num_streams); - - const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); - Block block_header; - for (const auto & name : column_names) - { - const auto & name_type = our_columns.get(name); - MutableColumnPtr column = name_type.type->createColumn(); - block_header.insert({std::move(column), name_type.type, name_type.name}); - } - - const char * begin = config.query.data(); - const char * end = begin + config.query.size(); - - ParserQuery parser(end, false); - auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - - for (UInt64 i = 0; i < num_streams; ++i) - pipes.emplace_back(std::make_shared(max_block_size, block_header, config, query)); - - return Pipe::unitePipes(std::move(pipes)); -} - -StorageFuzzQuery::Configuration 
StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context) -{ - StorageFuzzQuery::Configuration configuration{}; - - // Supported signatures: - // - // FuzzQuery(query) - // FuzzQuery(query, max_query_length) - // FuzzQuery(query, max_query_length, random_seed) - if (engine_args.empty() || engine_args.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed"); - - for (auto & engine_arg : engine_args) - engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context); - - auto first_arg = checkAndGetLiteralArgument(engine_args[0], "query"); - configuration.query = std::move(first_arg); - - if (engine_args.size() >= 2) - { - const auto & literal = engine_args[1]->as(); - if (!literal.value.isNull()) - configuration.max_query_length = checkAndGetLiteralArgument(literal, "max_query_length"); - } - - if (engine_args.size() == 3) - { - const auto & literal = engine_args[2]->as(); - if (!literal.value.isNull()) - configuration.random_seed = checkAndGetLiteralArgument(literal, "random_seed"); - } - - return configuration; -} - -void registerStorageFuzzQuery(StorageFactory & factory) -{ - factory.registerStorage( - "FuzzQuery", - [](const StorageFactory::Arguments & args) -> std::shared_ptr - { - ASTs & engine_args = args.engine_args; - - if (engine_args.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments."); - - StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext()); - - for (const auto& col : args.columns) - if (col.type->getTypeId() != TypeIndex::String) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName()); - - return std::make_shared(args.table_id, args.columns, args.comment, configuration); - }); -} - -} diff --git 
a/src/Storages/StorageFuzzQuery.h b/src/Storages/StorageFuzzQuery.h deleted file mode 100644 index 125ef960e74..00000000000 --- a/src/Storages/StorageFuzzQuery.h +++ /dev/null @@ -1,88 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "config.h" - -namespace DB -{ - -class NamedCollection; - -class StorageFuzzQuery final : public IStorage -{ -public: - struct Configuration : public StatelessTableEngineConfiguration - { - String query; - UInt64 max_query_length = 500; - UInt64 random_seed = randomSeed(); - }; - - StorageFuzzQuery( - const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_); - - std::string getName() const override { return "FuzzQuery"; } - - Pipe read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - size_t num_streams) override; - - static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); - -private: - const Configuration config; -}; - - -class FuzzQuerySource : public ISource -{ -public: - FuzzQuerySource( - UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_) - : ISource(block_header_) - , block_size(block_size_) - , block_header(std::move(block_header_)) - , config(config_) - , query(query_) - , fuzzer(config_.random_seed) - { - } - - String getName() const override { return "FuzzQuery"; } - -protected: - Chunk generate() override - { - Columns columns; - columns.reserve(block_header.columns()); - for (const auto & col : block_header) - { - chassert(col.type->getTypeId() == TypeIndex::String); - columns.emplace_back(createColumn()); - } - - return {std::move(columns), block_size}; - } - -private: - ColumnPtr createColumn(); - - UInt64 block_size; - Block block_header; - - StorageFuzzQuery::Configuration 
config; - ASTPtr query; - - QueryFuzzer fuzzer; -}; - -} diff --git a/src/Storages/registerStorages.cpp b/src/Storages/registerStorages.cpp index adc1074b1fe..8f33314397c 100644 --- a/src/Storages/registerStorages.cpp +++ b/src/Storages/registerStorages.cpp @@ -26,7 +26,6 @@ void registerStorageGenerateRandom(StorageFactory & factory); void registerStorageExecutable(StorageFactory & factory); void registerStorageWindowView(StorageFactory & factory); void registerStorageLoop(StorageFactory & factory); -void registerStorageFuzzQuery(StorageFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerStorageFuzzJSON(StorageFactory & factory); #endif @@ -127,7 +126,6 @@ void registerStorages() registerStorageExecutable(factory); registerStorageWindowView(factory); registerStorageLoop(factory); - registerStorageFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerStorageFuzzJSON(factory); #endif diff --git a/src/TableFunctions/TableFunctionFuzzQuery.cpp b/src/TableFunctions/TableFunctionFuzzQuery.cpp deleted file mode 100644 index 224f6666556..00000000000 --- a/src/TableFunctions/TableFunctionFuzzQuery.cpp +++ /dev/null @@ -1,54 +0,0 @@ -#include - -#include -#include -#include -#include - -namespace DB -{ - - -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - -void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); - - auto args = args_func.at(0)->children; - configuration = StorageFuzzQuery::getConfiguration(args, context); -} - -StoragePtr TableFunctionFuzzQuery::executeImpl( - const ASTPtr & /*ast_function*/, - ContextPtr context, - const std::string & table_name, - ColumnsDescription /*cached_columns*/, - bool is_insert_query) const -{ - ColumnsDescription columns = 
getActualTableStructure(context, is_insert_query); - auto res = std::make_shared( - StorageID(getDatabaseName(), table_name), - columns, - /* comment */ String{}, - configuration); - res->startup(); - return res; -} - -void registerTableFunctionFuzzQuery(TableFunctionFactory & factory) -{ - factory.registerFunction( - {.documentation - = {.description = "Perturbs a query string with random variations.", - .returned_value = "A table object with a single column containing perturbed query strings."}, - .allow_readonly = true}); -} - -} diff --git a/src/TableFunctions/TableFunctionFuzzQuery.h b/src/TableFunctions/TableFunctionFuzzQuery.h deleted file mode 100644 index 22d10341c4d..00000000000 --- a/src/TableFunctions/TableFunctionFuzzQuery.h +++ /dev/null @@ -1,42 +0,0 @@ -#pragma once - -#include - -#include -#include -#include - -#include "config.h" - -namespace DB -{ - -class TableFunctionFuzzQuery : public ITableFunction -{ -public: - static constexpr auto name = "fuzzQuery"; - std::string getName() const override { return name; } - - void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - - ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override - { - return ColumnsDescription{{"query", std::make_shared()}}; - } - -private: - StoragePtr executeImpl( - const ASTPtr & ast_function, - ContextPtr context, - const std::string & table_name, - ColumnsDescription cached_columns, - bool is_insert_query) const override; - - const char * getStorageTypeName() const override { return "fuzzQuery"; } - - String source; - std::optional random_seed; - StorageFuzzQuery::Configuration configuration; -}; - -} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index a6c90872f12..ca4913898f9 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -26,7 +26,6 @@ void registerTableFunctions() 
registerTableFunctionMongoDB(factory); registerTableFunctionRedis(factory); registerTableFunctionMergeTreeIndex(factory); - registerTableFunctionFuzzQuery(factory); #if USE_RAPIDJSON || USE_SIMDJSON registerTableFunctionFuzzJSON(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 2a8864a9bfd..efde4d6dcdc 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -23,7 +23,6 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); void registerTableFunctionMongoDB(TableFunctionFactory & factory); void registerTableFunctionRedis(TableFunctionFactory & factory); void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory); -void registerTableFunctionFuzzQuery(TableFunctionFactory & factory); #if USE_RAPIDJSON || USE_SIMDJSON void registerTableFunctionFuzzJSON(TableFunctionFactory & factory); #endif diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference b/tests/queries/0_stateless/03031_table_function_fuzzquery.reference deleted file mode 100644 index 202e4557a33..00000000000 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.reference +++ /dev/null @@ -1,2 +0,0 @@ -query -String diff --git a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql b/tests/queries/0_stateless/03031_table_function_fuzzquery.sql deleted file mode 100644 index b26096f7f0e..00000000000 --- a/tests/queries/0_stateless/03031_table_function_fuzzquery.sql +++ /dev/null @@ -1,18 +0,0 @@ - -SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes; - -SELECT * FROM fuzzQuery('SELECT * -FROM ( - SELECT - ([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id, - count() - FROM numbers(3) - GROUP BY item_id WITH TOTALS -) AS l FULL JOIN ( - SELECT - ([toString((number % 2) * 2)] :: Array(String)) AS item_id - FROM numbers(3) -) AS r -ON l.item_id = r.item_id -ORDER BY 
1,2,3; -', 500, 8956) LIMIT 10 FORMAT NULL; From b6ffa8d2a2962658b637ee48c93086a3b178e441 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 25 Jul 2024 12:08:06 +0000 Subject: [PATCH 008/121] Backport #67046 to 24.7: Disable setting `optimize_functions_to_subcolumns` --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 1 - tests/clickhouse-test | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3d181e33001..fbb7663b612 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -602,7 +602,7 @@ class IColumn; M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ - M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ + M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 
'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index de4725dc350..c395bfdc815 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -63,7 +63,6 @@ static std::initializer_list Date: Thu, 25 Jul 2024 13:10:04 +0000 Subject: [PATCH 009/121] Backport #67067 to 24.7: [CI Fest] Increase timeout for test_broken_part_during_merge --- tests/integration/test_broken_part_during_merge/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_broken_part_during_merge/test.py b/tests/integration/test_broken_part_during_merge/test.py index 19c22201fb0..0ba7beeb1fd 100644 --- a/tests/integration/test_broken_part_during_merge/test.py +++ b/tests/integration/test_broken_part_during_merge/test.py @@ -54,7 +54,7 @@ def test_merge_and_part_corruption(started_cluster): with Pool(1) as p: def optimize_with_delay(x): - node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=30) + node1.query("OPTIMIZE TABLE replicated_mt FINAL", timeout=120) # corrupt part after merge already assigned, but not started res_opt = p.apply_async(optimize_with_delay, (1,)) @@ -70,7 +70,7 @@ def test_merge_and_part_corruption(started_cluster): node1.query( "ALTER TABLE replicated_mt UPDATE value = 7 WHERE 1", settings={"mutations_sync": 2}, - timeout=30, + timeout=120, ) assert node1.query("SELECT sum(value) FROM replicated_mt") == "2100000\n" From e4f20d1cf11e58f8020871284079b21c7cf72304 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 12:07:29 +0000 Subject: [PATCH 010/121] Backport #67130 to 24.7: Attempt to fix 
flakiness of some window view tests --- .../queries/0_stateless/01052_window_view_proc_tumble_to_now.sh | 1 + tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh | 1 + tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh | 1 + tests/queries/0_stateless/01055_window_view_proc_hop_to.sh | 1 + .../0_stateless/01075_window_view_proc_tumble_to_now_populate.sh | 1 + 5 files changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh index 4325ebeed24..5c70806ea7b 100755 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh index 8e28995980f..32c9c52ab09 100755 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh index ee11b265ecd..ba566bb4ae6 100755 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh 
b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh index ea8ad372617..0db4173b3dc 100755 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh index f7842af4dad..67c249a9d0e 100755 --- a/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh +++ b/tests/queries/0_stateless/01075_window_view_proc_tumble_to_now_populate.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 0b1bae146d8ed257ca69a4353c415e6f95d75e2d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 12:10:02 +0000 Subject: [PATCH 011/121] Backport #67129 to 24.7: Fix truncate database --- src/Interpreters/InterpreterDropQuery.cpp | 3 +-- tests/queries/0_stateless/02842_truncate_database.reference | 2 ++ tests/queries/0_stateless/02842_truncate_database.sql | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index b68b3ddcd48..bad3e5277db 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -399,10 +399,9 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, if (query.if_empty) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP IF EMPTY is not implemented for databases"); - if (database->hasReplicationThread()) + if (!truncate && database->hasReplicationThread()) database->stopReplication(); - if (database->shouldBeEmptyOnDetach()) { /// Cancel 
restarting replicas in that database, wait for remaining RESTART queries to finish. diff --git a/tests/queries/0_stateless/02842_truncate_database.reference b/tests/queries/0_stateless/02842_truncate_database.reference index 71f52bcd1da..bc8c0210d27 100644 --- a/tests/queries/0_stateless/02842_truncate_database.reference +++ b/tests/queries/0_stateless/02842_truncate_database.reference @@ -20,3 +20,5 @@ source_table_stripe_log source_table_tiny_log === DICTIONARIES IN test_truncate_database === dest_dictionary +new tables +new_table diff --git a/tests/queries/0_stateless/02842_truncate_database.sql b/tests/queries/0_stateless/02842_truncate_database.sql index 09ac844cfe2..bcd818f55ba 100644 --- a/tests/queries/0_stateless/02842_truncate_database.sql +++ b/tests/queries/0_stateless/02842_truncate_database.sql @@ -73,4 +73,8 @@ SELECT * FROM dest_dictionary; -- {serverError UNKNOWN_TABLE} SHOW TABLES FROM test_truncate_database; SHOW DICTIONARIES FROM test_truncate_database; +CREATE TABLE new_table (x UInt16) ENGINE = MergeTree ORDER BY x; +select 'new tables'; +SHOW TABLES FROM test_truncate_database; + DROP DATABASE test_truncate_database; From 4f6124d4dd9384767cad4e601b2c732dd3c553e7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 12:10:27 +0000 Subject: [PATCH 012/121] Backport #67070 to 24.7: Fix flaky `test_seekable_formats_url` and `test_seekable_formats` S3 storage tests --- tests/integration/test_storage_s3/test.py | 53 ++++++++++------------- 1 file changed, 23 insertions(+), 30 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 40cbf4b44a6..d634479e332 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1127,31 +1127,28 @@ def test_url_reconnect_in_the_middle(started_cluster): assert result == "1000000\t3914219105369203805\n" -def test_seekable_formats(started_cluster): - bucket = started_cluster.minio_bucket +# 
At the time of writing the actual read bytes are respectively 148 and 169, so -10% to not be flaky +@pytest.mark.parametrize( + "format_name,expected_bytes_read", [("Parquet", 133), ("ORC", 150)] +) +def test_seekable_formats(started_cluster, format_name, expected_bytes_read): + expected_lines = 1500000 instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" + table_function = f"s3(s3_{format_name.lower()}, structure='a Int32, b String', format='{format_name}')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1000000) settings s3_truncate_on_insert=1", - timeout=100, + f"INSERT INTO TABLE FUNCTION {table_function} SELECT number, randomString(100) FROM numbers({expected_lines}) settings s3_truncate_on_insert=1", + timeout=300, ) result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 1000000 - - table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" - exec_query_with_retry( - instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", - timeout=100, - ) + assert int(result) == expected_lines result = instance.query( f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='60M', max_download_threads=1" ) - assert int(result) == 1500000 + assert int(result) == expected_lines instance.query(f"SELECT * FROM {table_function} FORMAT Null") @@ -1162,35 +1159,31 @@ def test_seekable_formats(started_cluster): result = result.strip() assert result.endswith("MiB") result = result[: result.index(".")] - assert int(result) > 150 + assert int(result) > 140 -def test_seekable_formats_url(started_cluster): +@pytest.mark.parametrize("format_name", ["Parquet", "ORC"]) +def test_seekable_formats_url(started_cluster, format_name): bucket = 
started_cluster.minio_bucket + expected_lines = 1500000 instance = started_cluster.instances["dummy"] # type: ClickHouseInstance - table_function = f"s3(s3_parquet, structure='a Int32, b String', format='Parquet')" + format_name_lower = format_name.lower() + table_function = f"s3(s3_{format_name_lower}, structure='a Int32, b String', format='{format_name}')" exec_query_with_retry( instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", - timeout=100, + f"INSERT INTO TABLE FUNCTION {table_function} SELECT number, randomString(100) FROM numbers({expected_lines}) settings s3_truncate_on_insert=1", + timeout=300, ) result = instance.query(f"SELECT count() FROM {table_function}") - assert int(result) == 1500000 + assert int(result) == expected_lines - table_function = f"s3(s3_orc, structure='a Int32, b String', format='ORC')" - exec_query_with_retry( - instance, - f"insert into table function {table_function} SELECT number, randomString(100) FROM numbers(1500000) settings s3_truncate_on_insert=1", - timeout=100, - ) - - table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_parquet', 'Parquet', 'a Int32, b String')" + url_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_{format_name_lower}', '{format_name}', 'a Int32, b String')" result = instance.query( - f"SELECT count() FROM {table_function} SETTINGS max_memory_usage='60M'" + f"SELECT count() FROM {url_function} SETTINGS max_memory_usage='60M'" ) - assert int(result) == 1500000 + assert int(result) == expected_lines def test_empty_file(started_cluster): From 2556e8071f421e5b1d4fe68257d35f8a2d95f7a0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 12:10:48 +0000 Subject: [PATCH 013/121] Backport #67049 to 24.7: Increase max allocation size for sanitizers --- docker/test/base/Dockerfile | 16 +++++++++------- 1 
file changed, 9 insertions(+), 7 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 2317f84e0cb..a81826ed6b5 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -23,15 +23,17 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. -RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment -RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment -RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment -RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment +# max_allocation_size_mb is set to 32GB, so we have much bigger chance to run into memory limit than the limitation of the sanitizers +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768'" >> /etc/environment +RUN echo "UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768'" >> /etc/environment +RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768'" >> /etc/environment +RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt max_allocation_size_mb=32768'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' -ENV UBSAN_OPTIONS='print_stacktrace=1' -ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' +ENV 
TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1 max_allocation_size_mb=32768' +ENV UBSAN_OPTIONS='print_stacktrace=1 max_allocation_size_mb=32768' +ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1 max_allocation_size_mb=32768' +ENV LSAN_OPTIONS='max_allocation_size_mb=32768' # for external_symbolizer_path RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer From 53304bf24d3b63d6e756a6b2ab189ffc0c4b7686 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 12:11:12 +0000 Subject: [PATCH 014/121] Backport #66934 to 24.7: Un-flake test_runtime_configurable_cache_size --- ..._query_cache.xml => empty_query_cache.xml} | 2 +- .../test.py | 71 ++++++++++--------- ...query_cache_asynchronous_metrics.reference | 2 + ...02494_query_cache_asynchronous_metrics.sql | 13 ++++ 4 files changed, 55 insertions(+), 33 deletions(-) rename tests/integration/test_runtime_configurable_cache_size/configs/{smaller_query_cache.xml => empty_query_cache.xml} (64%) create mode 100644 tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql diff --git a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml b/tests/integration/test_runtime_configurable_cache_size/configs/empty_query_cache.xml similarity index 64% rename from tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml rename to tests/integration/test_runtime_configurable_cache_size/configs/empty_query_cache.xml index 6f2de0fa8f5..c4872a0ce41 100644 --- a/tests/integration/test_runtime_configurable_cache_size/configs/smaller_query_cache.xml +++ b/tests/integration/test_runtime_configurable_cache_size/configs/empty_query_cache.xml @@ -1,7 +1,7 @@ - 1 + 0 diff --git a/tests/integration/test_runtime_configurable_cache_size/test.py 
b/tests/integration/test_runtime_configurable_cache_size/test.py index f761005f297..beaf83ea754 100644 --- a/tests/integration/test_runtime_configurable_cache_size/test.py +++ b/tests/integration/test_runtime_configurable_cache_size/test.py @@ -94,54 +94,61 @@ CONFIG_DIR = os.path.join(SCRIPT_DIR, "configs") def test_query_cache_size_is_runtime_configurable(start_cluster): - # the initial config specifies the maximum query cache size as 2, run 3 queries, expect 2 cache entries node.query("SYSTEM DROP QUERY CACHE") + + # The initial config allows at most two query cache entries but we don't mind node.query("SELECT 1 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1") time.sleep(2) - node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") - res = node.query( - "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'", - ) - assert res == "2\n" + # At this point, the query cache contains one entry and it is stale - # switch to a config with a maximum query cache size of 1 + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "1\n" + + # switch to a config with a maximum query cache size of _0_ node.copy_file_to_container( - os.path.join(CONFIG_DIR, "smaller_query_cache.xml"), + os.path.join(CONFIG_DIR, "empty_query_cache.xml"), "/etc/clickhouse-server/config.d/default.xml", ) node.query("SYSTEM RELOAD CONFIG") - # check that eviction worked as expected - time.sleep(2) - node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") res = node.query( - "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'", - ) - assert ( - res == "2\n" - ) # "Why not 1?", you think. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert. - # Not a real issue, can be changed later, at least there's a test now. 
- - # Also, you may also wonder "why query_cache_ttl = 1"? Reason is that TTLCachePolicy only removes *stale* entries. With the default TTL - # (60 sec), no entries would be removed at all. Again: not a real issue, can be changed later and there's at least a test now. - - # check that the new query cache maximum size is respected when more queries run - node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - node.query("SELECT 5 SETTINGS use_query_cache = 1, query_cache_ttl = 1") - - time.sleep(2) - node.query("SYSTEM RELOAD ASYNCHRONOUS METRICS") - res = node.query( - "SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'", + "SELECT count(*) FROM system.query_cache", ) assert res == "1\n" + # "Why not 0?", I hear you say. Reason is that QC uses the TTLCachePolicy that evicts lazily only upon insert. + # Not a real issue, can be changed later, at least there's a test now. - # restore the original config + # The next SELECT will find a single stale entry which is one entry too much according to the new config. + # This triggers the eviction of all stale entries, in this case the 'SELECT 1' result. + # Then, it tries to insert the 'SELECT 2' result but it also cannot be added according to the config. 
+ node.query("SELECT 2 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "0\n" + + # The new maximum cache size is respected when more queries run + node.query("SELECT 3 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "0\n" + + # Restore the original config node.copy_file_to_container( os.path.join(CONFIG_DIR, "default.xml"), "/etc/clickhouse-server/config.d/default.xml", ) + + node.query("SYSTEM RELOAD CONFIG") + + # It is possible to insert entries again + node.query("SELECT 4 SETTINGS use_query_cache = 1, query_cache_ttl = 1") + res = node.query( + "SELECT count(*) FROM system.query_cache", + ) + assert res == "1\n" diff --git a/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql new file mode 100644 index 00000000000..d8de4facb38 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_asynchronous_metrics.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +-- Create an entry in the query cache +SELECT 1 SETTINGS use_query_cache = true; + +-- Asynchronous metrics must know about the entry +SYSTEM RELOAD ASYNCHRONOUS METRICS; +SELECT value FROM system.asynchronous_metrics WHERE metric = 'QueryCacheEntries'; + +SYSTEM DROP QUERY CACHE; From 1b8401b3e9e1df2510f69c628a40cf0ab1d58e78 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 12:11:35 +0000 Subject: [PATCH 015/121] Backport 
#66924 to 24.7: Decrease rate limit in `01923_network_receive_time_metric_insert` --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index 4d7e79fae52..a6b83eba27d 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" # Rate limit is chosen for operation to spent more than one second. -seq 1 1000 | pv --quiet --rate-limit 500 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" +seq 1 1000 | pv --quiet --rate-limit 400 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. 
${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; From 7d3022303e35840cff1f9ba1034aaeddac9e5153 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 13:09:52 +0000 Subject: [PATCH 016/121] Backport #66981 to 24.7: [CI Fest] Split dynamic tests and rewrite them from sh to sql to avoid timeouts --- ...ad_subcolumns_compact_merge_tree.reference | 17 + ...mic_read_subcolumns_compact_merge_tree.sql | 40 ++ ...6_dynamic_read_subcolumns_memory.reference | 17 + .../03036_dynamic_read_subcolumns_memory.sql | 40 ++ ..._read_subcolumns_wide_merge_tree.reference | 17 + ...ynamic_read_subcolumns_wide_merge_tree.sql | 40 ++ ...3037_dynamic_merges_1_horizontal.reference | 60 -- .../03037_dynamic_merges_1_horizontal.sh | 52 -- ..._1_horizontal_compact_merge_tree.reference | 28 + ...merges_1_horizontal_compact_merge_tree.sql | 33 ++ ...s_1_horizontal_compact_wide_tree.reference | 28 + ..._merges_1_horizontal_compact_wide_tree.sql | 33 ++ .../03037_dynamic_merges_1_vertical.reference | 60 -- .../03037_dynamic_merges_1_vertical.sh | 51 -- ...es_1_vertical_compact_merge_tree.reference | 28 + ...c_merges_1_vertical_compact_merge_tree.sql | 33 ++ ...erges_1_vertical_wide_merge_tree.reference | 28 + ...amic_merges_1_vertical_wide_merge_tree.sql | 33 ++ .../03037_dynamic_merges_2.reference | 20 - .../0_stateless/03037_dynamic_merges_2.sh | 45 -- ..._2_horizontal_compact_merge_tree.reference | 3 + ...merges_2_horizontal_compact_merge_tree.sql | 14 + ...ges_2_horizontal_wide_merge_tree.reference | 3 + ...ic_merges_2_horizontal_wide_merge_tree.sql | 14 + ...es_2_vertical_compact_merge_tree.reference | 3 + ...c_merges_2_vertical_compact_merge_tree.sql | 14 + ...erges_2_vertical_wide_merge_tree.reference | 3 + ...amic_merges_2_vertical_wide_merge_tree.sql | 14 + ...ested_dynamic_merges_compact_horizontal.sh | 32 -- ...sted_dynamic_merges_compact_horizontal.sql | 29 + ..._nested_dynamic_merges_compact_vertical.sh | 32 -- 
...nested_dynamic_merges_compact_vertical.sql | 29 + ...8_nested_dynamic_merges_wide_horizontal.sh | 32 -- ..._nested_dynamic_merges_wide_horizontal.sql | 29 + ...038_nested_dynamic_merges_wide_vertical.sh | 32 -- ...38_nested_dynamic_merges_wide_vertical.sql | 29 + .../03040_dynamic_type_alters_1.reference | 526 ------------------ .../03040_dynamic_type_alters_1.sh | 77 --- ...type_alters_1_compact_merge_tree.reference | 174 ++++++ ...namic_type_alters_1_compact_merge_tree.sql | 53 ++ ...040_dynamic_type_alters_1_memory.reference | 175 ++++++ .../03040_dynamic_type_alters_1_memory.sql | 53 ++ ...ic_type_alters_1_wide_merge_tree.reference | 174 ++++++ ..._dynamic_type_alters_1_wide_merge_tree.sql | 53 ++ .../03040_dynamic_type_alters_2.reference | 182 ------ .../03040_dynamic_type_alters_2.sh | 57 -- ...type_alters_2_compact_merge_tree.reference | 90 +++ ...namic_type_alters_2_compact_merge_tree.sql | 39 ++ ...ic_type_alters_2_wide_merge_tree.reference | 90 +++ ..._dynamic_type_alters_2_wide_merge_tree.sql | 39 ++ 50 files changed, 1539 insertions(+), 1258 deletions(-) create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql delete mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference delete mode 100755 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference create mode 100644 
tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql delete mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference delete mode 100755 tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql delete mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2.reference delete mode 100755 tests/queries/0_stateless/03037_dynamic_merges_2.sh create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql delete mode 100755 tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh create mode 100644 
tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql delete mode 100755 tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh create mode 100644 tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql delete mode 100755 tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh create mode 100644 tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql delete mode 100755 tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh create mode 100644 tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql delete mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1.reference delete mode 100755 tests/queries/0_stateless/03040_dynamic_type_alters_1.sh create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql delete mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2.reference delete mode 100755 tests/queries/0_stateless/03040_dynamic_type_alters_2.sh create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference create mode 100644 tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference 
b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference new file mode 100644 index 00000000000..d75d75896f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.reference @@ -0,0 +1,17 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql new file mode 100644 index 00000000000..66fbf006a8c --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -0,0 +1,40 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into 
test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, 
d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference new file mode 100644 index 00000000000..d75d75896f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.reference @@ -0,0 +1,17 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql new file mode 100644 index 00000000000..bb03bdef704 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql @@ -0,0 +1,40 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=Memory; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from 
numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test 
format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference new file mode 100644 index 00000000000..d75d75896f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.reference @@ -0,0 +1,17 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql new file mode 100644 index 00000000000..00aba3a57b6 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -0,0 +1,40 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +insert into test select number, number from numbers(100000) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(200000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(300000, 100000) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), 
number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(400000, 400000) settings min_insert_block_size_rows=50000; +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(100000, 100000) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test format Null; +select d.Int8, d.Date, d.`Array(String)` from test format Null; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, 
UInt64))`.UInt64 from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference deleted file mode 100644 index 59297e46330..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.reference +++ /dev/null @@ -1,60 +0,0 @@ -MergeTree compact -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh deleted file mode 100755 index 887b2ed94d7..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal.sh +++ /dev/null 
@@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" - $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" - $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" - $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" - $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select 
count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql 
b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql new file mode 100644 index 00000000000..b66fe5e2187 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test 
final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql new file mode 100644 index 00000000000..8a376b6d7d7 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_columns_to_activate=10, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from 
numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference deleted file mode 100644 index 59297e46330..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.reference +++ /dev/null @@ -1,60 +0,0 @@ -MergeTree compact -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String 
-100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String -MergeTree wide -test -50000 DateTime -60000 Date -70000 Array(UInt16) -80000 String -100000 None -100000 UInt64 -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -70000 Array(UInt16) -100000 None -100000 UInt64 -190000 String -200000 Map(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -10000 Tuple(UInt64, UInt64) -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -260000 String -100000 None -100000 UInt64 -200000 Map(UInt64, UInt64) -270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh deleted file mode 100755 index 371ae87c2ef..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - - - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(100000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(80000)" - $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(70000)" - $CH_CLIENT -q "insert into test select number, toDate(number) from numbers(60000)" - $CH_CLIENT -q "insert into test select number, toDateTime(number) from numbers(50000)" - $CH_CLIENT -q "insert into test select number, NULL from numbers(100000)" - - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, map(number, number) from numbers(200000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, tuple(number, number) from numbers(10000)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table 
test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql new file mode 100644 index 00000000000..127b56e727c --- /dev/null +++ 
b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); +insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + 
+drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference new file mode 100644 index 00000000000..d0d777a5a38 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.reference @@ -0,0 +1,28 @@ +50000 DateTime +60000 Date +70000 Array(UInt16) +80000 String +100000 None +100000 UInt64 +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +70000 Array(UInt16) +100000 None +100000 UInt64 +190000 String +200000 Map(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +10000 Tuple(UInt64, UInt64) +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +260000 String +100000 None +100000 UInt64 +200000 Map(UInt64, UInt64) +270000 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql new file mode 100644 index 00000000000..e5c273cb592 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -0,0 +1,33 @@ +-- Tags: long +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1, index_granularity_bytes=10485760, index_granularity=8192, merge_max_block_size=8192, merge_max_block_size_bytes=10485760; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, 'str_' || toString(number) from numbers(80000); +insert into test select number, range(number % 10 + 1) from numbers(70000); +insert into test select number, toDate(number) from numbers(60000); 
+insert into test select number, toDateTime(number) from numbers(50000); +insert into test select number, NULL from numbers(100000); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(200000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(10000); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.reference b/tests/queries/0_stateless/03037_dynamic_merges_2.reference deleted file mode 100644 index 420b8185b16..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_2.reference +++ /dev/null @@ -1,20 +0,0 @@ -MergeTree compact + horizontal merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 -MergeTree wide + horizontal merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 -MergeTree compact + vertical merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 -MergeTree wide + vertical merge -test -1000000 Array(UInt16) -1000000 String -1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2.sh b/tests/queries/0_stateless/03037_dynamic_merges_2.sh deleted file mode 100755 index 
40adbdd4262..00000000000 --- a/tests/queries/0_stateless/03037_dynamic_merges_2.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1" - - -function test() -{ - echo "test" - $CH_CLIENT -q "system stop merges test" - $CH_CLIENT -q "insert into test select number, number from numbers(1000000)" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000)" - $CH_CLIENT -q "insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000)" - - $CH_CLIENT -nm -q "system start merges test; optimize table test final;" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + horizontal merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide + vertical merge" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings 
min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql new file mode 100644 index 00000000000..6d7a0dd8c18 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ 
b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql new file mode 100644 index 00000000000..011d54d2360 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql new file mode 100644 index 00000000000..1a74f9e5417 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop 
table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference new file mode 100644 index 00000000000..afd392002e5 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.reference @@ -0,0 +1,3 @@ +1000000 Array(UInt16) +1000000 String +1000000 UInt64 diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql new file mode 100644 index 00000000000..cbc834e9660 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -0,0 +1,14 @@ +-- Tags: long + +set allow_experimental_dynamic_type = 1; + +drop table if exists test; +create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; +system stop merges test; +insert into test select number, number from numbers(1000000); +insert into test select number, 'str_' || toString(number) from numbers(1000000, 1000000); +insert into test select 
number, range(number % 10 + 1) from numbers(2000000, 1000000); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh deleted file mode 100755 index d4b6d1f4b63..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - -$CH_CLIENT -q "drop table if exists test;" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" - -$CH_CLIENT -q "system stop merges test" -$CH_CLIENT -q "insert into test select number, number from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from 
test group by type order by count(), type" - -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql new file mode 100644 index 00000000000..ff1dc5e7ded --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_horizontal.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type 
from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh deleted file mode 100755 index 39671a297cf..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - -$CH_CLIENT -q "drop table if exists test;" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" - -$CH_CLIENT -q "system stop merges test" -$CH_CLIENT -q "insert into test select number, number from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || 
dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql new file mode 100644 index 00000000000..f9b0101cb87 --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_compact_vertical.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), 
dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh deleted file mode 100755 index d58545c0b13..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - -$CH_CLIENT -q "drop table if exists test;" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" - -$CH_CLIENT -q "system stop merges test" -$CH_CLIENT -q "insert into test select number, number from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), 
dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql new file mode 100644 index 00000000000..5f373d41c7d --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_horizontal.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || 
dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh deleted file mode 100755 index 39671a297cf..00000000000 --- a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --enable_named_columns_in_function_tuple=0" - -$CH_CLIENT -q "drop table if exists test;" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1;" - -$CH_CLIENT -q "system stop merges test" -$CH_CLIENT -q "insert into test select number, number from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000)" -$CH_CLIENT -q "insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000)" - -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" -$CH_CLIENT -nm -q "system start merges test; optimize table test final;" -$CH_CLIENT -q "select count(), dynamicType(d) || ':' || 
dynamicType(d.\`Tuple(a Dynamic(max_types=3))\`.a) as type from test group by type order by count(), type" - -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql new file mode 100644 index 00000000000..36bbc76b8cb --- /dev/null +++ b/tests/queries/0_stateless/03038_nested_dynamic_merges_wide_vertical.sql @@ -0,0 +1,29 @@ +-- Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set enable_named_columns_in_function_tuple = 0; + +drop table if exists test;; +create table test (id UInt64, d Dynamic(max_types=3)) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1; + +system stop merges test; +insert into test select number, number from numbers(100000); +insert into test select number, tuple(if(number % 2 == 0, number, 'str_' || toString(number)))::Tuple(a Dynamic(max_types=3)) from numbers(100000); +insert into test select number, tuple(if(number % 3 == 0, toDate(number), range(number % 10)))::Tuple(a Dynamic(max_types=3)) from numbers(50000); + +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +insert into test select number, tuple(if(number % 3 == 0, toDateTime(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(50000); +insert into test select number, tuple(if(number % 2 == 0, tuple(number), NULL))::Tuple(a Dynamic(max_types=3)) from numbers(200000); + +select count(), dynamicType(d) || ':' || 
dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; +system start merges test; +optimize table test final; +select count(), dynamicType(d) || ':' || dynamicType(d.`Tuple(a Dynamic(max_types=3))`.a) as type from test group by type order by count(), type; + +drop table test; diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference deleted file mode 100644 index a9c785d1e48..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.reference +++ /dev/null @@ -1,526 +0,0 @@ -Memory -initial insert -alter add column 1 -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter modify column 1 -7 None -8 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -4 UInt64 -7 String -8 None -0 0 \N \N 
\N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 -1 Date -5 UInt64 -8 String -9 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -19 19 \N \N \N \N 0 -20 20 20 \N 20 \N 0 -21 21 str_21 str_21 \N \N 0 -22 22 1970-01-23 \N \N 1970-01-23 0 -alter modify column 3 -1 Date -5 UInt64 -8 String -9 None -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 \N 3 \N 0 -4 4 4 \N 0 \N 4 \N 0 -5 5 5 \N 0 \N 5 \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 \N 12 \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -insert after alter modify column 3 -1 Date -5 UInt64 -8 String -12 None -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 \N 3 \N 0 -4 4 4 \N 0 \N 4 \N 0 -5 5 5 \N 0 \N 5 \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 
\N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 \N 12 \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -23 \N \N \N 0 \N \N \N 0 -24 24 24 \N 0 \N \N \N 0 -25 str_25 \N str_25 0 \N \N \N 0 -MergeTree compact -initial insert -alter add column 1 -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter modify column 1 -7 None -8 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 
0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 -1 Date -1 UInt64 -9 None -12 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -19 19 \N \N \N \N 0 -20 20 20 \N 20 \N 0 -21 21 str_21 str_21 \N \N 0 -22 22 1970-01-23 \N \N 1970-01-23 0 -alter modify column 3 -1 Date -1 UInt64 -9 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -insert after alter modify column 3 -1 Date -1 UInt64 -12 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 
str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -23 \N \N \N 0 \N \N \N 0 -24 24 24 \N 0 \N \N \N 0 -25 str_25 \N str_25 0 \N \N \N 0 -MergeTree wide -initial insert -alter add column 1 -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter modify column 1 -7 None -8 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert after alter modify column 1 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -alter modify column 2 -8 None -11 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 
-18 18 1970-01-19 1970-01-19 \N \N 0 -insert after alter modify column 2 -1 Date -1 UInt64 -9 None -12 String -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 3 \N \N 0 -4 4 4 4 \N \N 0 -5 5 5 5 \N \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 12 \N \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -15 15 \N \N \N \N 0 -16 16 16 16 \N \N 0 -17 17 str_17 str_17 \N \N 0 -18 18 1970-01-19 1970-01-19 \N \N 0 -19 19 \N \N \N \N 0 -20 20 20 \N 20 \N 0 -21 21 str_21 str_21 \N \N 0 -22 22 1970-01-23 \N \N 1970-01-23 0 -alter modify column 3 -1 Date -1 UInt64 -9 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -insert after alter modify column 3 -1 Date -1 UInt64 -12 None -12 String -0 0 0 \N 0 \N \N \N 0 -1 1 1 \N 0 \N \N \N 0 -2 2 2 \N 0 \N \N \N 0 -3 3 3 \N 0 3 \N \N 0 -4 4 4 \N 0 4 \N \N 0 -5 5 5 \N 0 5 \N \N 0 -6 6 6 \N 0 str_6 \N \N 0 -7 7 7 \N 0 str_7 \N \N 0 -8 8 8 \N 0 str_8 \N \N 0 -9 9 9 \N 0 \N \N \N 0 -10 10 10 \N 0 \N \N \N 0 -11 11 11 \N 0 \N \N \N 0 -12 12 12 \N 0 12 \N \N 0 -13 13 13 \N 0 str_13 \N \N 0 -14 14 14 \N 0 \N \N \N 0 -15 15 15 \N 0 \N \N \N 0 -16 16 16 \N 0 16 \N \N 0 -17 17 17 \N 0 str_17 \N \N 0 -18 18 18 \N 0 1970-01-19 \N \N 0 -19 19 19 \N 0 \N \N \N 0 -20 20 20 \N 0 \N 20 \N 0 -21 21 21 \N 0 str_21 \N \N 0 -22 22 22 \N 0 \N \N 1970-01-23 0 -23 \N \N \N 0 \N \N 
\N 0 -24 24 24 \N 0 \N \N \N 0 -25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh deleted file mode 100755 index 1f2a6a31ad7..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_1.sh +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" - -function run() -{ - echo "initial insert" - $CH_CLIENT -q "insert into test select number, number from numbers(3)" - - echo "alter add column 1" - $CH_CLIENT -q "alter table test add column d Dynamic(max_types=3) settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter add column 1" - $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" - $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" - $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter modify column 1" - $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1" - $CH_CLIENT -q 
"select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter modify column 1" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter modify column 2" - $CH_CLIENT -q "alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter modify column 2" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter modify column 3" - $CH_CLIENT -q "alter table test modify column y Dynamic settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter modify column 3" - $CH_CLIENT -q "insert into test select number, multiIf(number % 3 == 0, number, 
number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, y.UInt64, y.String, y.\`Tuple(a UInt64)\`.a, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "Memory" -$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=Memory" -run -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" -run -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -run -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference new file mode 100644 index 00000000000..2ec301b747b --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.reference @@ -0,0 +1,174 @@ +initial insert +alter add column 1 +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 
\N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 
\N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql new file mode 100644 index 00000000000..4ab700306d4 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_compact_merge_tree.sql @@ -0,0 +1,53 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000; +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column 1'; +alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by 
dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 1'; +alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 1'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 2'; +alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 2'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); +select 
count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 3'; +alter table test modify column y Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 3'; +insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference new file mode 100644 index 00000000000..c592528c3cd --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.reference @@ -0,0 +1,175 @@ +initial insert +alter add column 1 +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 
\N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +alter modify column 2 +4 UInt64 +7 String +8 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +insert after alter modify column 2 +1 Date +5 UInt64 +8 String +9 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 +alter modify column 3 +1 Date +5 UInt64 +8 String +9 None +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N 
\N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +insert after alter modify column 3 +1 Date +5 UInt64 +8 String +12 None +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 \N 3 \N 0 +4 4 4 \N 0 \N 4 \N 0 +5 5 5 \N 0 \N 5 \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 \N 12 \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql new file mode 100644 index 00000000000..e802fd034ce --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_memory.sql @@ -0,0 +1,53 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=Memory; +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column 1'; +alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 
'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 1'; +alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 1'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 2'; +alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 2'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, 
d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 3'; +alter table test modify column y Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 3'; +insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference new file mode 100644 index 00000000000..2ec301b747b --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.reference @@ -0,0 +1,174 @@ +initial insert +alter add column 1 +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter modify column 1 +7 None +8 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 
14 \N \N \N \N 0 +insert after alter modify column 1 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +alter modify column 2 +8 None +11 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +insert after alter modify column 2 +1 Date +1 UInt64 +9 None +12 String +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 3 \N \N 0 +4 4 4 4 \N \N 0 +5 5 5 5 \N \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 12 \N \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +15 15 \N \N \N \N 0 +16 16 16 16 \N \N 0 +17 17 str_17 str_17 \N \N 0 +18 18 1970-01-19 1970-01-19 \N \N 0 +19 19 \N \N \N \N 0 +20 20 20 \N 20 \N 0 +21 21 str_21 str_21 \N \N 0 +22 22 1970-01-23 \N \N 1970-01-23 0 +alter modify column 3 +1 Date +1 UInt64 +9 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 
15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +insert after alter modify column 3 +1 Date +1 UInt64 +12 None +12 String +0 0 0 \N 0 \N \N \N 0 +1 1 1 \N 0 \N \N \N 0 +2 2 2 \N 0 \N \N \N 0 +3 3 3 \N 0 3 \N \N 0 +4 4 4 \N 0 4 \N \N 0 +5 5 5 \N 0 5 \N \N 0 +6 6 6 \N 0 str_6 \N \N 0 +7 7 7 \N 0 str_7 \N \N 0 +8 8 8 \N 0 str_8 \N \N 0 +9 9 9 \N 0 \N \N \N 0 +10 10 10 \N 0 \N \N \N 0 +11 11 11 \N 0 \N \N \N 0 +12 12 12 \N 0 12 \N \N 0 +13 13 13 \N 0 str_13 \N \N 0 +14 14 14 \N 0 \N \N \N 0 +15 15 15 \N 0 \N \N \N 0 +16 16 16 \N 0 16 \N \N 0 +17 17 17 \N 0 str_17 \N \N 0 +18 18 18 \N 0 1970-01-19 \N \N 0 +19 19 19 \N 0 \N \N \N 0 +20 20 20 \N 0 \N 20 \N 0 +21 21 21 \N 0 str_21 \N \N 0 +22 22 22 \N 0 \N \N 1970-01-23 0 +23 \N \N \N 0 \N \N \N 0 +24 24 24 \N 0 \N \N \N 0 +25 str_25 \N str_25 0 \N \N \N 0 diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql new file mode 100644 index 00000000000..55c4f0b5f0c --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_1_wide_merge_tree.sql @@ -0,0 +1,53 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column 1'; +alter table test add column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after 
alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 1'; +alter table test modify column d Dynamic(max_types=1) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 1'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(15, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter modify column 2'; +alter table test modify column d Dynamic(max_types=3) settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 2'; +insert into test select number, number, multiIf(number % 4 == 0, number, number % 4 == 1, 'str_' || toString(number), number % 4 == 2, toDate(number), NULL) from numbers(19, 4); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a 
from test order by x; + +select 'alter modify column 3'; +alter table test modify column y Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter modify column 3'; +insert into test select number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL), NULL from numbers(23, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, y.UInt64, y.String, y.`Tuple(a UInt64)`.a, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +drop table test; \ No newline at end of file diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference deleted file mode 100644 index f7c00bd8c44..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.reference +++ /dev/null @@ -1,182 +0,0 @@ -MergeTree compact -initial insert -alter add column -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter rename column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert nested dynamic 
-3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] -alter rename column 2 -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] -MergeTree wide -initial insert -alter add column -3 None -0 0 \N \N \N 0 -1 1 \N \N \N 0 -2 2 \N \N \N 0 -insert after alter add column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 \N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -alter rename column 1 -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 -1 1 \N \N \N \N 0 -2 2 \N \N \N \N 0 -3 3 3 \N 3 \N 0 -4 4 4 \N 4 \N 0 -5 5 5 \N 5 \N 0 -6 6 str_6 str_6 \N \N 0 -7 7 str_7 str_7 \N \N 0 -8 8 str_8 str_8 
\N \N 0 -9 9 \N \N \N \N 0 -10 10 \N \N \N \N 0 -11 11 \N \N \N \N 0 -12 12 12 \N 12 \N 0 -13 13 str_13 str_13 \N \N 0 -14 14 \N \N \N \N 0 -insert nested dynamic -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] -alter rename column 2 -3 Array(Dynamic) -4 String -4 UInt64 -7 None -0 0 \N \N \N \N 0 [] [] [] -1 1 \N \N \N \N 0 [] [] [] -2 2 \N \N \N \N 0 [] [] [] -3 3 3 \N 3 \N 0 [] [] [] -4 4 4 \N 4 \N 0 [] [] [] -5 5 5 \N 5 \N 0 [] [] [] -6 6 str_6 str_6 \N \N 0 [] [] [] -7 7 str_7 str_7 \N \N 0 [] [] [] -8 8 str_8 str_8 \N \N 0 [] [] [] -9 9 \N \N \N \N 0 [] [] [] -10 10 \N \N \N \N 0 [] [] [] -11 11 \N \N \N \N 0 [] [] [] -12 12 12 \N 12 \N 0 [] [] [] -13 13 str_13 str_13 \N \N 0 [] [] [] -14 14 \N \N \N \N 0 [] [] [] -15 15 [15] \N \N \N 0 [15] [NULL] [NULL] -16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] -17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh b/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh deleted file mode 100755 index 6491e64372f..00000000000 --- a/tests/queries/0_stateless/03040_dynamic_type_alters_2.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_dynamic_type=1 --allow_experimental_variant_type=1 --use_variant_as_common_type=1" - -function run() -{ - echo "initial insert" - $CH_CLIENT -q "insert into test select number, number from numbers(3)" - - echo "alter add column" - $CH_CLIENT -q "alter table test add column d Dynamic settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert after alter add column 1" - $CH_CLIENT -q "insert into test select number, number, number from numbers(3, 3)" - $CH_CLIENT -q "insert into test select number, number, 'str_' || toString(number) from numbers(6, 3)" - $CH_CLIENT -q "insert into test select number, number, NULL from numbers(9, 3)" - $CH_CLIENT -q "insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3)" - $CH_CLIENT -q "select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d)" - $CH_CLIENT -q "select x, y, d, d.String, d.UInt64, d.Date, d.\`Tuple(a UInt64)\`.a from test order by x" - - echo "alter rename column 1" - $CH_CLIENT -q "alter table test rename column d to d1 settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" - $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a from test order by x" - - echo "insert nested dynamic" - $CH_CLIENT -q "insert into test select number, number, [number % 2 ? 
number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3)" - $CH_CLIENT -q "select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1)" - $CH_CLIENT -q "select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.\`Tuple(a UInt64)\`.a, d1.\`Array(Dynamic)\`.UInt64, d1.\`Array(Dynamic)\`.String, d1.\`Array(Dynamic)\`.Date from test order by x" - - echo "alter rename column 2" - $CH_CLIENT -q "alter table test rename column d1 to d2 settings mutations_sync=1" - $CH_CLIENT -q "select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2)" - $CH_CLIENT -q "select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.\`Tuple(a UInt64)\`.a, d2.\`Array(Dynamic)\`.UInt64, d2.\`Array(Dynamic)\`.String, d2.\`Array(Dynamic)\`.Date, from test order by x" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000;" -run -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (x UInt64, y UInt64 ) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -run -$CH_CLIENT -q "drop table test;" - diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference new file mode 100644 index 00000000000..a2f2a19805d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.reference @@ -0,0 +1,90 @@ +initial insert +alter add column +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 
str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql new 
file mode 100644 index 00000000000..cead110dd7d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_compact_merge_tree.sql @@ -0,0 +1,39 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=100000000, min_bytes_for_wide_part=1000000000; + +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column'; +alter table test add column d Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter rename column 1'; +alter table test rename column d to d1 settings mutations_sync=1; +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a from test order by x; + +select 'insert nested dynamic'; +insert into test select number, number, [number % 2 ? 
number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3); +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a, d1.`Array(Dynamic)`.UInt64, d1.`Array(Dynamic)`.String, d1.`Array(Dynamic)`.Date from test order by x; + +select 'alter rename column 2'; +alter table test rename column d1 to d2 settings mutations_sync=1; +select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2); +select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.`Tuple(a UInt64)`.a, d2.`Array(Dynamic)`.UInt64, d2.`Array(Dynamic)`.String, d2.`Array(Dynamic)`.Date, from test order by x; + +drop table test; diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference new file mode 100644 index 00000000000..a2f2a19805d --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.reference @@ -0,0 +1,90 @@ +initial insert +alter add column +3 None +0 0 \N \N \N 0 +1 1 \N \N \N 0 +2 2 \N \N \N 0 +insert after alter add column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +alter rename column 1 +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 +1 1 \N \N \N \N 0 +2 2 \N \N \N \N 0 +3 3 3 \N 3 \N 0 +4 4 4 \N 4 \N 0 +5 5 5 \N 5 \N 0 +6 6 str_6 str_6 \N \N 0 +7 7 str_7 str_7 \N \N 0 +8 8 str_8 str_8 \N \N 0 +9 9 \N \N \N \N 0 +10 10 \N \N \N \N 0 +11 11 \N \N \N \N 0 +12 12 12 \N 12 \N 0 +13 13 str_13 str_13 \N \N 0 +14 14 \N \N \N \N 0 +insert nested dynamic +3 Array(Dynamic) +4 String +4 UInt64 +7 
None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] +alter rename column 2 +3 Array(Dynamic) +4 String +4 UInt64 +7 None +0 0 \N \N \N \N 0 [] [] [] +1 1 \N \N \N \N 0 [] [] [] +2 2 \N \N \N \N 0 [] [] [] +3 3 3 \N 3 \N 0 [] [] [] +4 4 4 \N 4 \N 0 [] [] [] +5 5 5 \N 5 \N 0 [] [] [] +6 6 str_6 str_6 \N \N 0 [] [] [] +7 7 str_7 str_7 \N \N 0 [] [] [] +8 8 str_8 str_8 \N \N 0 [] [] [] +9 9 \N \N \N \N 0 [] [] [] +10 10 \N \N \N \N 0 [] [] [] +11 11 \N \N \N \N 0 [] [] [] +12 12 12 \N 12 \N 0 [] [] [] +13 13 str_13 str_13 \N \N 0 [] [] [] +14 14 \N \N \N \N 0 [] [] [] +15 15 [15] \N \N \N 0 [15] [NULL] [NULL] +16 16 ['str_16'] \N \N \N 0 [NULL] ['str_16'] [NULL] +17 17 [17] \N \N \N 0 [17] [NULL] [NULL] diff --git a/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql new file mode 100644 index 00000000000..f58599b1d61 --- /dev/null +++ b/tests/queries/0_stateless/03040_dynamic_type_alters_2_wide_merge_tree.sql @@ -0,0 +1,39 @@ +set allow_experimental_dynamic_type = 1; +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; + +drop table if exists test; +create table test (x UInt64, y UInt64) engine=MergeTree order by x settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; + +select 'initial insert'; +insert into test select number, number from numbers(3); + +select 'alter add column'; +alter table test add 
column d Dynamic settings mutations_sync=1; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.`Tuple(a UInt64)`.a from test order by x; + +select 'insert after alter add column 1'; +insert into test select number, number, number from numbers(3, 3); +insert into test select number, number, 'str_' || toString(number) from numbers(6, 3); +insert into test select number, number, NULL from numbers(9, 3); +insert into test select number, number, multiIf(number % 3 == 0, number, number % 3 == 1, 'str_' || toString(number), NULL) from numbers(12, 3); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +select x, y, d, d.String, d.UInt64, d.Date, d.`Tuple(a UInt64)`.a from test order by x; + +select 'alter rename column 1'; +alter table test rename column d to d1 settings mutations_sync=1; +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a from test order by x; + +select 'insert nested dynamic'; +insert into test select number, number, [number % 2 ? 
number : 'str_' || toString(number)]::Array(Dynamic) from numbers(15, 3); +select count(), dynamicType(d1) from test group by dynamicType(d1) order by count(), dynamicType(d1); +select x, y, d1, d1.String, d1.UInt64, d1.Date, d1.`Tuple(a UInt64)`.a, d1.`Array(Dynamic)`.UInt64, d1.`Array(Dynamic)`.String, d1.`Array(Dynamic)`.Date from test order by x; + +select 'alter rename column 2'; +alter table test rename column d1 to d2 settings mutations_sync=1; +select count(), dynamicType(d2) from test group by dynamicType(d2) order by count(), dynamicType(d2); +select x, y, d2, d2.String, d2.UInt64, d2.Date, d2.`Tuple(a UInt64)`.a, d2.`Array(Dynamic)`.UInt64, d2.`Array(Dynamic)`.String, d2.`Array(Dynamic)`.Date, from test order by x; + +drop table test; From e67b26d2d90aaf7133f75ad1f089b4b2e12510b3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 14:06:59 +0000 Subject: [PATCH 017/121] Backport #67078 to 24.7: Fix 2680 flasky --- .../0_stateless/02680_mysql_ast_logical_err.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql index bde91df83ca..78ce1b68b0d 100644 --- a/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql +++ b/tests/queries/0_stateless/02680_mysql_ast_logical_err.sql @@ -1,4 +1,10 @@ CREATE TABLE foo (key UInt32, a String, b Int64, c String) ENGINE = TinyLog; -SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION } -SELECT count() FROM mysql(mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), '127.0.0.1:9004', currentDatabase(), 'foo', '', ''); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } +SELECT count() FROM mysql( + mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', ''), + '127.0.0.1:9004', 
currentDatabase(), 'foo', '', '', + SETTINGS connect_timeout = 100, connection_wait_timeout = 100, read_write_timeout = 300); -- { serverError UNKNOWN_FUNCTION } +SELECT count() FROM mysql( + mysql('127.0.0.1:9004', currentDatabase(), 'foo', 'default', '', SETTINGS connection_pool_size = 1), + '127.0.0.1:9004', currentDatabase(), 'foo', '', '', + SETTINGS connect_timeout = 100, connection_wait_timeout = 100, read_write_timeout = 300); -- { serverError UNKNOWN_FUNCTION, UNSUPPORTED_METHOD } From 2c273bfcae6ae2295c2a36e99f9f9e006a1d70a2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 26 Jul 2024 16:08:10 +0000 Subject: [PATCH 018/121] Backport #66928 to 24.7: groupArrayIntersect: Fix serialization bug --- .../AggregateFunctionGroupArrayIntersect.cpp | 32 ++++++-------- .../02987_group_array_intersect.reference | 8 ++-- .../02987_group_array_intersect.sql | 10 ++--- ...roupArrayIntersect_serialization.reference | 13 ++++++ ...3208_groupArrayIntersect_serialization.sql | 43 +++++++++++++++++++ 5 files changed, 79 insertions(+), 27 deletions(-) create mode 100644 tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference create mode 100644 tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp index 20acda213da..38f2fcb9fb9 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayIntersect.cpp @@ -1,12 +1,12 @@ -#include -#include - #include #include #include #include -#include +#include +#include +#include +#include #include #include @@ -15,18 +15,14 @@ #include #include -#include -#include - #include #include -#include #include -#include -#include -#include -#include +#include +#include + +#include namespace DB @@ -51,7 +47,7 @@ struct AggregateFunctionGroupArrayIntersectData }; -/// Puts all values to the hash 
set. Returns an array of unique values. Implemented for numeric types. +/// Puts all values to the hash set. Returns an array of unique values present in all inputs. Implemented for numeric types. template class AggregateFunctionGroupArrayIntersect : public IAggregateFunctionDataHelper, AggregateFunctionGroupArrayIntersect> @@ -69,7 +65,7 @@ public: : IAggregateFunctionDataHelper, AggregateFunctionGroupArrayIntersect>({argument_type}, parameters_, result_type_) {} - String getName() const override { return "GroupArrayIntersect"; } + String getName() const override { return "groupArrayIntersect"; } bool allocatesMemoryInArena() const override { return false; } @@ -158,7 +154,7 @@ public: set.reserve(size); for (size_t i = 0; i < size; ++i) { - int key; + T key; readIntBinary(key, buf); set.insert(key); } @@ -213,7 +209,7 @@ public: : IAggregateFunctionDataHelper>({input_data_type_}, parameters_, result_type_) , input_data_type(result_type_) {} - String getName() const override { return "GroupArrayIntersect"; } + String getName() const override { return "groupArrayIntersect"; } bool allocatesMemoryInArena() const override { return true; } @@ -240,7 +236,7 @@ public: { const char * begin = nullptr; StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin); - assert(serialized.data != nullptr); + chassert(serialized.data != nullptr); set.emplace(SerializedKeyHolder{serialized, *arena}, it, inserted); } } @@ -260,7 +256,7 @@ public: { const char * begin = nullptr; StringRef serialized = data_column->serializeValueIntoArena(offset + i, *arena, begin); - assert(serialized.data != nullptr); + chassert(serialized.data != nullptr); it = set.find(serialized); if (it != nullptr) diff --git a/tests/queries/0_stateless/02987_group_array_intersect.reference b/tests/queries/0_stateless/02987_group_array_intersect.reference index 7ec64a889f5..ec4d07742af 100644 --- a/tests/queries/0_stateless/02987_group_array_intersect.reference +++ 
b/tests/queries/0_stateless/02987_group_array_intersect.reference @@ -8,12 +8,12 @@ [1,4,5] [] [] -1000000 -999999 +100000 +99999 [9] ['a','c'] -1000000 -999999 +50000 +49999 ['1'] [] ['2023-01-01 00:00:00'] diff --git a/tests/queries/0_stateless/02987_group_array_intersect.sql b/tests/queries/0_stateless/02987_group_array_intersect.sql index 321e860b0a8..15acd0ca900 100644 --- a/tests/queries/0_stateless/02987_group_array_intersect.sql +++ b/tests/queries/0_stateless/02987_group_array_intersect.sql @@ -39,15 +39,15 @@ DROP TABLE test_numbers; DROP TABLE IF EXISTS test_big_numbers_sep; CREATE TABLE test_big_numbers_sep (a Array(Int64)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(1000000); +INSERT INTO test_big_numbers_sep SELECT array(number) FROM numbers_mt(100000); SELECT groupArrayIntersect(*) FROM test_big_numbers_sep; DROP TABLE test_big_numbers_sep; DROP TABLE IF EXISTS test_big_numbers; CREATE TABLE test_big_numbers (a Array(Int64)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_numbers SELECT range(1000000); +INSERT INTO test_big_numbers SELECT range(100000); SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; -INSERT INTO test_big_numbers SELECT range(999999); +INSERT INTO test_big_numbers SELECT range(99999); SELECT length(groupArrayIntersect(*)) FROM test_big_numbers; INSERT INTO test_big_numbers VALUES ([9]); SELECT groupArrayIntersect(*) FROM test_big_numbers; @@ -63,9 +63,9 @@ DROP TABLE test_string; DROP TABLE IF EXISTS test_big_string; CREATE TABLE test_big_string (a Array(String)) engine=MergeTree ORDER BY a; -INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(1000000); +INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(50000); SELECT length(groupArrayIntersect(*)) FROM test_big_string; -INSERT INTO test_big_string SELECT groupArray(toString(number)) FROM numbers_mt(999999); +INSERT INTO test_big_string SELECT 
groupArray(toString(number)) FROM numbers_mt(49999); SELECT length(groupArrayIntersect(*)) FROM test_big_string; INSERT INTO test_big_string VALUES (['1']); SELECT groupArrayIntersect(*) FROM test_big_string; diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference new file mode 100644 index 00000000000..e84856c90fd --- /dev/null +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.reference @@ -0,0 +1,13 @@ +010101 AggregateFunction(groupArrayIntersect, Array(UInt8)) +[1] +1 [2,4,6,8,10] +2 [2,4,6,8,10] +3 [2,4,6,8,10] +5 [2,6,10] +6 [10] +7 [] +a [(['2','4','6','8','10'])] +b [(['2','4','6','8','10'])] +c [(['2','4','6','8','10'])] +d [] +e [] diff --git a/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql new file mode 100644 index 00000000000..1b3d48ce0c3 --- /dev/null +++ b/tests/queries/0_stateless/03208_groupArrayIntersect_serialization.sql @@ -0,0 +1,43 @@ +SELECT hex(groupArrayIntersectState([1]) AS a), toTypeName(a); +SELECT finalizeAggregation(CAST(unhex('010101'), 'AggregateFunction(groupArrayIntersect, Array(UInt8))')); + +DROP TABLE IF EXISTS grouparray; +CREATE TABLE grouparray +( + `v` AggregateFunction(groupArrayIntersect, Array(UInt8)) +) +ENGINE = Log; + +INSERT INTO grouparray Select groupArrayIntersectState([2, 4, 6, 8, 10]::Array(UInt8)); +SELECT '1', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([2, 4, 6, 8, 10]::Array(UInt8)); +SELECT '2', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]::Array(UInt8)); +SELECT '3', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([2, 6, 10]::Array(UInt8)); +SELECT '5', 
arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([10]::Array(UInt8)); +SELECT '6', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; +INSERT INTO grouparray Select groupArrayIntersectState([]::Array(UInt8)); +SELECT '7', arraySort(groupArrayIntersectMerge(v)) FROM grouparray; + +DROP TABLE IF EXISTS grouparray; + + +DROP TABLE IF EXISTS grouparray_string; +CREATE TABLE grouparray_string +( + `v` AggregateFunction(groupArrayIntersect, Array(Tuple(Array(String)))) +) +ENGINE = Log; + +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10'])]); +SELECT 'a', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10']), tuple(['2', '4', '6', '8', '10'])]); +SELECT 'b', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10']), tuple(['2', '4', '6', '8', '10', '14'])]); +SELECT 'c', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([tuple(['2', '4', '6', '8', '10', '20']), tuple(['2', '4', '6', '8', '10', '14'])]); +SELECT 'd', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; +INSERT INTO grouparray_string Select groupArrayIntersectState([]::Array(Tuple(Array(String)))); +SELECT 'e', arraySort(groupArrayIntersectMerge(v)) FROM grouparray_string; From 9ae50f7ce7238b6e6f30c884b39797c39204255a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 27 Jul 2024 00:15:33 +0000 Subject: [PATCH 019/121] Backport #67178 to 24.7: Disable convert OUTER JOIN to INNER JOIN optimization for non ALL JOIN strictness --- .../convertOuterJoinToInnerJoin.cpp | 5 ++- ...uter_join_to_inner_join_any_join.reference | 3 ++ ...vert_outer_join_to_inner_join_any_join.sql | 33 ++++++++++++++ 
...ter_join_to_inner_join_anti_join.reference | 19 ++++++++ ...ert_outer_join_to_inner_join_anti_join.sql | 45 +++++++++++++++++++ 5 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference create mode 100644 tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql create mode 100644 tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference create mode 100644 tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql diff --git a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp index d90f0e152e7..d9296f10a98 100644 --- a/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp +++ b/src/Processors/QueryPlan/Optimizations/convertOuterJoinToInnerJoin.cpp @@ -23,7 +23,10 @@ size_t tryConvertOuterJoinToInnerJoin(QueryPlan::Node * parent_node, QueryPlan:: return 0; const auto & table_join = join->getJoin()->getTableJoin(); - if (table_join.strictness() == JoinStrictness::Asof) + + /// Any JOIN issue https://github.com/ClickHouse/ClickHouse/issues/66447 + /// Anti JOIN issue https://github.com/ClickHouse/ClickHouse/issues/67156 + if (table_join.strictness() != JoinStrictness::All) return 0; /// TODO: Support join_use_nulls diff --git a/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference new file mode 100644 index 00000000000..3d6a23045fb --- /dev/null +++ b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.reference @@ -0,0 +1,3 @@ +1 tx1 US +1 tx2 US +1 tx3 US diff --git a/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql new file mode 100644 index 
00000000000..599875e90cf --- /dev/null +++ b/tests/queries/0_stateless/03210_convert_outer_join_to_inner_join_any_join.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS user_country; +DROP TABLE IF EXISTS user_transactions; + +CREATE TABLE user_country ( + user_id UInt64, + country String +) +ENGINE = ReplacingMergeTree +ORDER BY user_id; + +CREATE TABLE user_transactions ( + user_id UInt64, + transaction_id String +) +ENGINE = MergeTree +ORDER BY user_id; + +INSERT INTO user_country (user_id, country) VALUES (1, 'US'); +INSERT INTO user_transactions (user_id, transaction_id) VALUES (1, 'tx1'), (1, 'tx2'), (1, 'tx3'), (2, 'tx1'); + +-- Expected 3 rows, got only 1. Removing 'ANY' and adding 'FINAL' fixes +-- the issue (but it is not always possible). Moving filter by 'country' to +-- an outer query doesn't help. Query without filter by 'country' works +-- as expected (returns 3 rows). +SELECT * FROM user_transactions +ANY LEFT JOIN user_country USING (user_id) +WHERE + user_id = 1 + AND country = 'US' +ORDER BY ALL; + +DROP TABLE user_country; +DROP TABLE user_transactions; diff --git a/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference new file mode 100644 index 00000000000..d717a29ab23 --- /dev/null +++ b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.reference @@ -0,0 +1,19 @@ +DATA + ┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━┓ + ┃ c0 ┃ c1 ┃ c2 ┃ + ┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━┩ +1. │ 826636805 │ 0 │ │ + ├───────────┼───────────┼────┤ +2. │ 0 │ 150808457 │ │ + └───────────┴───────────┴────┘ +NUMBER OF ROWS IN FIRST SHOULD BE EQUAL TO SECOND +FISRT + +SECOND +1 +TO DEBUG I TOOK JUST A SUBQUERY AND IT HAS 1 ROW +THIRD +1 +AND I ADDED SINGLE CONDITION THAT CONDITION <>0 THAT IS 1 IN THIRD QUERY AND IT HAS NO RESULT!!! 
+FOURTH +1 diff --git a/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql new file mode 100644 index 00000000000..77b1d52dd18 --- /dev/null +++ b/tests/queries/0_stateless/03211_convert_outer_join_to_inner_join_anti_join.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 (c0 Int32, c1 Int32, c2 String) ENGINE = Log() ; +INSERT INTO t0(c0, c1, c2) VALUES (826636805,0, ''), (0, 150808457, ''); + +SELECT 'DATA'; +SELECT * FROM t0 FORMAT PrettyMonoBlock; + +SELECT 'NUMBER OF ROWS IN FIRST SHOULD BE EQUAL TO SECOND'; + + +SELECT 'FISRT'; +SELECT left.c2 FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +WHERE (abs ((- ((sign (right_0.c1)))))); + +SELECT 'SECOND'; +SELECT SUM(check <> 0) +FROM +( + SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` + FROM t0 AS left + LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +); + + +SELECT 'TO DEBUG I TOOK JUST A SUBQUERY AND IT HAS 1 ROW'; + +SELECT 'THIRD'; + +SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` +FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)); + + +SELECT 'AND I ADDED SINGLE CONDITION THAT CONDITION <>0 THAT IS 1 IN THIRD QUERY AND IT HAS NO RESULT!!!'; + + +SELECT 'FOURTH'; +SELECT (abs ((- ((sign (right_0.c1)))))) AS `check` +FROM t0 AS left +LEFT ANTI JOIN t0 AS right_0 ON ((left.c0)=(right_0.c1)) +WHERE check <> 0; + +DROP TABLE t0; From 606ee6bb15b64e2d69bda2f5af6f20042ae47c64 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 27 Jul 2024 00:17:46 +0000 Subject: [PATCH 020/121] Backport #66869 to 24.7: Followup #66725 --- .../Passes/LogicalExpressionOptimizerPass.cpp | 11 +++++++++-- .../02911_join_on_nullsafe_optimization.reference | 2 ++ .../02911_join_on_nullsafe_optimization.sql | 2 ++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp 
b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 5c68bca3a6e..e136440556f 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -68,10 +68,13 @@ QueryTreeNodePtr findEqualsFunction(const QueryTreeNodes & nodes) return nullptr; } -/// Checks if the node is combination of isNull and notEquals functions of two the same arguments +/// Checks if the node is combination of isNull and notEquals functions of two the same arguments: +/// [ (a <> b AND) ] (a IS NULL) AND (b IS NULL) bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, QueryTreeNodePtr & rhs) { QueryTreeNodePtrWithHashSet all_arguments; + QueryTreeNodePtrWithHashSet is_null_arguments; + for (const auto & node : nodes) { const auto * func_node = node->as(); @@ -80,7 +83,11 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, const auto & arguments = func_node->getArguments().getNodes(); if (func_node->getFunctionName() == "isNull" && arguments.size() == 1) + { all_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + is_null_arguments.insert(QueryTreeNodePtrWithHash(arguments[0])); + } + else if (func_node->getFunctionName() == "notEquals" && arguments.size() == 2) { if (arguments[0]->isEqual(*arguments[1])) @@ -95,7 +102,7 @@ bool matchIsNullOfTwoArgs(const QueryTreeNodes & nodes, QueryTreeNodePtr & lhs, return false; } - if (all_arguments.size() != 2) + if (all_arguments.size() != 2 || is_null_arguments.size() != 2) return false; lhs = all_arguments.begin()->node; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 31a1cda18e7..8f194b4ffde 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -39,6 +39,8 @@ SELECT * FROM t1 JOIN t2 ON 
(t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND t2.x <> t1.x ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t2.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: -- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST; diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index f739259caf9..18cb303a54a 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -35,6 +35,8 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND t2.x <> t1.x ) ORDER BY t1.x NULLS LAST; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( t2.x <> t1.x AND (t2.x IS NULL) AND (t2.x IS NULL) ) ORDER BY t1.x NULLS LAST; -- { serverError 
INVALID_JOIN_ON_EXPRESSION } -- aliases defined in the join condition are valid -- FIXME(@vdimir) broken query formatting for the following queries: From 3d965e6e33f070a9c2143004963c48e852db1a15 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 27 Jul 2024 09:09:06 +0000 Subject: [PATCH 021/121] Backport #67149 to 24.7: Rename bad setting --- CHANGELOG.md | 172 ++++++++++++++++++ src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- .../Impl/JSONEachRowRowInputFormat.cpp | 4 +- .../0_stateless/03013_json_key_ignore_case.sh | 4 +- 7 files changed, 180 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2eb65e2967..4223b50c090 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,178 @@ # 2024 Changelog +### ClickHouse release 24.7, 2024-07-30 + +#### Backward Incompatible Change +* Forbid `CREATE MATERIALIZED VIEW ... ENGINE Replicated*MergeTree POPULATE AS SELECT ...` with Replicated databases. [#63963](https://github.com/ClickHouse/ClickHouse/pull/63963) ([vdimir](https://github.com/vdimir)). +* `clickhouse-keeper-client` will only accept paths in string literals, such as `ls '/hello/world'`, not bare strings such as `ls /hello/world`. [#65494](https://github.com/ClickHouse/ClickHouse/pull/65494) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Metric `KeeperOutstandingRequets` was renamed to `KeeperOutstandingRequests`. [#66206](https://github.com/ClickHouse/ClickHouse/pull/66206) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove `is_deterministic` field from the `system.functions` table. [#66630](https://github.com/ClickHouse/ClickHouse/pull/66630) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Function `tuple` will now try to construct named tuples in query (controlled by `enable_named_columns_in_function_tuple`). Introduce function `tupleNames` to extract names from tuples.
[#54881](https://github.com/ClickHouse/ClickHouse/pull/54881) ([Amos Bird](https://github.com/amosbird)). + +#### New Feature +* Add `ASOF JOIN` support for `full_sorting_join` algorithm. [#55051](https://github.com/ClickHouse/ClickHouse/pull/55051) ([vdimir](https://github.com/vdimir)). +* Add new window function `percent_rank`. [#62747](https://github.com/ClickHouse/ClickHouse/pull/62747) ([lgbo](https://github.com/lgbo-ustc)). +* Support JWT authentication in `clickhouse-client` (will be available only in ClickHouse Cloud). [#62829](https://github.com/ClickHouse/ClickHouse/pull/62829) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add SQL functions `changeYear`, `changeMonth`, `changeDay`, `changeHour`, `changeMinute`, `changeSecond`. For example, `SELECT changeMonth(toDate('2024-06-14'), 7)` returns date `2024-07-14`. [#63186](https://github.com/ClickHouse/ClickHouse/pull/63186) ([cucumber95](https://github.com/cucumber95)). +* Introduce startup scripts, which allow the execution of preconfigured queries at the startup stage. [#64889](https://github.com/ClickHouse/ClickHouse/pull/64889) ([pufit](https://github.com/pufit)). +* Support accept_invalid_certificate in client's config in order to allow for client to connect over secure TCP to a server running with self-signed certificate - can be used as a shorthand for corresponding `openSSL` client settings `verificationMode=none` + `invalidCertificateHandler.name=AcceptCertificateHandler`. [#65238](https://github.com/ClickHouse/ClickHouse/pull/65238) ([peacewalker122](https://github.com/peacewalker122)). +* Add system.error_log which contains history of error values from table system.errors, periodically flushed to disk. [#65381](https://github.com/ClickHouse/ClickHouse/pull/65381) ([Pablo Marcos](https://github.com/pamarcos)). +* Add aggregate function `groupConcat`. 
About the same as `arrayStringConcat( groupArray(column), ',')`. Can receive 2 parameters: a string delimiter and the number of elements to be processed. [#65451](https://github.com/ClickHouse/ClickHouse/pull/65451) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add AzureQueue storage. [#65458](https://github.com/ClickHouse/ClickHouse/pull/65458) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a new setting to disable/enable writing page index into parquet files. [#65475](https://github.com/ClickHouse/ClickHouse/pull/65475) ([lgbo](https://github.com/lgbo-ustc)). +* Introduce `logger.console_log_level` server config to control the log level to the console (if enabled). [#65559](https://github.com/ClickHouse/ClickHouse/pull/65559) ([Azat Khuzhin](https://github.com/azat)). +* Automatically append a wildcard `*` to the end of a directory path with table function `file`. [#66019](https://github.com/ClickHouse/ClickHouse/pull/66019) ([Zhidong (David) Guo](https://github.com/Gun9niR)). +* Add `--memory-usage` option to client in non interactive mode. [#66393](https://github.com/ClickHouse/ClickHouse/pull/66393) ([vdimir](https://github.com/vdimir)). +* Make an interactive client for clickhouse-disks, add local disk from the local directory. [#64446](https://github.com/ClickHouse/ClickHouse/pull/64446) ([Daniil Ivanik](https://github.com/divanik)). +* When lightweight delete happens on a table with projection(s), users can choose either to throw an exception (by default) or to drop the projection. [#65594](https://github.com/ClickHouse/ClickHouse/pull/65594) ([jsc0218](https://github.com/jsc0218)). + +#### Experimental Feature +* Change binary serialization of Variant data type: add `compact` mode to avoid writing the same discriminator multiple times for granules with single variant or with only NULL values. Add MergeTree setting `use_compact_variant_discriminators_serialization` that is enabled by default.
Note that Variant type is still experimental and backward-incompatible change in serialization is ok. [#62774](https://github.com/ClickHouse/ClickHouse/pull/62774) ([Kruglov Pavel](https://github.com/Avogar)). +* Support rocksdb as backend storage of keeper. [#56626](https://github.com/ClickHouse/ClickHouse/pull/56626) ([Han Fei](https://github.com/hanfei1991)). +* Refactor JSONExtract functions, support more types including experimental Dynamic type. [#66046](https://github.com/ClickHouse/ClickHouse/pull/66046) ([Kruglov Pavel](https://github.com/Avogar)). +* Support null map subcolumn for Variant and Dynamic subcolumns. [#66178](https://github.com/ClickHouse/ClickHouse/pull/66178) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading dynamic subcolumns from altered Memory table. Previously if `max_types` parameter of a Dynamic type was changed in Memory table via alter, further subcolumns reading can return wrong result. [#66066](https://github.com/ClickHouse/ClickHouse/pull/66066) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for `cluster_for_parallel_replicas` when using custom key parallel replicas. It allows you to use parallel replicas with custom key with MergeTree tables. [#65453](https://github.com/ClickHouse/ClickHouse/pull/65453) ([Antonio Andelic](https://github.com/antonio2368)). + +#### Performance Improvement +* Enable `optimize_functions_to_subcolumns` by default. [#58661](https://github.com/ClickHouse/ClickHouse/pull/58661) ([Anton Popov](https://github.com/CurtizJ)). +* Replace int to string algorithm with a faster one (from a modified amdn/itoa to a modified jeaiii/itoa). [#61661](https://github.com/ClickHouse/ClickHouse/pull/61661) ([Raúl Marín](https://github.com/Algunenano)). +* Sizes of hash tables created by join (`parallel_hash` algorithm) is collected and cached now. This information will be used to preallocate space in hash tables for subsequent query executions and save time on hash table resizes. 
[#64553](https://github.com/ClickHouse/ClickHouse/pull/64553) ([Nikita Taranov](https://github.com/nickitat)). +* Optimized queries with `ORDER BY` primary key and `WHERE` that have a condition with high selectivity by using buffering. It is controlled by setting `read_in_order_use_buffering` (enabled by default) and can increase memory usage of query. [#64607](https://github.com/ClickHouse/ClickHouse/pull/64607) ([Anton Popov](https://github.com/CurtizJ)). +* Improve performance of loading `plain_rewritable` metadata. [#65634](https://github.com/ClickHouse/ClickHouse/pull/65634) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attaching tables on read-only disks will use fewer resources by not loading outdated parts. [#65635](https://github.com/ClickHouse/ClickHouse/pull/65635) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support minmax hyperrectangle for Set indices. [#65676](https://github.com/ClickHouse/ClickHouse/pull/65676) ([AntiTopQuark](https://github.com/AntiTopQuark)). +* Unload primary index of outdated parts to reduce total memory usage. [#65852](https://github.com/ClickHouse/ClickHouse/pull/65852) ([Anton Popov](https://github.com/CurtizJ)). +* Functions `replaceRegexpAll` and `replaceRegexpOne` are now significantly faster if the pattern is trivial, i.e. contains no metacharacters, pattern classes, flags, grouping characters etc. (Thanks to Taiyang Li). [#66185](https://github.com/ClickHouse/ClickHouse/pull/66185) ([Robert Schulze](https://github.com/rschu1ze)). +* s3 requests: Reduce retry time for queries, increase retries count for backups. 8.5 minutes and 100 retries for queries, 1.2 hours and 1000 retries for backup restore. [#65232](https://github.com/ClickHouse/ClickHouse/pull/65232) ([Sema Checherinda](https://github.com/CheSema)). +* Support query plan LIMIT optimization. Support LIMIT pushdown for PostgreSQL storage and table function.
[#65454](https://github.com/ClickHouse/ClickHouse/pull/65454) ([Maksim Kita](https://github.com/kitaisreal)). +* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). +* DatabaseCatalog drops tables faster by using up to database_catalog_drop_table_concurrency threads. [#66065](https://github.com/ClickHouse/ClickHouse/pull/66065) ([Sema Checherinda](https://github.com/CheSema)). + +#### Improvement +* The setting `optimize_trivial_insert_select` is disabled by default. In most cases, it should be beneficial. Nevertheless, if you are seeing slower INSERT SELECT or increased memory usage, you can enable it back or `SET compatibility = '24.6'`. [#58970](https://github.com/ClickHouse/ClickHouse/pull/58970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print stacktrace and diagnostic info if `clickhouse-client` or `clickhouse-local` crashes. [#61109](https://github.com/ClickHouse/ClickHouse/pull/61109) ([Alexander Tokmakov](https://github.com/tavplubix)). +* The result of `SHOW INDEX | INDEXES | INDICES | KEYS` was previously sorted by the primary key column names. Since this was unintuitive, the result is now sorted by the position of the primary key columns within the primary key. [#61131](https://github.com/ClickHouse/ClickHouse/pull/61131) ([Robert Schulze](https://github.com/rschu1ze)). +* Change how deduplication for Materialized Views works. Fixed a lot of cases like: - on destination table: data is split for 2 or more blocks and that blocks is considered as duplicate when that block is inserted in parallel. - on MV destination table: the equal blocks are deduplicated, that happens when MV often produces equal data as a result for different input data due to performing aggregation. 
- on MV destination table: the equal blocks which come from different MV are deduplicated. [#61601](https://github.com/ClickHouse/ClickHouse/pull/61601) ([Sema Checherinda](https://github.com/CheSema)). +* Allow matching column names in a case insensitive manner when reading json files (`input_format_json_case_insensitive_column_matching`). [#61750](https://github.com/ClickHouse/ClickHouse/pull/61750) ([kevinyhzou](https://github.com/KevinyhZou)). +* Support reading partitioned DeltaLake data. Infer DeltaLake schema by reading metadata instead of data. [#63201](https://github.com/ClickHouse/ClickHouse/pull/63201) ([Kseniia Sumarokova](https://github.com/kssenii)). +* In composable protocols TLS layer accepted only `certificateFile` and `privateKeyFile` parameters. https://clickhouse.com/docs/en/operations/settings/composable-protocols. [#63985](https://github.com/ClickHouse/ClickHouse/pull/63985) ([Anton Ivashkin](https://github.com/ianton-ru)). +* Added profile event `SelectQueriesWithPrimaryKeyUsage` which indicates how many SELECT queries use the primary key to evaluate the WHERE clause. [#64492](https://github.com/ClickHouse/ClickHouse/pull/64492) ([0x01f](https://github.com/0xfei)). +* `StorageS3Queue` related fixes and improvements. Deduce a default value of `s3queue_processing_threads_num` according to the number of physical cpu cores on the server (instead of the previous default value as 1). Set default value of `s3queue_loading_retries` to 10. Fix possible vague "Uncaught exception" in exception column of `system.s3queue`. Do not increment retry count on `MEMORY_LIMIT_EXCEEDED` exception. Move files commit to a stage after insertion into table fully finished to avoid files being committed while not inserted. Add settings `s3queue_max_processed_files_before_commit`, `s3queue_max_processed_rows_before_commit`, `s3queue_max_processed_bytes_before_commit`, `s3queue_max_processing_time_sec_before_commit`, to better control commit and flush time.
[#65046](https://github.com/ClickHouse/ClickHouse/pull/65046) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support aliases in parametrized view function (only new analyzer). [#65190](https://github.com/ClickHouse/ClickHouse/pull/65190) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Updated to mask account key in logs in azureBlobStorage. [#65273](https://github.com/ClickHouse/ClickHouse/pull/65273) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Partition pruning for `IN` predicates when filter expression is a part of `PARTITION BY` expression. [#65335](https://github.com/ClickHouse/ClickHouse/pull/65335) ([Eduard Karacharov](https://github.com/korowa)). +* Add system tables with main information about all detached tables. [#65400](https://github.com/ClickHouse/ClickHouse/pull/65400) ([Konstantin Morozov](https://github.com/k-morozov)). +* `arrayMin`/`arrayMax` can be applicable to all data types that are comparable. [#65455](https://github.com/ClickHouse/ClickHouse/pull/65455) ([pn](https://github.com/chloro-pn)). +* Improved memory accounting for cgroups v2 to exclude the amount occupied by the page cache. [#65470](https://github.com/ClickHouse/ClickHouse/pull/65470) ([Nikita Taranov](https://github.com/nickitat)). +* Do not create format settings for each row when serializing chunks to insert to EmbeddedRocksDB table. [#65474](https://github.com/ClickHouse/ClickHouse/pull/65474) ([Duc Canh Le](https://github.com/canhld94)). +* Reduce `clickhouse-local` prompt to just `:)`. `getFQDNOrHostName()` takes too long on macOS, and we don't want a hostname in the prompt for `clickhouse-local` anyway. [#65510](https://github.com/ClickHouse/ClickHouse/pull/65510) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Avoid printing a message from jemalloc about per-CPU arenas on low-end virtual machines. [#65532](https://github.com/ClickHouse/ClickHouse/pull/65532) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Disable filesystem cache background download by default. It will be enabled back when we fix the issue with possible "Memory limit exceeded" because memory deallocation is done outside of query context (while buffer is allocated inside of query context) if we use background download threads. Plus we need to add a separate setting to define max size to download for background workers (currently it is limited by max_file_segment_size, which might be too big). [#65534](https://github.com/ClickHouse/ClickHouse/pull/65534) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add new option to config `` which allow to specify how often clickhouse will reload config. [#65545](https://github.com/ClickHouse/ClickHouse/pull/65545) ([alesapin](https://github.com/alesapin)). +* Implement binary encoding for ClickHouse data types and add its specification in docs. Use it in Dynamic binary serialization, allow to use it in RowBinaryWithNamesAndTypes and Native formats under settings. [#65546](https://github.com/ClickHouse/ClickHouse/pull/65546) ([Kruglov Pavel](https://github.com/Avogar)). +* Improved ZooKeeper load balancing. The current session doesn't expire until the optimal nodes become available despite `fallback_session_lifetime`. Added support for AZ-aware balancing. [#65570](https://github.com/ClickHouse/ClickHouse/pull/65570) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Server settings `compiled_expression_cache_size` and `compiled_expression_cache_elements_size` are now shown in `system.server_settings`. [#65584](https://github.com/ClickHouse/ClickHouse/pull/65584) ([Robert Schulze](https://github.com/rschu1ze)). +* Add support for user identification based on x509 SubjectAltName extension. [#65626](https://github.com/ClickHouse/ClickHouse/pull/65626) ([Anton Kozlov](https://github.com/tonickkozlov)). +* `clickhouse-local` will respect the `max_server_memory_usage` and `max_server_memory_usage_to_ram_ratio` from the configuration file. 
It will also set the max memory usage to 90% of the system memory by default, like `clickhouse-server` does. [#65697](https://github.com/ClickHouse/ClickHouse/pull/65697) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a script to backup your files to ClickHouse. [#65699](https://github.com/ClickHouse/ClickHouse/pull/65699) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* PostgreSQL source support cancel. [#65722](https://github.com/ClickHouse/ClickHouse/pull/65722) ([Maksim Kita](https://github.com/kitaisreal)). +* Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Respect cgroup CPU limit in Keeper. [#65819](https://github.com/ClickHouse/ClickHouse/pull/65819) ([Antonio Andelic](https://github.com/antonio2368)). +* Allow to use `concat` function with empty arguments ``` sql :) select concat();. [#65887](https://github.com/ClickHouse/ClickHouse/pull/65887) ([李扬](https://github.com/taiyang-li)). +* Allow controlling named collections in clickhouse-local. [#65973](https://github.com/ClickHouse/ClickHouse/pull/65973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve Azure profile events. [#65999](https://github.com/ClickHouse/ClickHouse/pull/65999) ([alesapin](https://github.com/alesapin)). +* Support ORC file read by writer time zone. [#66025](https://github.com/ClickHouse/ClickHouse/pull/66025) ([kevinyhzou](https://github.com/KevinyhZou)). +* Add settings to control connection to the PostgreSQL. * Setting `postgresql_connection_attempt_timeout` specifies the value passed to `connect_timeout` parameter of connection URL. * Setting `postgresql_connection_pool_retries` specifies the number of retries to establish a connection to the PostgreSQL end-point. 
[#66232](https://github.com/ClickHouse/ClickHouse/pull/66232) ([Dmitry Novik](https://github.com/novikd)). +* Reduce inaccuracy of input_wait_elapsed_us/input_wait_elapsed_us/elapsed_us. [#66239](https://github.com/ClickHouse/ClickHouse/pull/66239) ([Azat Khuzhin](https://github.com/azat)). +* Improve FilesystemCache ProfileEvents. [#66249](https://github.com/ClickHouse/ClickHouse/pull/66249) ([zhukai](https://github.com/nauu)). +* Add settings to ignore ON CLUSTER clause in queries for named collection management with replicated storage. [#66288](https://github.com/ClickHouse/ClickHouse/pull/66288) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Function `generateSnowflakeID` now allows to specify a machine ID as a parameter to prevent collisions in large clusters. [#66374](https://github.com/ClickHouse/ClickHouse/pull/66374) ([ZAWA_ll](https://github.com/Zawa-ll)). +* Disable suspending on Ctrl+Z in interactive mode. This is a common trap and is not expected behavior for almost all users. I imagine only a few extreme power users could appreciate suspending terminal applications to the background, but I don't know any. [#66511](https://github.com/ClickHouse/ClickHouse/pull/66511) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add option for validating the Primary key type in Dictionaries. Without this option for simple layouts any column type will be implicitly converted to UInt64. ### Documentation entry for user-facing changes. [#66595](https://github.com/ClickHouse/ClickHouse/pull/66595) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)). +* Check cyclic dependencies on CREATE/REPLACE/RENAME/EXCHANGE queries and throw an exception if there is a cyclic dependency. 
Previously such cyclic dependencies could lead to a deadlock during server startup. Also fix some bugs in dependencies creation. [#65405](https://github.com/ClickHouse/ClickHouse/pull/65405) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash in maxIntersections. [#65689](https://github.com/ClickHouse/ClickHouse/pull/65689) ([Raúl Marín](https://github.com/Algunenano)). +* Fix the VALID UNTIL clause in the user definition resetting after a restart. [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix SHOW MERGES remaining time. [#66735](https://github.com/ClickHouse/ClickHouse/pull/66735) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `Query was cancelled` might have been printed twice in clickhouse-client. This behaviour is fixed. [#66005](https://github.com/ClickHouse/ClickHouse/pull/66005) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fixed crash while using MaterializedMySQL with TABLE OVERRIDE that maps MySQL NULL field into ClickHouse not NULL field. [#54649](https://github.com/ClickHouse/ClickHouse/pull/54649) ([Filipp Ozinov](https://github.com/bakwc)). +* Fix logical error when PREWHERE expression read no columns and table has no adaptive index granularity (very old table). [#59173](https://github.com/ClickHouse/ClickHouse/pull/59173) ([Alexander Gololobov](https://github.com/davenger)). +* Fix bug with cancellation buffer when canceling a query. [#64478](https://github.com/ClickHouse/ClickHouse/pull/64478) ([Sema Checherinda](https://github.com/CheSema)). +* Fix filling parts columns from metadata (when columns.txt does not exists). [#64757](https://github.com/ClickHouse/ClickHouse/pull/64757) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash for `ALTER TABLE ... ON CLUSTER ... MODIFY SQL SECURITY`. [#64957](https://github.com/ClickHouse/ClickHouse/pull/64957) ([pufit](https://github.com/pufit)). 
+* Fix crash on destroying AccessControl: add explicit shutdown. [#64993](https://github.com/ClickHouse/ClickHouse/pull/64993) ([Vitaly Baranov](https://github.com/vitlibar)). +* Eliminate injective function in argument of functions `uniq*` recursively. This used to work correctly but was broken in the new analyzer. [#65140](https://github.com/ClickHouse/ClickHouse/pull/65140) ([Duc Canh Le](https://github.com/canhld94)). +* Fix unexpected projection name when query with CTE. [#65267](https://github.com/ClickHouse/ClickHouse/pull/65267) ([wudidapaopao](https://github.com/wudidapaopao)). +* Require `dictGet` privilege when accessing dictionaries via direct query or the `Dictionary` table engine. [#65359](https://github.com/ClickHouse/ClickHouse/pull/65359) ([Joe Lynch](https://github.com/joelynch)). +* Fix user-specific S3 auth with incremental backups. [#65481](https://github.com/ClickHouse/ClickHouse/pull/65481) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable `non-intersecting-parts` optimization for queries with `FINAL` in case of `read-in-order` optimization was enabled. This could lead to an incorrect query result. As a workaround, disable `do_not_merge_across_partitions_select_final` and `split_parts_ranges_into_intersecting_and_non_intersecting_final` before this fix is merged. [#65505](https://github.com/ClickHouse/ClickHouse/pull/65505) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix getting exception `Index out of bound for blob metadata` in case all files from list batch were filtered out. [#65523](https://github.com/ClickHouse/ClickHouse/pull/65523) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix NOT_FOUND_COLUMN_IN_BLOCK for deduplicate merge of projection. [#65573](https://github.com/ClickHouse/ClickHouse/pull/65573) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fixed bug in MergeJoin. 
Column in sparse serialisation might be treated as a column of its nested type though the required conversion wasn't performed. [#65632](https://github.com/ClickHouse/ClickHouse/pull/65632) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed a bug that compatibility level '23.4' was not properly applied. [#65737](https://github.com/ClickHouse/ClickHouse/pull/65737) ([cw5121](https://github.com/cw5121)). +* Fix odbc table with nullable fields. [#65738](https://github.com/ClickHouse/ClickHouse/pull/65738) ([Rodolphe Dugé de Bernonville](https://github.com/RodolpheDuge)). +* Fix data race in `TCPHandler`, which could happen on fatal error. [#65744](https://github.com/ClickHouse/ClickHouse/pull/65744) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix invalid exceptions in function `parseDateTime` with `%F` and `%D` placeholders. [#65768](https://github.com/ClickHouse/ClickHouse/pull/65768) ([Antonio Andelic](https://github.com/antonio2368)). +* For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix a bug in short circuit logic when old analyzer and dictGetOrDefault is used. [#65802](https://github.com/ClickHouse/ClickHouse/pull/65802) ([jsc0218](https://github.com/jsc0218)). +* Fix a bug leads to EmbeddedRocksDB with TTL write corrupted SST files. [#65816](https://github.com/ClickHouse/ClickHouse/pull/65816) ([Duc Canh Le](https://github.com/canhld94)). +* Functions `bitTest`, `bitTestAll`, and `bitTestAny` now return an error if the specified bit index is out-of-bounds [#65818](https://github.com/ClickHouse/ClickHouse/pull/65818) ([Pablo Marcos](https://github.com/pamarcos)). +* Setting `join_any_take_last_row` is supported in any query with hash join. 
[#65820](https://github.com/ClickHouse/ClickHouse/pull/65820) ([vdimir](https://github.com/vdimir)). +* Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), fix incorrect optimization when condition other then `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)). +* Functions `bitShiftLeft` and `bitShitfRight` return an error for out of bounds shift positions [#65838](https://github.com/ClickHouse/ClickHouse/pull/65838) ([Pablo Marcos](https://github.com/pamarcos)). +* Fix growing memory usage in S3Queue. [#65839](https://github.com/ClickHouse/ClickHouse/pull/65839) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix tie handling in `arrayAUC` to match sklearn. [#65840](https://github.com/ClickHouse/ClickHouse/pull/65840) ([gabrielmcg44](https://github.com/gabrielmcg44)). +* Fix possible issues with MySQL server protocol TLS connections. [#65917](https://github.com/ClickHouse/ClickHouse/pull/65917) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible issues with MySQL client protocol TLS connections. [#65938](https://github.com/ClickHouse/ClickHouse/pull/65938) ([Azat Khuzhin](https://github.com/azat)). +* Fix handling of `SSL_ERROR_WANT_READ`/`SSL_ERROR_WANT_WRITE` with zero timeout. [#65941](https://github.com/ClickHouse/ClickHouse/pull/65941) ([Azat Khuzhin](https://github.com/azat)). +* Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. It prevents from incorrect result of schema inference with these settings changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)). 
Column `_size` in the s3 engine and s3 table function denotes the size of a file inside the archive, not the size of the archive itself.
[#66130](https://github.com/ClickHouse/ClickHouse/pull/66130) ([Serge Klochkov](https://github.com/slvrtrn)). +* Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)). +* Fix rare case with missing data in the result of distributed query. [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)). +* Fix order of parsing metadata fields in StorageDeltaLake. [#66211](https://github.com/ClickHouse/ClickHouse/pull/66211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)). +* Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix error reporting while copying to S3 or AzureBlobStorage. [#66295](https://github.com/ClickHouse/ClickHouse/pull/66295) ([Vitaly Baranov](https://github.com/vitlibar)). +* Prevent watchdog from keeping descriptors of unlinked(rotated) log files. [#66334](https://github.com/ClickHouse/ClickHouse/pull/66334) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix the bug that logicalexpressionoptimizerpass lost logical type of constant. [#66344](https://github.com/ClickHouse/ClickHouse/pull/66344) ([pn](https://github.com/chloro-pn)). +* Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible incorrect result for queries joining and filtering table external engine (like PostgreSQL), due to too aggressive filter pushdown. Since now, conditions from where section won't be send to external database in case of outer join with external table. [#66402](https://github.com/ClickHouse/ClickHouse/pull/66402) ([vdimir](https://github.com/vdimir)). +* Added missing column materialization for cross join. [#66413](https://github.com/ClickHouse/ClickHouse/pull/66413) ([lgbo](https://github.com/lgbo-ustc)). +* Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid possible logical error during import from Npy format in case of bad array nesting level, fix testing of other kinds of errors. [#66461](https://github.com/ClickHouse/ClickHouse/pull/66461) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Fix wrong count() result when there is non-deterministic function in predicate. [#66510](https://github.com/ClickHouse/ClickHouse/pull/66510) ([Duc Canh Le](https://github.com/canhld94)). 
+* Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading of uninitialized memory when hashing empty tuples. [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix `column_length` is not updated in `ColumnTuple::insertManyFrom`. [#66626](https://github.com/ClickHouse/ClickHouse/pull/66626) ([lgbo](https://github.com/lgbo-ustc)). +* Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare case of stuck merge after drop column. [#66707](https://github.com/ClickHouse/ClickHouse/pull/66707) ([Raúl Marín](https://github.com/Algunenano)). 
+* Fix assertion `isUniqTypes` when insert select from remote sources. [#66722](https://github.com/ClickHouse/ClickHouse/pull/66722) ([Sema Checherinda](https://github.com/CheSema)). +* Fix logical error in PrometheusRequestHandler. [#66621](https://github.com/ClickHouse/ClickHouse/pull/66621) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix `indexHint` function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)). +* Fix AST formatting of 'create table b empty as a'. [#64951](https://github.com/ClickHouse/ClickHouse/pull/64951) ([Michael Kolupaev](https://github.com/al13n321)). + +#### Build/Testing/Packaging Improvement +* Instantiate template methods ahead in different .cpp files, avoid too large translation units during compiling. [#64818](https://github.com/ClickHouse/ClickHouse/pull/64818) ([lgbo](https://github.com/lgbo-ustc)). + ### ClickHouse release 24.6, 2024-07-01 #### Backward Incompatible Change diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fbb7663b612..de6af441205 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1123,7 +1123,7 @@ class IColumn; M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. 
Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ - M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \ + M(Bool, input_format_json_case_insensitive_column_matching, false, "Ignore case when matching JSON keys with CH columns", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index c395bfdc815..ade38ded2f9 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -64,7 +64,7 @@ static std::initializer_list Date: Mon, 29 Jul 2024 13:15:03 +0000 Subject: [PATCH 022/121] Backport #67146 to 24.7: Remove constants from StorageMerge header in Complete stage --- src/Storages/StorageMerge.cpp | 8 ++++ .../02563_analyzer_merge.reference | 1 + .../0_stateless/02563_analyzer_merge.sql | 45 +++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f5bc183931f..9962da3d6de 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -369,6 +369,14 @@ void StorageMerge::read( /// What will be result structure depending on query processed stage in source tables? Block common_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); + if (local_context->getSettingsRef().allow_experimental_analyzer && processed_stage == QueryProcessingStage::Complete) + { + /// Remove constants. 
/// For StorageDistributed, some functions like `hostName` are constants only for local queries.
b) +select + addMinutes(toDateTime64('2024-07-14 00:00:00', 9, 'UTC'), number), + randomString(5), + randomString(5) +from numbers(100); + + +SELECT '91138316-5127-45ac-9c25-4ad8779777b4', + count() +FROM 02563_db.m; + +DROP TABLE 02563_db.t_1; +DROP TABLE 02563_db.dist_t_1; +DROP TABLE 02563_db.m; + DROP DATABASE 02563_db; From 1f4fd64581582d6fce022b6702c45df4759295ef Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 29 Jul 2024 21:09:50 +0000 Subject: [PATCH 023/121] Backport #67402 to 24.7: CI: Fix build results for release branches --- tests/ci/ci.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 171819e2632..375ff237b23 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -747,7 +747,9 @@ def _upload_build_artifacts( int(job_report.duration), GITHUB_JOB_API_URL(), head_ref=pr_info.head_ref, - pr_number=pr_info.number, + # PRInfo fetches pr number for release branches as well - set pr_number to 0 for release + # so that build results are not mistakenly treated as feature branch builds + pr_number=pr_info.number if pr_info.is_pr else 0, ) report_url = ci_cache.upload_build_report(build_result) print(f"Report file has been uploaded to [{report_url}]") From bca434127dc865a11174a8c3eddd6bca9c5b512f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 30 Jul 2024 10:12:58 +0000 Subject: [PATCH 024/121] Backport #67158 to 24.7: Try to fix 2572 --- ...uery_views_log_background_thread.reference | 18 +-------- ...02572_query_views_log_background_thread.sh | 38 +++++++++++++++++++ ...2572_query_views_log_background_thread.sql | 36 ------------------ 3 files changed, 39 insertions(+), 53 deletions(-) create mode 100755 tests/queries/0_stateless/02572_query_views_log_background_thread.sh delete mode 100644 tests/queries/0_stateless/02572_query_views_log_background_thread.sql diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference 
b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index 22dfaf93781..d7f2272f5b4 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -1,20 +1,4 @@ --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; +OK 1 -select * from copy_02572; 1 -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; 1 queryfinish OK diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sh b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh new file mode 100755 index 00000000000..a3e428e75c8 --- /dev/null +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# INSERT buffer_02572 -> data_02572 -> copy_02572 +# ^^ +# push to system.query_views_log + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists buffer_02572; + drop table if exists data_02572; drop table if exists copy_02572; drop table if exists mv_02572;" + +${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();" +${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);" +${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;" + +${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);" + +# ensure that the flush was not direct +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +# we cannot use OPTIMIZE, this will attach query context, so let's wait +for _ in {1..100}; do + $CLICKHOUSE_CLIENT -q "select * from data_02572;" | grep -q "1" && echo 'OK' && break + sleep 0.5 +done + + +${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;" + +${CLICKHOUSE_CLIENT} --query="system flush logs;" +${CLICKHOUSE_CLIENT} --query="select count() > 0, lower(status::String), errorCodeToName(exception_code) + from system.query_views_log where + view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and + view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') + group by 2, 3;" \ No newline at end of file diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql deleted file mode 100644 index 939c189c5fe..00000000000 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ /dev/null @@ -1,36 +0,0 @@ --- INSERT buffer_02572 -> data_02572 -> copy_02572 --- ^^ --- push to system.query_views_log - -drop 
table if exists buffer_02572; -drop table if exists data_02572; -drop table if exists copy_02572; -drop table if exists mv_02572; - -create table copy_02572 (key Int) engine=Memory(); -create table data_02572 (key Int) engine=Memory(); -create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, - /* never direct flush for flush from background thread */ - /* min_time= */ 3, 3, - 1, 1e9, - 1, 1e9); -create materialized view mv_02572 to copy_02572 as select * from data_02572; - --- { echoOn } -insert into buffer_02572 values (1); --- ensure that the flush was not direct -select * from data_02572; -select * from copy_02572; --- we cannot use OPTIMIZE, this will attach query context, so let's wait -SET function_sleep_max_microseconds_per_block = 6000000; -select sleepEachRow(1) from numbers(3*2) format Null; -select * from data_02572; -select * from copy_02572; - -system flush logs; -select count() > 0, lower(status::String), errorCodeToName(exception_code) - from system.query_views_log where - view_name = concatWithSeparator('.', currentDatabase(), 'mv_02572') and - view_target = concatWithSeparator('.', currentDatabase(), 'copy_02572') - group by 2, 3 -; From 2ec3e70da6e742cf55a3afb1a3b5b7583a4565b4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 30 Jul 2024 10:13:22 +0000 Subject: [PATCH 025/121] =?UTF-8?q?Backport=20#66980=20to=2024.7:=20Forbid?= =?UTF-8?q?=20create=20as=20select=20even=20when=20database=5Freplicated?= =?UTF-8?q?=5Fallow=5Fheavy=5Fcre=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Interpreters/InterpreterCreateQuery.cpp | 16 ++++++++++++---- ...ed_database_forbid_create_as_select.reference | 2 ++ ...eplicated_database_forbid_create_as_select.sh | 8 ++++++-- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 454cedae15c..427378decd5 100644 --- 
a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1373,8 +1373,8 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (need_add_to_database) database = DatabaseCatalog::instance().tryGetDatabase(database_name); - bool allow_heavy_create = getContext()->getSettingsRef().database_replicated_allow_heavy_create; - if (!allow_heavy_create && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) + bool allow_heavy_populate = getContext()->getSettingsRef().database_replicated_allow_heavy_create && create.is_populate; + if (!allow_heavy_populate && database && database->getEngineName() == "Replicated" && (create.select || create.is_populate)) { bool is_storage_replicated = false; @@ -1392,10 +1392,18 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) const bool allow_create_select_for_replicated = (create.isView() && !create.is_populate) || create.is_create_empty || !is_storage_replicated; if (!allow_create_select_for_replicated) + { + /// POPULATE can be enabled with setting, provide hint in error message + if (create.is_populate) + throw Exception( + ErrorCodes::SUPPORT_IS_DISABLED, + "CREATE with POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " + "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + throw Exception( ErrorCodes::SUPPORT_IS_DISABLED, - "CREATE AS SELECT and POPULATE is not supported with Replicated databases. Consider using separate CREATE and INSERT queries. " - "Alternatively, you can enable 'database_replicated_allow_heavy_create' setting to allow this operation, use with caution"); + "CREATE AS SELECT is not supported with Replicated databases. 
Consider using separate CREATE and INSERT queries."); + } } if (database && database->shouldReplicateQuery(getContext(), query_ptr)) diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference index 6ed281c757a..98fb6a68656 100644 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.reference @@ -1,2 +1,4 @@ 1 1 +1 +1 diff --git a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh index 15f169d880f..b587549cb60 100755 --- a/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh +++ b/tests/queries/0_stateless/02933_replicated_database_forbid_create_as_select.sh @@ -18,8 +18,12 @@ ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIAL ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" |& grep -cm1 "SUPPORT_IS_DISABLED" -# But it is allowed with the special setting -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 +# POPULATE is allowed with the special setting ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS 
SELECT 1" --database_replicated_allow_heavy_create=1 +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE MATERIALIZED VIEW ${CLICKHOUSE_DATABASE}_db.test_mv3 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id POPULATE AS SELECT 1" --compatibility='24.6' + +# AS SELECT is forbidden even with the setting +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --database_replicated_allow_heavy_create=1 |& grep -cm1 "SUPPORT_IS_DISABLED" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.test2 (id UInt64) ENGINE = ReplicatedMergeTree ORDER BY id AS SELECT 1" --compatibility='24.6' |& grep -cm1 "SUPPORT_IS_DISABLED" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" From 93ff95bb62b34013e6c99b6030353478fa6af6be Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 30 Jul 2024 12:08:38 +0000 Subject: [PATCH 026/121] Backport #67392 to 24.7: Revert "Slightly better calculation of primary index" --- .../MergeTree/IMergeTreeDataPartWriter.cpp | 19 +----- .../MergeTreeDataPartWriterOnDisk.cpp | 65 +++++++++---------- .../MergeTree/MergeTreeDataPartWriterOnDisk.h | 9 ++- .../02993_lazy_index_loading.reference | 2 +- ..._system_unload_primary_key_table.reference | 8 +-- .../03128_system_unload_primary_key.reference | 4 +- 6 files changed, 45 insertions(+), 62 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index c87f66b64f3..6152da78395 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -72,21 +71,9 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( Columns IMergeTreeDataPartWriter::releaseIndexColumns() { - /// The memory for index was allocated 
without thread memory tracker. - /// We need to deallocate it in shrinkToFit without memory tracker as well. - MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - - Columns result; - result.reserve(index_columns.size()); - - for (auto & column : index_columns) - { - column->shrinkToFit(); - result.push_back(std::move(column)); - } - - index_columns.clear(); - return result; + return Columns( + std::make_move_iterator(index_columns.begin()), + std::make_move_iterator(index_columns.end())); } SerializationPtr IMergeTreeDataPartWriter::getSerialization(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 6dc7e649b06..46dd766139a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -255,12 +255,6 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); } - - const auto & primary_key_types = metadata_snapshot->getPrimaryKey().data_types; - index_serializations.reserve(primary_key_types.size()); - - for (const auto & type : primary_key_types) - index_serializations.push_back(type->getDefaultSerialization()); } } @@ -306,33 +300,22 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() store = std::make_shared(stream_name, data_part_storage, data_part_storage, storage_settings->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; } - skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store, settings)); skip_index_accumulated_marks.push_back(0); } } -void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row) -{ - chassert(index_block.columns() == 
index_serializations.size()); - auto & index_stream = compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream; - - for (size_t i = 0; i < index_block.columns(); ++i) - { - const auto & column = index_block.getByPosition(i).column; - - index_columns[i]->insertFrom(*column, row); - index_serializations[i]->serializeBinary(*column, row, index_stream, {}); - } -} - void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Block & primary_index_block, const Granules & granules_to_write) { - if (!metadata_snapshot->hasPrimaryKey()) - return; - + size_t primary_columns_num = primary_index_block.columns(); if (index_columns.empty()) - index_columns = primary_index_block.cloneEmptyColumns(); + { + index_types = primary_index_block.getDataTypes(); + index_columns.resize(primary_columns_num); + last_block_index_columns.resize(primary_columns_num); + for (size_t i = 0; i < primary_columns_num; ++i) + index_columns[i] = primary_index_block.getByPosition(i).column->cloneEmpty(); + } { /** While filling index (index_columns), disable memory tracker. @@ -346,14 +329,22 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializePrimaryIndex(const Bloc /// Write index. The index contains Primary Key value for each `index_granularity` row. for (const auto & granule : granules_to_write) { - if (granule.mark_on_start) - calculateAndSerializePrimaryIndexRow(primary_index_block, granule.start_row); + if (metadata_snapshot->hasPrimaryKey() && granule.mark_on_start) + { + for (size_t j = 0; j < primary_columns_num; ++j) + { + const auto & primary_column = primary_index_block.getByPosition(j); + index_columns[j]->insertFrom(*primary_column.column, granule.start_row); + primary_column.type->getDefaultSerialization()->serializeBinary( + *primary_column.column, granule.start_row, compress_primary_key ? 
*index_source_hashing_stream : *index_file_hashing_stream, {}); + } + } } } - /// Store block with last index row to write final mark at the end of column - if (with_final_mark) - last_index_block = primary_index_block; + /// store last index row to write final mark at the end of column + for (size_t j = 0; j < primary_columns_num; ++j) + last_block_index_columns[j] = primary_index_block.getByPosition(j).column; } void MergeTreeDataPartWriterOnDisk::calculateAndSerializeStatistics(const Block & block) @@ -430,11 +421,17 @@ void MergeTreeDataPartWriterOnDisk::fillPrimaryIndexChecksums(MergeTreeData::Dat if (index_file_hashing_stream) { - if (write_final_mark && last_index_block) + if (write_final_mark) { - MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - calculateAndSerializePrimaryIndexRow(last_index_block, last_index_block.rows() - 1); - last_index_block.clear(); + for (size_t j = 0; j < index_columns.size(); ++j) + { + const auto & column = *last_block_index_columns[j]; + size_t last_row_number = column.size() - 1; + index_columns[j]->insertFrom(column, last_row_number); + index_types[j]->getDefaultSerialization()->serializeBinary( + column, last_row_number, compress_primary_key ? *index_source_hashing_stream : *index_file_hashing_stream, {}); + } + last_block_index_columns.clear(); } if (compress_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 8d84442981e..bdf0fdb7f32 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -173,10 +173,10 @@ protected: std::unique_ptr index_source_hashing_stream; bool compress_primary_key; - /// Last block with index columns. - /// It's written to index file in the `writeSuffixAndFinalizePart` method. 
- Block last_index_block; - Serializations index_serializations; + DataTypes index_types; + /// Index columns from the last block + /// It's written to index file in the `writeSuffixAndFinalizePart` method + Columns last_block_index_columns; bool data_written = false; @@ -193,7 +193,6 @@ private: void initStatistics(); virtual void fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) = 0; - void calculateAndSerializePrimaryIndexRow(const Block & index_block, size_t row); struct ExecutionStatistics { diff --git a/tests/queries/0_stateless/02993_lazy_index_loading.reference b/tests/queries/0_stateless/02993_lazy_index_loading.reference index 08f07a92815..5bc329ae4eb 100644 --- a/tests/queries/0_stateless/02993_lazy_index_loading.reference +++ b/tests/queries/0_stateless/02993_lazy_index_loading.reference @@ -1,4 +1,4 @@ -100000000 100000000 +100000000 140000000 0 0 1 100000000 100000000 diff --git a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference index 2d33f7f6683..3ac6127fb21 100644 --- a/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference +++ b/tests/queries/0_stateless/03127_system_unload_primary_key_table.reference @@ -1,8 +1,8 @@ -100000000 100000000 -100000000 100000000 -100000000 100000000 +100000000 140000000 +100000000 140000000 +100000000 140000000 0 0 -100000000 100000000 +100000000 140000000 0 0 0 0 1 diff --git a/tests/queries/0_stateless/03128_system_unload_primary_key.reference b/tests/queries/0_stateless/03128_system_unload_primary_key.reference index 2646dc7247f..c7b40ae5b06 100644 --- a/tests/queries/0_stateless/03128_system_unload_primary_key.reference +++ b/tests/queries/0_stateless/03128_system_unload_primary_key.reference @@ -1,4 +1,4 @@ -100000000 100000000 -100000000 100000000 +100000000 140000000 +100000000 140000000 0 0 0 0 From d55d90b6bd814dfad5fbd0f26bf469259929eeb3 Mon Sep 17 00:00:00 2001 
From: Max Kainov Date: Tue, 30 Jul 2024 19:17:20 +0000 Subject: [PATCH 027/121] Update autogenerated version to 24.7.2.1 and contributors --- cmake/autogenerated_versions.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 7b1f4054560..6d232fbf84f 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54488) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af) -SET(VERSION_DESCRIBE v24.7.1.1-stable) -SET(VERSION_STRING 24.7.1.1) +SET(VERSION_PATCH 2) +SET(VERSION_GITHASH a37d2d43da7ff89c512c770d0d7a3053a0a94c43) +SET(VERSION_DESCRIBE v24.7.2.1-stable) +SET(VERSION_STRING 24.7.2.1) # end of autochange From 3b944a941fc433ad98da0e0d716261228ef074dd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 09:09:56 +0000 Subject: [PATCH 028/121] Backport #67219 to 24.7: [Green CI] Fix crash in DistributedAsyncInsert when connection is empty --- src/Client/ConnectionPoolWithFailover.h | 1 + src/Common/PoolWithFailoverBase.h | 11 ++++++++++- .../Distributed/DistributedAsyncInsertBatch.cpp | 17 +++++++++++++---- .../DistributedAsyncInsertDirectoryQueue.cpp | 8 ++++++-- src/Storages/Distributed/DistributedSink.cpp | 6 +++++- 5 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 7b9f480aa4e..a2dc188eb7d 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -42,6 +42,7 @@ public: size_t max_error_cap = DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT); using Entry = IConnectionPool::Entry; + using PoolWithFailoverBase::isTryResultInvalid; /** Allocates connection to work. 
*/ Entry get(const ConnectionTimeouts & timeouts) override; diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index 3d4de773a36..c44ab7df53a 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -116,6 +116,12 @@ public: const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority); + // Returns if the TryResult provided is an invalid one that cannot be used. Used to prevent logical errors. + bool isTryResultInvalid(const TryResult & result, bool skip_read_only_replicas) const + { + return result.entry.isNull() || !result.is_usable || (skip_read_only_replicas && result.is_readonly); + } + size_t getPoolSize() const { return nested_pools.size(); } protected: @@ -300,7 +306,7 @@ PoolWithFailoverBase::getMany( throw DB::NetException(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "All connection tries failed. Log: \n\n{}\n", fail_messages); - std::erase_if(try_results, [&](const TryResult & r) { return r.entry.isNull() || !r.is_usable || (skip_read_only_replicas && r.is_readonly); }); + std::erase_if(try_results, [&](const TryResult & r) { return isTryResultInvalid(r, skip_read_only_replicas); }); /// Sort so that preferred items are near the beginning. std::stable_sort( @@ -321,6 +327,9 @@ PoolWithFailoverBase::getMany( } else if (up_to_date_count >= min_entries) { + if (try_results.size() < up_to_date_count) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Could not find enough connections for up-to-date results. Got: {}, needed: {}", try_results.size(), up_to_date_count); + /// There is enough up-to-date entries. 
try_results.resize(up_to_date_count); } diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index e1facec5b40..2cf69b9f6b7 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int TOO_MANY_PARTITIONS; extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; } /// Can the batch be split and send files from batch one-by-one instead? @@ -241,8 +242,12 @@ void DistributedAsyncInsertBatch::sendBatch(const SettingsChanges & settings_cha insert_settings.applyChanges(settings_changes); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings); - auto result = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName()); - connection = std::move(result.front().entry); + auto results = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName()); + auto result = results.front(); + if (parent.pool->isTryResultInvalid(result, insert_settings.distributed_insert_skip_read_only_replicas)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); + + connection = std::move(result.entry); compression_expected = connection->getCompression() == Protocol::Compression::Enable; LOG_DEBUG(parent.log, "Sending a batch of {} files to {} ({} rows, {} bytes).", @@ -299,8 +304,12 @@ void DistributedAsyncInsertBatch::sendSeparateFiles(const SettingsChanges & sett parent.storage.getContext()->getOpenTelemetrySpanLog()); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings); - auto result = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, 
parent.storage.remote_storage.getQualifiedName()); - auto connection = std::move(result.front().entry); + auto results = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName()); + auto result = results.front(); + if (parent.pool->isTryResultInvalid(result, insert_settings.distributed_insert_skip_read_only_replicas)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); + + auto connection = std::move(result.entry); bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; RemoteInserter remote(*connection, timeouts, diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index d471c67553d..876eff0021f 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -412,8 +412,12 @@ void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path, insert_settings.applyChanges(settings_changes); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings); - auto result = pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); - auto connection = std::move(result.front().entry); + auto results = pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); + auto result = results.front(); + if (pool->isTryResultInvalid(result, insert_settings.distributed_insert_skip_read_only_replicas)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); + + auto connection = std::move(result.entry); LOG_DEBUG(log, "Sending `{}` to {} ({} rows, {} bytes)", file_path, diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 
b2ce62caf0a..e0baefd5838 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -377,7 +377,11 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// NOTE: INSERT will also take into account max_replica_delay_for_distributed_queries /// (anyway fallback_to_stale_replicas_for_distributed_queries=true by default) auto results = shard_info.pool->getManyCheckedForInsert(timeouts, settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); - job.connection_entry = std::move(results.front().entry); + auto result = results.front(); + if (shard_info.pool->isTryResultInvalid(result, settings.distributed_insert_skip_read_only_replicas)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); + + job.connection_entry = std::move(result.entry); } else { From 966c21e9dfffb37ab5e1b23497d96c9caf6a9778 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 12:08:49 +0000 Subject: [PATCH 029/121] Backport #67211 to 24.7: Split test 02967_parallel_replicas_join_algo_and_analyzer --- ...eplicas_join_algo_and_analyzer_1.reference | 30 +++++ ...allel_replicas_join_algo_and_analyzer_1.sh | 51 ++++++++ ...eplicas_join_algo_and_analyzer_2.reference | 57 +++++++++ ...allel_replicas_join_algo_and_analyzer_2.sh | 103 +++++++++++++++ ...plicas_join_algo_and_analyzer_3.reference} | 87 ------------- ...llel_replicas_join_algo_and_analyzer_3.sh} | 119 ------------------ 6 files changed, 241 insertions(+), 206 deletions(-) create mode 100644 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference create mode 100755 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh create mode 100644 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference create mode 100755 tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh rename 
tests/queries/0_stateless/{02967_parallel_replicas_join_algo_and_analyzer.reference => 02967_parallel_replicas_join_algo_and_analyzer_3.reference} (55%) rename tests/queries/0_stateless/{02967_parallel_replicas_join_algo_and_analyzer.sh => 02967_parallel_replicas_join_algo_and_analyzer_3.sh} (58%) diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference new file mode 100644 index 00000000000..e1bf9c27a81 --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.reference @@ -0,0 +1,30 @@ + +simple join with analyzer +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 + +simple (global) join with analyzer and parallel replicas +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) + DefaultCoordinator: Coordination done +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN 
`_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + DefaultCoordinator: Coordination done diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh new file mode 100755 index 00000000000..1089eb4051f --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_1.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Tags: long, no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists num_1; +drop table if exists num_2; + +create table num_1 (key UInt64, value String) engine = MergeTree order by key; +create table num_2 (key UInt64, value Int64) engine = MergeTree order by key; + +insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); +insert into num_2 select number * 3, -number from numbers(1.5e6); +" + +############## +echo +echo "simple join with analyzer" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1" + +############## +echo +echo "simple (global) join with analyzer and parallel replicas" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, +max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, send_logs_level='trace', +max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git 
a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference new file mode 100644 index 00000000000..297ec311f3e --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.reference @@ -0,0 +1,57 @@ + +simple (local) join with analyzer and parallel replicas +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 +4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + DefaultCoordinator: Coordination done + +simple (local) join with analyzer and parallel replicas and full sorting merge join +4200000 4200000 4200000 -1400000 +4200006 4200006 4200006 -1400002 
+4200012 4200012 4200012 -1400004 +4200018 4200018 4200018 -1400006 +4200024 4200024 4200024 -1400008 +4200030 4200030 4200030 -1400010 +4200036 4200036 4200036 -1400012 +4200042 4200042 4200042 -1400014 +4200048 4200048 4200048 -1400016 +4200054 4200054 4200054 -1400018 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + WithOrderCoordinator: Coordination done + +nested join with analyzer +420000 420000 420000 -140000 +420042 420042 420042 -140014 +420084 420084 420084 -140028 +420126 420126 420126 -140042 +420168 420168 420168 -140056 +420210 420210 420210 -140070 +420252 420252 420252 -140084 +420294 420294 420294 -140098 +420336 420336 420336 -140112 +420378 420378 420378 -140126 + +nested join with analyzer and parallel replicas, both local +420000 420000 420000 -140000 +420042 420042 420042 -140014 +420084 420084 420084 -140028 +420126 420126 420126 -140042 +420168 420168 420168 -140056 +420210 420210 420210 -140070 +420252 420252 420252 -140084 +420294 420294 420294 
-140098 +420336 420336 420336 -140112 +420378 420378 420378 -140126 +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) +SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) 
AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) + WithOrderCoordinator: Coordination done diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh new file mode 100755 index 00000000000..7a0e2d9bfdb --- /dev/null +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_2.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# Tags: long, no-random-settings, no-random-merge-tree-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists num_1; +drop table if exists num_2; + +create table num_1 (key UInt64, value String) engine = MergeTree order by key; +create table num_2 (key UInt64, value Int64) engine = MergeTree order by key; + +insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); +insert into num_2 select number * 3, -number from numbers(1.5e6); +" + +############## +echo +echo "simple (local) join with analyzer and parallel replicas" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 
10 offset 700000 +SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' + + +############## +echo +echo "simple (local) join with analyzer and parallel replicas and full sorting merge join" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2) r on l.key = r.key +order by l.key limit 10 offset 700000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' + + +############## +echo +echo "nested join with analyzer" + +$CLICKHOUSE_CLIENT -q " +select * from 
(select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1" + + +############## +echo +echo "nested join with analyzer and parallel replicas, both local" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" + +$CLICKHOUSE_CLIENT -q " +select * from (select key, value from num_1) l +inner join (select key, value from num_2 inner join + (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r +on l.key = r.key order by l.key limit 10 offset 10000 +SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, +cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | +grep "executeQuery\|.*Coordinator: Coordination done" | +grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | +sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference 
b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference similarity index 55% rename from tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference rename to tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference index d7fa419aeab..c0485b817c4 100644 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.reference +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.reference @@ -1,91 +1,4 @@ -simple join with analyzer -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 - -simple (global) join with analyzer and parallel replicas -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value` FROM `default`.`num_2` AS `__table1` (stage: WithMergeableState) - DefaultCoordinator: Coordination done -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), 
_CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` GLOBAL ALL INNER JOIN `_data_` AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - DefaultCoordinator: Coordination done - -simple (local) join with analyzer and parallel replicas -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - 
DefaultCoordinator: Coordination done - -simple (local) join with analyzer and parallel replicas and full sorting merge join -4200000 4200000 4200000 -1400000 -4200006 4200006 4200006 -1400002 -4200012 4200012 4200012 -1400004 -4200018 4200018 4200018 -1400006 -4200024 4200024 4200024 -1400008 -4200030 4200030 4200030 -1400010 -4200036 4200036 4200036 -1400012 -4200042 4200042 4200042 -1400014 -4200048 4200048 4200048 -1400016 -4200054 4200054 4200054 -1400018 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4`) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(700000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - WithOrderCoordinator: Coordination done - -nested join with analyzer -420000 420000 420000 -140000 -420042 420042 420042 -140014 -420084 420084 420084 -140028 -420126 420126 420126 -140042 -420168 420168 420168 -140056 -420210 420210 420210 -140070 -420252 420252 420252 -140084 -420294 420294 420294 -140098 -420336 420336 420336 -140112 -420378 420378 420378 -140126 - -nested join with analyzer and parallel replicas, both local -420000 420000 420000 -140000 -420042 
420042 420042 -140014 -420084 420084 420084 -140028 -420126 420126 420126 -140042 -420168 420168 420168 -140056 -420210 420210 420210 -140070 -420252 420252 420252 -140084 -420294 420294 420294 -140098 -420336 420336 420336 -140112 -420378 420378 420378 -140126 -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) -SELECT `__table1`.`key` AS `key`, `__table1`.`value` AS `value`, `__table3`.`key` AS `r.key`, `__table3`.`value` AS `r.value` FROM (SELECT `__table2`.`key` AS `key`, `__table2`.`value` AS `value` FROM `default`.`num_1` AS `__table2`) AS `__table1` ALL INNER JOIN (SELECT `__table4`.`key` AS `key`, `__table4`.`value` AS `value` FROM `default`.`num_2` AS `__table4` ALL INNER JOIN (SELECT `__table6`.`number` * 7 AS `key` FROM numbers(100000.) 
AS `__table6`) AS `__table5` ON `__table4`.`key` = `__table5`.`key` SETTINGS parallel_replicas_prefer_local_join = 1) AS `__table3` ON `__table1`.`key` = `__table3`.`key` ORDER BY `__table1`.`key` ASC LIMIT _CAST(10000, 'UInt64'), _CAST(10, 'UInt64') (stage: WithMergeableState) - WithOrderCoordinator: Coordination done - nested join with analyzer and parallel replicas, both global 420000 420000 420000 -140000 420042 420042 420042 -140014 diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh similarity index 58% rename from tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh rename to tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh index 2840482da6d..e49a340ab67 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer_3.sh @@ -17,125 +17,6 @@ insert into num_1 select number * 2, toString(number * 2) from numbers(1e7); insert into num_2 select number * 3, -number from numbers(1.5e6); " -############## -echo -echo "simple join with analyzer" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1" - -############## -echo -echo "simple (global) join with analyzer and parallel replicas" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, -max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', 
parallel_replicas_prefer_local_join=0" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, allow_experimental_parallel_reading_from_replicas = 2, send_logs_level='trace', -max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - -############## -echo -echo "simple (local) join with analyzer and parallel replicas" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - 
-############## -echo -echo "simple (local) join with analyzer and parallel replicas and full sorting merge join" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2) r on l.key = r.key -order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - -############## -echo -echo "nested join with analyzer" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1" - - -############## -echo -echo "nested join with analyzer and parallel replicas, both local" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select 
number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" - -$CLICKHOUSE_CLIENT -q " -select * from (select key, value from num_1) l -inner join (select key, value from num_2 inner join - (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r -on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', -allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, -cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | -grep "executeQuery\|.*Coordinator: Coordination done" | -grep -o "SELECT.*WithMergeableState)\|.*Coordinator: Coordination done" | -sed -re 's/_data_[[:digit:]]+_[[:digit:]]+/_data_/g' - - ############## echo echo "nested join with analyzer and parallel replicas, both global" From 091f9348187297738bc2cbb0d02b549312f171f9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 12:09:10 +0000 Subject: [PATCH 030/121] Backport #66983 to 24.7: Split 01508_partition_pruning_long --- .../01508_partition_pruning_long.sh | 30 ----- ... 
01508_partition_pruning_long_1.reference} | 119 ------------------ .../01508_partition_pruning_long_1.sh | 79 ++++++++++++ .../01508_partition_pruning_long_2.reference | 119 ++++++++++++++++++ ...ries => 01508_partition_pruning_long_2.sh} | 80 +++++------- 5 files changed, 228 insertions(+), 199 deletions(-) delete mode 100755 tests/queries/0_stateless/01508_partition_pruning_long.sh rename tests/queries/0_stateless/{01508_partition_pruning_long.reference => 01508_partition_pruning_long_1.reference} (50%) create mode 100755 tests/queries/0_stateless/01508_partition_pruning_long_1.sh create mode 100644 tests/queries/0_stateless/01508_partition_pruning_long_2.reference rename tests/queries/0_stateless/{01508_partition_pruning_long.queries => 01508_partition_pruning_long_2.sh} (58%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.sh b/tests/queries/0_stateless/01508_partition_pruning_long.sh deleted file mode 100755 index 7b56d8bbf03..00000000000 --- a/tests/queries/0_stateless/01508_partition_pruning_long.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug - -# Description of test result: -# Test the correctness of the partition pruning -# -# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) -# Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - - -queries="${CURDIR}/01508_partition_pruning_long.queries" -while IFS= read -r sql -do - [ -z "$sql" ] && continue - if [[ "$sql" == select* ]] ; - then - echo "$sql" - ${CLICKHOUSE_CLIENT} --query "$sql" - CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') - ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" - CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') - echo "" - else - ${CLICKHOUSE_CLIENT} --query "$sql" - fi -done < "$queries" diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long_1.reference similarity index 50% rename from tests/queries/0_stateless/01508_partition_pruning_long.reference rename to tests/queries/0_stateless/01508_partition_pruning_long_1.reference index afdb4257505..3ea4cc4f6ee 100644 --- a/tests/queries/0_stateless/01508_partition_pruning_long.reference +++ b/tests/queries/0_stateless/01508_partition_pruning_long_1.reference @@ -123,122 +123,3 @@ select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 2 2 20000 Selected 2/3 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges ---------- tDD ---------------------------- -select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); -1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); -1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select 
uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; -1 10000 -Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges - -select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges - ---------- sDD ---------------------------- -select uniqExact(_part), count() from sDD; -6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; -3 9999 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; -2 9999 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; -0 0 -Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); -3 11440 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 
202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000; -4 20000 -Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; -3 10001 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - ---------- xMM ---------------------------- -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; -3 10001 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; -1 1 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; -2 5001 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' 
and a<>3; -1 5000 -Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -3 15000 -Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where a = 66; -0 0 -Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -6 30000 -Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from xMM where a = 2; -2 10000 -Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -2 15000 -Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -1 10000 -Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -5 30000 -Selected 5/5 parts by partition key, 5 parts by primary key, 5/5 marks by primary key, 5 marks to read from 5 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; -2 5001 -Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from 
xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges - diff --git a/tests/queries/0_stateless/01508_partition_pruning_long_1.sh b/tests/queries/0_stateless/01508_partition_pruning_long_1.sh new file mode 100755 index 00000000000..512cf8f5265 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning_long_1.sh @@ -0,0 +1,79 @@ +#!/usr/bin/env bash +# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug + +# Description of test result: +# Test the correctness of the partition pruning +# +# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) +# Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +while IFS= read -r sql +do + [ -z "$sql" ] && continue + if [[ "$sql" == select* ]] ; + then + echo "$sql" + ${CLICKHOUSE_CLIENT} --query "$sql" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') + ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. 
parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') + echo "" + else + ${CLICKHOUSE_CLIENT} --query "$sql" + fi +done <<< " +DROP TABLE IF EXISTS tMM; + +CREATE TABLE tMM(d DateTime('Asia/Istanbul'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; +SYSTEM STOP MERGES tMM; +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); +INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); + +SELECT '--------- tMM ----------------------------'; +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); +select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; +select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; +select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; +select uniqExact(_part), count() 
from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); +select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; + +SYSTEM START MERGES tMM; +OPTIMIZE TABLE tMM 
FINAL; + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; + +DROP TABLE tMM; +" diff --git a/tests/queries/0_stateless/01508_partition_pruning_long_2.reference b/tests/queries/0_stateless/01508_partition_pruning_long_2.reference new file mode 100644 index 00000000000..bc767f17031 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning_long_2.reference @@ -0,0 +1,119 @@ +--------- tDD ---------------------------- +select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges + +select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/4 marks by primary key, 4 marks to read from 3 ranges + +--------- sDD ---------------------------- +select uniqExact(_part), count() from sDD; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from sDD where 
toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; +3 9999 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; +2 9999 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/4 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +--------- xMM ---------------------------- +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges 
+ +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; +1 1 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +3 15000 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/3 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where a = 66; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/6 marks by primary key, 6 marks to read from 6 
ranges + +select uniqExact(_part), count() from xMM where a = 2; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +2 15000 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +1 10000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +5 30000 +Selected 5/5 parts by partition key, 5 parts by primary key, 5/5 marks by primary key, 5 marks to read from 5 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/2 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/1 marks by primary key, 1 marks to read from 1 ranges + diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.queries b/tests/queries/0_stateless/01508_partition_pruning_long_2.sh old mode 100644 new mode 100755 similarity index 58% rename from tests/queries/0_stateless/01508_partition_pruning_long.queries rename to tests/queries/0_stateless/01508_partition_pruning_long_2.sh index 0d64fc05f0f..3f8a89bdb20 --- a/tests/queries/0_stateless/01508_partition_pruning_long.queries +++ b/tests/queries/0_stateless/01508_partition_pruning_long_2.sh @@ -1,15 +1,35 @@ -DROP TABLE IF EXISTS tMM; +#!/usr/bin/env bash +# Tags: long, no-polymorphic-parts, no-random-settings, no-random-merge-tree-settings, no-debug + +# Description of test result: +# Test the correctness of 
the partition pruning +# +# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) +# Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +while IFS= read -r sql +do + [ -z "$sql" ] && continue + if [[ "$sql" == select* ]] ; + then + echo "$sql" + ${CLICKHOUSE_CLIENT} --query "$sql" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') + ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" + CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') + echo "" + else + ${CLICKHOUSE_CLIENT} --query "$sql" + fi +done <<< " DROP TABLE IF EXISTS tDD; DROP TABLE IF EXISTS sDD; DROP TABLE IF EXISTS xMM; -CREATE TABLE tMM(d DateTime('Asia/Istanbul'), a Int64) ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY tuple() SETTINGS index_granularity = 8192; -SYSTEM STOP MERGES tMM; -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-08-16 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-09-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); -INSERT INTO tMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, number FROM numbers(5000); CREATE TABLE tDD(d 
DateTime('Asia/Istanbul'),a Int) ENGINE = MergeTree PARTITION BY toYYYYMMDD(d) ORDER BY tuple() SETTINGS index_granularity = 8192; SYSTEM STOP MERGES tDD; @@ -34,44 +54,6 @@ INSERT INTO xMM SELECT toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul') + numb INSERT INTO xMM SELECT toDateTime('2020-10-15 00:00:00', 'Asia/Istanbul') + number*60, 1, number FROM numbers(5000); -SELECT '--------- tMM ----------------------------'; -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); -select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; -select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; -select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00', 'Asia/Istanbul'); -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00', 'Asia/Istanbul'); -select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; 
-select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; - -SYSTEM START MERGES tMM; -OPTIMIZE TABLE tMM FINAL; - -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; - - SELECT '--------- tDD ----------------------------'; SYSTEM START MERGES tDD; OPTIMIZE TABLE tDD FINAL; @@ -116,9 +98,7 @@ select uniqExact(_part), count() from xMM where a <> 66; select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -DROP TABLE tMM; DROP TABLE tDD; DROP TABLE sDD; DROP TABLE xMM; - - +" From 0ce87f2c63548f41cf81bdff75a83b78cb7d7bb0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse 
Date: Wed, 31 Jul 2024 12:09:30 +0000 Subject: [PATCH 031/121] Backport #66954 to 24.7: Split slow test 03036_dynamic_read_subcolumns --- ...s.sh => 03036_dynamic_read_subcolumns.lib} | 27 --------- .../03036_dynamic_read_subcolumns.reference | 57 ------------------- .../03036_dynamic_read_subcolumns_1.reference | 19 +++++++ .../03036_dynamic_read_subcolumns_1.sh | 21 +++++++ .../03036_dynamic_read_subcolumns_2.reference | 19 +++++++ .../03036_dynamic_read_subcolumns_2.sh | 21 +++++++ .../03036_dynamic_read_subcolumns_3.reference | 19 +++++++ .../03036_dynamic_read_subcolumns_3.sh | 21 +++++++ 8 files changed, 120 insertions(+), 84 deletions(-) rename tests/queries/0_stateless/{03036_dynamic_read_subcolumns.sh => 03036_dynamic_read_subcolumns.lib} (79%) delete mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference create mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference create mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference create mode 100755 tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.lib similarity index 79% rename from tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh rename to tests/queries/0_stateless/03036_dynamic_read_subcolumns.lib index 65517061b99..4914051db82 100755 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.sh +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.lib @@ -1,14 +1,4 @@ #!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck 
source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" - function test() { @@ -43,20 +33,3 @@ function test() $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64 from test format Null" $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, d.\`Array(Array(Dynamic))\`.UInt64, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" } - -$CH_CLIENT -q "drop table if exists test;" - -echo "Memory" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference deleted file mode 100644 index 36984bc8b9b..00000000000 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns.reference +++ /dev/null @@ -1,57 +0,0 @@ -Memory -test -Array(Array(Dynamic)) -Array(Variant(String, UInt64)) -None -String -UInt64 -200000 -200000 -200000 -200000 -0 -0 -200000 -200000 -100000 -100000 -200000 -0 -MergeTree compact -test -Array(Array(Dynamic)) -Array(Variant(String, UInt64)) -None -String -UInt64 -200000 -200000 -200000 -200000 -0 -0 -200000 -200000 -100000 -100000 -200000 -0 -MergeTree wide -test -Array(Array(Dynamic)) -Array(Variant(String, UInt64)) -None -String -UInt64 
-200000 -200000 -200000 -200000 -0 -0 -200000 -200000 -100000 -100000 -200000 -0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference new file mode 100644 index 00000000000..0d51ecfac3b --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.reference @@ -0,0 +1,19 @@ +Memory +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh new file mode 100755 index 00000000000..aabba731816 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_1.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# shellcheck source=./03036_dynamic_read_subcolumns.lib +. 
"$CUR_DIR"/03036_dynamic_read_subcolumns.lib + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + +$CH_CLIENT -q "drop table if exists test;" + +echo "Memory" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference new file mode 100644 index 00000000000..099b7574566 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.reference @@ -0,0 +1,19 @@ +MergeTree compact +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh new file mode 100755 index 00000000000..872f4c20a98 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_2.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# shellcheck source=./03036_dynamic_read_subcolumns.lib +. 
"$CUR_DIR"/03036_dynamic_read_subcolumns.lib + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree compact" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" +test +$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference new file mode 100644 index 00000000000..35db4a22b4c --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.reference @@ -0,0 +1,19 @@ +MergeTree wide +test +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +200000 +200000 +200000 +200000 +0 +0 +200000 +200000 +100000 +100000 +200000 +0 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh new file mode 100755 index 00000000000..96276c96add --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_3.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Tags: long + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# reset --log_comment +CLICKHOUSE_LOG_COMMENT= +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + + +# shellcheck source=./03036_dynamic_read_subcolumns.lib +. 
"$CUR_DIR"/03036_dynamic_read_subcolumns.lib + +CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1" + +$CH_CLIENT -q "drop table if exists test;" + +echo "MergeTree wide" +$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" +test +$CH_CLIENT -q "drop table test;" From b9a269bafffa4311c5489ab2983720e2cb410f13 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 14:06:42 +0000 Subject: [PATCH 032/121] Backport #67185 to 24.7: Reduce max time of 00763_long_lock_buffer_alter_destination_table --- ...ong_lock_buffer_alter_destination_table.sh | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh b/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh index 7e2384cfc52..c12b4426740 100755 --- a/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh +++ b/tests/queries/0_stateless/00763_long_lock_buffer_alter_destination_table.sh @@ -16,18 +16,39 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE buffer_00763_1 (s String) ENGINE = Bu ${CLICKHOUSE_CLIENT} --query="CREATE TABLE mt_00763_1 (x UInt32, s String) ENGINE = MergeTree ORDER BY x" ${CLICKHOUSE_CLIENT} --query="INSERT INTO mt_00763_1 VALUES (1, '1'), (2, '2'), (3, '3')" -function thread1() +function thread_alter() { - seq 1 300 | sed -r -e 's/.+/ALTER TABLE mt_00763_1 MODIFY column s UInt32; ALTER TABLE mt_00763_1 MODIFY column s String;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error ||: + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 300 ]; + do + it=$((it+1)) + $CLICKHOUSE_CLIENT --multiquery --ignore-error -q " + ALTER TABLE mt_00763_1 MODIFY column s UInt32; + ALTER TABLE mt_00763_1 MODIFY column s String; + " ||: + done 
} -function thread2() +function thread_query() { - seq 1 2000 | sed -r -e 's/.+/SELECT sum(length(s)) FROM buffer_00763_1;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)' + local TIMELIMIT=$((SECONDS+$1)) + local it=0 + while [ $SECONDS -lt "$TIMELIMIT" ] && [ $it -lt 2000 ]; + do + it=$((it+1)) + $CLICKHOUSE_CLIENT --multiquery --ignore-error -q " + SELECT sum(length(s)) FROM buffer_00763_1; + " 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)' + done } -thread1 & -thread2 & +export -f thread_alter +export -f thread_query + +TIMEOUT=30 +thread_alter $TIMEOUT & +thread_query $TIMEOUT & wait From 270a2af7d6c324d6a8b0b06fbaa09ea0bbbab44f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 14:07:03 +0000 Subject: [PATCH 033/121] Backport #67175 to 24.7: Support set orc reader time zone name --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.cpp | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- .../Formats/Impl/NativeORCBlockInputFormat.cpp | 6 +----- tests/queries/0_stateless/03198_orc_read_time_zone.sh | 10 +++++----- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index de6af441205..5e1efdbb7e0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1071,7 +1071,7 @@ class IColumn; M(Bool, input_format_orc_allow_missing_columns, true, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_orc_use_fast_decoder, true, "Use a faster ORC decoder implementation.", 0) \ M(Bool, input_format_orc_filter_push_down, true, "When reading ORC files, skip whole stripes or row groups based on the WHERE/PREWHERE expressions, min/max statistics or bloom filter in the ORC metadata.", 0) \ - M(Bool, input_format_orc_read_use_writer_time_zone, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's 
time zone is GMT.", 0) \ + M(String, input_format_orc_reader_time_zone_name, "GMT", "The time zone name for ORC row reader, the default ORC row reader's time zone is GMT.", 0) \ M(Bool, input_format_parquet_allow_missing_columns, true, "Allow missing columns while reading Parquet input formats", 0) \ M(UInt64, input_format_parquet_local_file_min_bytes_for_seek, 8192, "Min bytes required for local read (file) to do seek, instead of read with ignore in Parquet input format", 0) \ M(Bool, input_format_arrow_allow_missing_columns, true, "Allow missing columns while reading Arrow input formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ade38ded2f9..a27b5fece0c 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -69,7 +69,7 @@ static std::initializer_listgetWriterTimezone(); - row_reader_options.setTimezoneName(writer_time_zone); - } + row_reader_options.setTimezoneName(format_settings.orc.reader_time_zone_name); row_reader_options.range(current_stripe_info->getOffset(), current_stripe_info->getLength()); if (format_settings.orc.filter_push_down && sarg) { diff --git a/tests/queries/0_stateless/03198_orc_read_time_zone.sh b/tests/queries/0_stateless/03198_orc_read_time_zone.sh index 27530c06237..7d1da0c1579 100755 --- a/tests/queries/0_stateless/03198_orc_read_time_zone.sh +++ b/tests/queries/0_stateless/03198_orc_read_time_zone.sh @@ -5,8 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "drop table if exists test" -$CLICKHOUSE_CLIENT -q "create table test(id UInt64, t DateTime64) Engine=MergeTree order by id" -$CLICKHOUSE_CLIENT -q "insert into test from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_read_use_writer_time_zone=true FORMAT ORC" -$CLICKHOUSE_CLIENT -q "select * from test SETTINGS session_timezone='Asia/Shanghai'" -$CLICKHOUSE_CLIENT -q "drop table test" \ No newline at end of file +$CLICKHOUSE_CLIENT -q "drop table if exists test_orc_read_timezone" +$CLICKHOUSE_CLIENT -q "create table test_orc_read_timezone(id UInt64, t DateTime64) Engine=MergeTree order by id" +$CLICKHOUSE_CLIENT -q "insert into test_orc_read_timezone from infile '$CURDIR/data_orc/test_reader_time_zone.snappy.orc' SETTINGS input_format_orc_reader_time_zone_name='Asia/Shanghai' FORMAT ORC" +$CLICKHOUSE_CLIENT -q "select * from test_orc_read_timezone SETTINGS session_timezone='Asia/Shanghai'" +$CLICKHOUSE_CLIENT -q "drop table test_orc_read_timezone" \ No newline at end of file From fac15bdc6367a5b0c76f35d066a59de086fd43dc Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 16:06:39 +0000 Subject: [PATCH 034/121] Backport #67474 to 24.7: [Green CI] Fix WriteBuffer destructor when finalize has failed for MergeTreeDeduplicationLog::shutdown --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 22ff9b7194f..a8110500f13 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -341,15 +341,19 @@ void MergeTreeDeduplicationLog::shutdown() stopped = true; if (current_writer) { + /// If an error has occurred during finalize, we'd like to have the exception set for reset. 
+ /// Otherwise, we'll be in a situation when a finalization didn't happen, and we didn't get + /// any error, causing logical error (see ~MemoryBuffer()). try { current_writer->finalize(); + current_writer.reset(); } catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); + current_writer.reset(); } - current_writer.reset(); } } From 83c4499fbfba2d6a5a6d72082c617e6b411fb331 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 17:08:04 +0000 Subject: [PATCH 035/121] Backport #67035 to 24.7: [Green CI] Fix test test_storage_s3_queue/test.py::test_max_set_age --- tests/integration/README.md | 68 ++++++++++++- tests/integration/helpers/cluster.py | 5 + .../integration/test_storage_s3_queue/test.py | 97 ++++++++----------- 3 files changed, 107 insertions(+), 63 deletions(-) diff --git a/tests/integration/README.md b/tests/integration/README.md index cde4cb05aec..a8deb97b526 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -16,7 +16,7 @@ Don't use Docker from your system repository. * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` * [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: -``` +```bash sudo -H pip install \ PyMySQL \ avro \ @@ -78,7 +78,7 @@ Notes: * Some tests maybe require a lot of resources (CPU, RAM, etc.). Better not try large tests like `test_distributed_ddl*` on your laptop. 
You can run tests via `./runner` script and pass pytest arguments as last arg: -``` +```bash $ ./runner --binary $HOME/ClickHouse/programs/clickhouse --odbc-bridge-binary $HOME/ClickHouse/programs/clickhouse-odbc-bridge --base-configs-dir $HOME/ClickHouse/programs/server/ 'test_ssl_cert_authentication -ss' Start tests ====================================================================================================== test session starts ====================================================================================================== @@ -102,7 +102,7 @@ test_ssl_cert_authentication/test.py::test_create_user PASSED ``` Path to binary and configs maybe specified via env variables: -``` +```bash $ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/programs/server/ $ export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/programs/clickhouse $ export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=$HOME/ClickHouse/programs/clickhouse-odbc-bridge @@ -121,6 +121,64 @@ test_odbc_interaction/test.py ...... [100%] You can just open shell inside a container by overwritting the command: ./runner --command=bash +### Parallel test execution + +On the CI, we run a number of parallel runners (5 at the time of this writing), each on its own +Docker container. These runner containers spawn more containers for each test for the services +needed such as ZooKeeper, MySQL, PostgreSQL and minio, among others. This means that tests do not +share any services among them. Within each runner, tests are parallelized using +[pytest-xdist](https://pytest-xdist.readthedocs.io/en/stable/). We're using `--dist=loadfile` to +[distribute the load](https://pytest-xdist.readthedocs.io/en/stable/distribution.html). In the +documentation words: this guarantees that all tests in a file run in the same worker. This means +that any test within the same file will never execute their tests in parallel. They'll be executed +on the same worker one after the other. 
+ +If the test supports parallel and repeated execution, you can run a bunch of them in parallel to +look for flakiness. We use [pytest-repeat](https://pypi.org/project/pytest-repeat/) to set the +number of times we want to execute a test through the `--count` argument. Then, `-n` sets the number +of parallel workers for `pytest-xdist`. + +```bash +$ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=$HOME/ClickHouse/programs/server/ +$ export CLICKHOUSE_TESTS_SERVER_BIN_PATH=$HOME/ClickHouse/programs/clickhouse +$ export CLICKHOUSE_TESTS_ODBC_BRIDGE_BIN_PATH=$HOME/ClickHouse/programs/clickhouse-odbc-bridge +$ ./runner 'test_storage_s3_queue/test.py::test_max_set_age -- --count 10 -n 5' +Start tests +=============================================================================== test session starts ================================================================================ +platform linux -- Python 3.10.12, pytest-7.4.4, pluggy-1.5.0 -- /usr/bin/python3 +cachedir: .pytest_cache +rootdir: /ClickHouse/tests/integration +configfile: pytest.ini +plugins: reportlog-0.4.0, xdist-3.5.0, random-0.2, repeat-0.9.3, order-1.0.0, timeout-2.2.0 +timeout: 900.0s +timeout method: signal +timeout func_only: False +5 workers [10 items] +scheduling tests via LoadScheduling + +test_storage_s3_queue/test.py::test_max_set_age[9-10] +test_storage_s3_queue/test.py::test_max_set_age[7-10] +test_storage_s3_queue/test.py::test_max_set_age[5-10] +test_storage_s3_queue/test.py::test_max_set_age[1-10] +test_storage_s3_queue/test.py::test_max_set_age[3-10] +[gw3] [ 10%] PASSED test_storage_s3_queue/test.py::test_max_set_age[7-10] +test_storage_s3_queue/test.py::test_max_set_age[8-10] +[gw4] [ 20%] PASSED test_storage_s3_queue/test.py::test_max_set_age[9-10] +test_storage_s3_queue/test.py::test_max_set_age[10-10] +[gw0] [ 30%] PASSED test_storage_s3_queue/test.py::test_max_set_age[1-10] +test_storage_s3_queue/test.py::test_max_set_age[2-10] +[gw1] [ 40%] PASSED 
test_storage_s3_queue/test.py::test_max_set_age[3-10] +test_storage_s3_queue/test.py::test_max_set_age[4-10] +[gw2] [ 50%] PASSED test_storage_s3_queue/test.py::test_max_set_age[5-10] +test_storage_s3_queue/test.py::test_max_set_age[6-10] +[gw3] [ 60%] PASSED test_storage_s3_queue/test.py::test_max_set_age[8-10] +[gw4] [ 70%] PASSED test_storage_s3_queue/test.py::test_max_set_age[10-10] +[gw0] [ 80%] PASSED test_storage_s3_queue/test.py::test_max_set_age[2-10] +[gw1] [ 90%] PASSED test_storage_s3_queue/test.py::test_max_set_age[4-10] +[gw2] [100%] PASSED test_storage_s3_queue/test.py::test_max_set_age[6-10] +========================================================================== 10 passed in 120.65s (0:02:00) ========================================================================== +``` + ### Rebuilding the docker containers The main container used for integration tests lives in `docker/test/integration/base/Dockerfile`. Rebuild it with @@ -149,7 +207,7 @@ will automagically detect the types of variables and only the small diff of two If tests failing for mysterious reasons, this may help: -``` +```bash sudo service docker stop sudo bash -c 'rm -rf /var/lib/docker/*' sudo service docker start @@ -159,6 +217,6 @@ sudo service docker start On Ubuntu 20.10 and later in host network mode (default) one may encounter problem with nested containers not seeing each other. It happens because legacy and nftables rules are out of sync. Problem can be solved by: -``` +```bash sudo iptables -P FORWARD ACCEPT ``` diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 548b58a17e8..0c8278048bf 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -434,6 +434,11 @@ class ClickHouseCluster: # docker-compose removes everything non-alphanumeric from project names so we do it too. 
self.project_name = re.sub(r"[^a-z0-9]", "", project_name.lower()) self.instances_dir_name = get_instances_dir(self.name) + xdist_worker = os.getenv("PYTEST_XDIST_WORKER") + if xdist_worker: + self.project_name += f"_{xdist_worker}" + self.instances_dir_name += f"_{xdist_worker}" + self.instances_dir = p.join(self.base_dir, self.instances_dir_name) self.docker_logs_path = p.join(self.instances_dir, "docker.log") self.env_file = p.join(self.instances_dir, DEFAULT_ENV_NAME) diff --git a/tests/integration/test_storage_s3_queue/test.py b/tests/integration/test_storage_s3_queue/test.py index bf3c28c5429..9a97e8c23d1 100644 --- a/tests/integration/test_storage_s3_queue/test.py +++ b/tests/integration/test_storage_s3_queue/test.py @@ -7,6 +7,7 @@ import pytest from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, ClickHouseInstance import json +from uuid import uuid4 AVAILABLE_MODES = ["unordered", "ordered"] @@ -822,11 +823,11 @@ def test_multiple_tables_streaming_sync_distributed(started_cluster, mode): def test_max_set_age(started_cluster): node = started_cluster.instances["instance"] - table_name = f"max_set_age" + table_name = "max_set_age" dst_table_name = f"{table_name}_dst" keeper_path = f"/clickhouse/test_{table_name}" files_path = f"{table_name}_data" - max_age = 10 + max_age = 20 files_to_generate = 10 create_table( @@ -847,11 +848,9 @@ def test_max_set_age(started_cluster): ) create_mv(node, table_name, dst_table_name) - total_values = generate_random_files( - started_cluster, files_path, files_to_generate, row_num=1 - ) + _ = generate_random_files(started_cluster, files_path, files_to_generate, row_num=1) - expected_rows = 10 + expected_rows = files_to_generate node.wait_for_log_line("Checking node limits") node.wait_for_log_line("Node limits check finished") @@ -859,25 +858,24 @@ def test_max_set_age(started_cluster): def get_count(): return int(node.query(f"SELECT count() FROM {dst_table_name}")) - for _ in 
range(20): - if expected_rows == get_count(): - break - time.sleep(1) + def wait_for_condition(check_function, max_wait_time=1.5 * max_age): + before = time.time() + while time.time() - before < max_wait_time: + if check_function(): + return + time.sleep(0.25) + assert False - assert expected_rows == get_count() - assert 10 == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) + wait_for_condition(lambda: get_count() == expected_rows) + assert files_to_generate == int( + node.query(f"SELECT uniq(_path) from {dst_table_name}") + ) - time.sleep(max_age + 5) - - expected_rows = 20 - - for _ in range(20): - if expected_rows == get_count(): - break - time.sleep(1) - - assert expected_rows == get_count() - assert 10 == int(node.query(f"SELECT uniq(_path) from {dst_table_name}")) + expected_rows *= 2 + wait_for_condition(lambda: get_count() == expected_rows) + assert files_to_generate == int( + node.query(f"SELECT uniq(_path) from {dst_table_name}") + ) paths_count = [ int(x) @@ -885,15 +883,18 @@ def test_max_set_age(started_cluster): f"SELECT count() from {dst_table_name} GROUP BY _path" ).splitlines() ] - assert 10 == len(paths_count) + assert files_to_generate == len(paths_count) for path_count in paths_count: assert 2 == path_count - failed_count = int( - node.query( - "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + def get_object_storage_failures(): + return int( + node.query( + "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" + ) ) - ) + + failed_count = get_object_storage_failures() values = [ ["failed", 1, 1], @@ -901,53 +902,33 @@ def test_max_set_age(started_cluster): values_csv = ( "\n".join((",".join(map(str, row)) for row in values)) + "\n" ).encode() - put_s3_file_content(started_cluster, f"{files_path}/fff.csv", values_csv) - for _ in range(30): - if failed_count + 1 == int( - node.query( - "SELECT 
value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" - ) - ): - break - time.sleep(1) + # use a different filename for each test to allow running a bunch of them sequentially with --count + file_with_error = f"max_set_age_fail_{uuid4().hex[:8]}.csv" + put_s3_file_content(started_cluster, f"{files_path}/{file_with_error}", values_csv) - assert failed_count + 1 == int( - node.query( - "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles' SETTINGS system_events_show_zero_values=1" - ) - ) + wait_for_condition(lambda: failed_count + 1 == get_object_storage_failures()) node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( - "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv'" + f"SELECT exception FROM system.s3queue WHERE file_name ilike '%{file_with_error}'" ) + assert 1 == int( node.query( - "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv'" - ) - ) - assert 1 == int( - node.query( - "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" + f"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%{file_with_error}' AND notEmpty(exception)" ) ) - time.sleep(max_age + 1) - - assert failed_count + 2 == int( - node.query( - "SELECT value FROM system.events WHERE name = 'ObjectStorageQueueFailedFiles'" - ) - ) + wait_for_condition(lambda: failed_count + 2 == get_object_storage_failures()) node.query("SYSTEM FLUSH LOGS") assert "Cannot parse input" in node.query( - "SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv' ORDER BY processing_end_time DESC LIMIT 1" + f"SELECT exception FROM system.s3queue WHERE file_name ilike '%{file_with_error}' ORDER BY processing_end_time DESC LIMIT 1" ) assert 1 < int( node.query( - "SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)" + f"SELECT count() FROM system.s3queue_log WHERE 
file_name ilike '%{file_with_error}' AND notEmpty(exception)" ) ) From 1d75ae723c95814462e9e8b07ed4b1853afae646 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 31 Jul 2024 23:11:14 +0000 Subject: [PATCH 036/121] Backport #67520 to 24.7: Proper fix for short circuit execution with nested dictGetOrDefault --- src/Columns/ColumnFunction.cpp | 26 ++++++++++++++----- src/Interpreters/ExpressionActions.cpp | 4 --- ...sted_short_circuit_functions_bug.reference | 2 ++ ...210_nested_short_circuit_functions_bug.sql | 3 +++ 4 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference create mode 100644 tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql diff --git a/src/Columns/ColumnFunction.cpp b/src/Columns/ColumnFunction.cpp index fc81efaac0c..18c343c6ca6 100644 --- a/src/Columns/ColumnFunction.cpp +++ b/src/Columns/ColumnFunction.cpp @@ -296,16 +296,28 @@ ColumnWithTypeAndName ColumnFunction::reduce() const function->getName(), toString(args), toString(captured)); ColumnsWithTypeAndName columns = captured_columns; - IFunction::ShortCircuitSettings settings; /// Arguments of lazy executed function can also be lazy executed. - /// But we shouldn't execute arguments if this function is short circuit, - /// because it will handle lazy executed arguments by itself. - if (is_short_circuit_argument && !function->isShortCircuit(settings, args)) + if (is_short_circuit_argument) { - for (auto & col : columns) + IFunction::ShortCircuitSettings settings; + /// We shouldn't execute all arguments if this function is short circuit, + /// because it will handle lazy executed arguments by itself. + /// Execute only arguments with disabled lazy execution. 
+ if (function->isShortCircuit(settings, args)) { - if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column)) - col = arg->reduce(); + for (size_t i : settings.arguments_with_disabled_lazy_execution) + { + if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(columns[i].column)) + columns[i] = arg->reduce(); + } + } + else + { + for (auto & col : columns) + { + if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column)) + col = arg->reduce(); + } } } diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d832f568cb8..8993830af14 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -195,10 +195,6 @@ static void setLazyExecutionInfo( } lazy_execution_info.short_circuit_ancestors_info[parent].insert(indexes.begin(), indexes.end()); - /// After checking arguments_with_disabled_lazy_execution, if there is no relation with parent, - /// disable the current node. - if (indexes.empty()) - lazy_execution_info.can_be_lazy_executed = false; } else /// If lazy execution is disabled for one of parents, we should disable it for current node. 
diff --git a/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql new file mode 100644 index 00000000000..923f1e3be1f --- /dev/null +++ b/tests/queries/0_stateless/03210_nested_short_circuit_functions_bug.sql @@ -0,0 +1,3 @@ +select if(equals(materialize('abc'), 'aws.lambda.duration'), if(toFloat64(materialize('x86_74')) < 50.0000, 0, 1), 0) settings short_circuit_function_evaluation='enable'; +select if(equals(materialize('abc'), 'aws.lambda.duration'), if(toFloat64(materialize('x86_74')) < 50.0000, 0, 1), 0) settings short_circuit_function_evaluation='force_enable'; + From a131e4f22cf54bffb5da410012b4047cdb137086 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 1 Aug 2024 06:30:26 +0000 Subject: [PATCH 037/121] Update autogenerated version to 24.7.3.1 and contributors --- cmake/autogenerated_versions.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 6d232fbf84f..3be3b9fe60b 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -5,8 +5,8 @@ SET(VERSION_REVISION 54488) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 2) -SET(VERSION_GITHASH a37d2d43da7ff89c512c770d0d7a3053a0a94c43) -SET(VERSION_DESCRIBE v24.7.2.1-stable) -SET(VERSION_STRING 24.7.2.1) +SET(VERSION_PATCH 3) +SET(VERSION_GITHASH 6e41f601b2f025b0741da55d13287922eb28cf37) +SET(VERSION_DESCRIBE v24.7.3.1-stable) +SET(VERSION_STRING 24.7.3.1) # end of autochange From ada316d1c41286533c190e2c9fab3c85c7c9dc4f Mon Sep 17 00:00:00 2001 
From: robot-clickhouse Date: Thu, 1 Aug 2024 13:09:09 +0000 Subject: [PATCH 038/121] Backport #67590 to 24.7: Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance --- .../test.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py index 9cdf7db2b08..cc0a9022674 100644 --- a/tests/integration/test_zookeeper_config_load_balancing/test.py +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -71,7 +71,7 @@ def test_first_or_random(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -99,7 +99,7 @@ def test_first_or_random(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -127,7 +127,7 @@ def test_first_or_random(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -161,7 +161,7 @@ def test_in_order(started_cluster): [ "bash", 
"-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -189,7 +189,7 @@ def test_in_order(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -217,7 +217,7 @@ def test_in_order(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -251,7 +251,7 @@ def test_nearest_hostname(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -279,7 +279,7 @@ def test_nearest_hostname(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 
'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo2_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -307,7 +307,7 @@ def test_nearest_hostname(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo3_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -341,7 +341,7 @@ def test_hostname_levenshtein_distance(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo1_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -369,7 +369,7 @@ def test_hostname_levenshtein_distance(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo2_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", @@ -397,7 +397,7 @@ def test_hostname_levenshtein_distance(started_cluster): [ "bash", "-c", - "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + "lsof -a -i4 -i6 -itcp -w | grep -P 'testzookeeperconfigloadbalancing_(gw\\d+_)?zoo3_1.*testzookeeperconfigloadbalancing_(gw\\d+_)?default:2181' | grep ESTABLISHED | wc -l", ], privileged=True, user="root", From 
5c9697a9cb99ba94b729880c7fada99bbbeb8de5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 1 Aug 2024 18:12:19 +0000 Subject: [PATCH 039/121] Backport #66843 to 24.7: Fix deadlock in Context::getDDLWorker() --- src/Interpreters/Context.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 94bcb88ed53..48878733a00 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3490,18 +3490,22 @@ DDLWorker & Context::getDDLWorker() const if (shared->ddl_worker_startup_task) waitLoad(shared->ddl_worker_startup_task); // Just wait and do not prioritize, because it depends on all load and startup tasks - SharedLockGuard lock(shared->mutex); - if (!shared->ddl_worker) { - if (!hasZooKeeper()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config"); - - if (!hasDistributedDDL()) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no DistributedDDL configuration in server config"); - - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "DDL background thread is not initialized"); + /// Only acquire the lock for reading ddl_worker field. + /// hasZooKeeper() and hasDistributedDDL() acquire the same lock as well and double acquisition of the lock in shared mode can lead + /// to a deadlock if an exclusive lock attempt is made in the meantime by another thread. 
+ SharedLockGuard lock(shared->mutex); + if (shared->ddl_worker) + return *shared->ddl_worker; } - return *shared->ddl_worker; + + if (!hasZooKeeper()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config"); + + if (!hasDistributedDDL()) + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no DistributedDDL configuration in server config"); + + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "DDL background thread is not initialized"); } zkutil::ZooKeeperPtr Context::getZooKeeper() const From 758b796811f1d4ebc77c4546e53b88c03b57a045 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 1 Aug 2024 22:08:20 +0000 Subject: [PATCH 040/121] Backport #67506 to 24.7: [Green CI] Fix potentially flaky test_mask_sensitive_info integration test --- tests/integration/helpers/cluster.py | 10 +++++++--- .../integration/test_mask_sensitive_info/test.py | 15 +++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0c8278048bf..9ac7f3646a8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3920,7 +3920,11 @@ class ClickHouseInstance: ) def contains_in_log( - self, substring, from_host=False, filename="clickhouse-server.log" + self, + substring, + from_host=False, + filename="clickhouse-server.log", + exclusion_substring="", ): if from_host: # We check fist file exists but want to look for all rotated logs as well @@ -3928,7 +3932,7 @@ class ClickHouseInstance: [ "bash", "-c", - f'[ -f {self.logs_dir}/{filename} ] && zgrep -aH "{substring}" {self.logs_dir}/{filename}* || true', + f'[ -f {self.logs_dir}/{filename} ] && zgrep -aH "{substring}" {self.logs_dir}/{filename}* | ( [ -z "{exclusion_substring}" ] && cat || grep -v "${exclusion_substring}" ) || true', ] ) else: @@ -3936,7 +3940,7 @@ class ClickHouseInstance: [ "bash", "-c", - f'[ -f 
/var/log/clickhouse-server/{filename} ] && zgrep -aH "{substring}" /var/log/clickhouse-server/{filename} || true', + f'[ -f /var/log/clickhouse-server/{filename} ] && zgrep -aH "{substring}" /var/log/clickhouse-server/{filename} | ( [ -z "{exclusion_substring}" ] && cat || grep -v "${exclusion_substring}" ) || true', ] ) return len(result) > 0 diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index 902d3800324..6f6dc4d287f 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -13,6 +13,7 @@ node = cluster.add_instance( with_zookeeper=True, with_azurite=True, ) +base_search_query = "SELECT COUNT() FROM system.query_log WHERE query LIKE " @pytest.fixture(scope="module", autouse=True) @@ -35,7 +36,7 @@ def check_logs(must_contain=[], must_not_contain=[]): .replace("]", "\\]") .replace("*", "\\*") ) - assert node.contains_in_log(escaped_str) + assert node.contains_in_log(escaped_str, exclusion_substring=base_search_query) for str in must_not_contain: escaped_str = ( @@ -44,7 +45,9 @@ def check_logs(must_contain=[], must_not_contain=[]): .replace("]", "\\]") .replace("*", "\\*") ) - assert not node.contains_in_log(escaped_str) + assert not node.contains_in_log( + escaped_str, exclusion_substring=base_search_query + ) for str in must_contain: escaped_str = str.replace("'", "\\'") @@ -60,7 +63,7 @@ def system_query_log_contains_search_pattern(search_pattern): return ( int( node.query( - f"SELECT COUNT() FROM system.query_log WHERE query LIKE '%{search_pattern}%'" + f"{base_search_query}'%{search_pattern}%' AND query NOT LIKE '{base_search_query}%'" ).strip() ) >= 1 @@ -105,7 +108,6 @@ def test_create_alter_user(): must_not_contain=[ password, "IDENTIFIED BY", - "IDENTIFIED BY", "IDENTIFIED WITH plaintext_password BY", ], ) @@ -366,10 +368,7 @@ def test_table_functions(): f"remoteSecure(named_collection_6, addresses_expr = 
'127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', password = '{password}')", f"s3('http://minio1:9001/root/data/test9.csv.gz', 'NOSIGN', 'CSV')", f"s3('http://minio1:9001/root/data/test10.csv.gz', 'minio', '{password}')", - ( - f"deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", - "DNS_ERROR", - ), + f"deltaLake('http://minio1:9001/root/data/test11.csv.gz', 'minio', '{password}')", f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple.csv', 'CSV')", f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_1.csv', 'CSV', 'none')", f"azureBlobStorage('{azure_conn_string}', 'cont', 'test_simple_2.csv', 'CSV', 'none', 'auto')", From 6d27b178a1d05a6c338bf660b7f87bce27a35865 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 2 Aug 2024 17:06:25 +0000 Subject: [PATCH 041/121] Backport #67605 to 24.7: Fix 01605_adaptive_granularity_block_borders --- .../0_stateless/01605_adaptive_granularity_block_borders.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 9b96ce3e586..aaeee466794 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,4 +1,4 @@ --- Tags: no-random-merge-tree-settings, no-tsan, no-debug, no-object-storage +-- Tags: no-random-merge-tree-settings, no-random-settings, no-tsan, no-debug, no-object-storage, long -- no-tsan: too slow -- no-object-storage: for remote tables we use thread pool even when reading with one stream, so memory consumption is higher @@ -16,7 +16,7 @@ CREATE TABLE adaptive_table( value String ) ENGINE MergeTree() ORDER BY key -SETTINGS index_granularity_bytes=1048576, +SETTINGS index_granularity_bytes = 1048576, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, 
enable_vertical_merge_algorithm = 0; From fca3308f0549e0bc7d2c10f01a30830a096df7eb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 3 Aug 2024 01:32:53 +0000 Subject: [PATCH 042/121] Backport #67603 to 24.7: Fix 02910_bad_logs_level_in_local in fast tests --- tests/queries/0_stateless/02910_bad_logs_level_in_local.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh b/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh index badf7232a95..b5de10bf191 100755 --- a/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh +++ b/tests/queries/0_stateless/02910_bad_logs_level_in_local.sh @@ -1,14 +1,14 @@ #!/usr/bin/expect -f log_user 0 -set timeout 60 +set timeout 30 match_max 100000 spawn bash -c "clickhouse-local" expect ":) " send -- "SET send_logs_level = 't'\r" -expect "Exception on client:" +expect "Unexpected value of LogsLevel:" {} timeout {exit 1} expect ":) " send -- "exit\r" expect eof From 1c44ec682a8a926f6739a8e9aea24d71eec0b06e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 3 Aug 2024 09:11:58 +0000 Subject: [PATCH 043/121] Backport #67665 to 24.7: Fix reloading SQL UDFs with UNION --- src/Databases/DatabaseOrdinary.cpp | 3 +++ .../UserDefinedSQLFunctionFactory.cpp | 8 +++++-- .../UserDefinedSQLObjectsDiskStorage.cpp | 6 ++--- .../UserDefinedSQLObjectsDiskStorage.h | 1 - .../UserDefinedSQLObjectsStorageBase.cpp | 15 +++++++++--- .../UserDefinedSQLObjectsStorageBase.h | 4 ++++ .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 2 +- .../UserDefinedSQLObjectsZooKeeperStorage.h | 2 -- .../NormalizeSelectWithUnionQueryVisitor.h | 2 -- .../test.py | 23 +++++++++++++++++-- .../test.py | 12 ++++++++++ .../03215_udf_with_union.reference | 1 + .../0_stateless/03215_udf_with_union.sql | 14 +++++++++++ 13 files changed, 77 insertions(+), 16 deletions(-) create mode 100644 tests/queries/0_stateless/03215_udf_with_union.reference create mode 100644 
tests/queries/0_stateless/03215_udf_with_union.sql diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 3ab5d3fa697..8808261654f 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,8 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables convertMergeTreeToReplicatedIfNeeded(ast, qualified_name, file_name); + NormalizeSelectWithUnionQueryVisitor::Data data{local_context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(ast); std::lock_guard lock{metadata.mutex}; metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; metadata.total_dictionaries += create_query->is_dictionary; diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index e6796874e50..d0bc812f91d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -80,13 +82,15 @@ namespace validateFunctionRecursiveness(*function_body, name); } - ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) + ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query, const ContextPtr & context) { auto ptr = create_function_query.clone(); auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; FunctionNameNormalizer::visit(res.function_core.get()); + NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(res.function_core); return ptr; } } @@ -125,7 +129,7 @@ void 
UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & co bool UserDefinedSQLFunctionFactory::registerFunction(const ContextMutablePtr & context, const String & function_name, ASTPtr create_function_query, bool throw_if_exists, bool replace_if_exists) { checkCanBeRegistered(context, function_name, *create_function_query); - create_function_query = normalizeCreateFunctionQuery(*create_function_query); + create_function_query = normalizeCreateFunctionQuery(*create_function_query, context); try { diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index 4c004d2537c..8910b45e79d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -1,7 +1,7 @@ #include "Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h" -#include "Functions/UserDefined/UserDefinedSQLFunctionFactory.h" -#include "Functions/UserDefined/UserDefinedSQLObjectType.h" +#include +#include #include #include @@ -54,7 +54,7 @@ namespace } UserDefinedSQLObjectsDiskStorage::UserDefinedSQLObjectsDiskStorage(const ContextPtr & global_context_, const String & dir_path_) - : global_context(global_context_) + : UserDefinedSQLObjectsStorageBase(global_context_) , dir_path{makeDirectoryPathCanonical(dir_path_)} , log{getLogger("UserDefinedSQLObjectsLoaderFromDisk")} { diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h index ae0cbd0c589..cafbd140598 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.h @@ -42,7 +42,6 @@ private: ASTPtr tryLoadObject(UserDefinedSQLObjectType object_type, const String & object_name, const String & file_path, bool check_file_exists); String getFilePath(UserDefinedSQLObjectType object_type, const String & object_name) 
const; - ContextPtr global_context; String dir_path; LoggerPtr log; std::atomic objects_loaded = false; diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp index f251d11789f..225e919301d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp @@ -2,7 +2,10 @@ #include +#include +#include #include +#include #include namespace DB @@ -17,18 +20,24 @@ namespace ErrorCodes namespace { -ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) +ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query, const ContextPtr & context) { auto ptr = create_function_query.clone(); auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; FunctionNameNormalizer::visit(res.function_core.get()); + NormalizeSelectWithUnionQueryVisitor::Data data{context->getSettingsRef().union_default_mode}; + NormalizeSelectWithUnionQueryVisitor{data}.visit(res.function_core); return ptr; } } +UserDefinedSQLObjectsStorageBase::UserDefinedSQLObjectsStorageBase(ContextPtr global_context_) + : global_context(std::move(global_context_)) +{} + ASTPtr UserDefinedSQLObjectsStorageBase::get(const String & object_name) const { std::lock_guard lock(mutex); @@ -148,7 +157,7 @@ void UserDefinedSQLObjectsStorageBase::setAllObjects(const std::vector normalized_functions; for (const auto & [function_name, create_query] : new_objects) - normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query); + normalized_functions[function_name] = normalizeCreateFunctionQuery(*create_query, global_context); std::lock_guard lock(mutex); object_name_to_create_object_map = std::move(normalized_functions); @@ -166,7 +175,7 @@ std::vector> UserDefinedSQLObjectsStorageBase::getAllO void UserDefinedSQLObjectsStorageBase::setObject(const String & object_name, const IAST & 
create_object_query) { std::lock_guard lock(mutex); - object_name_to_create_object_map[object_name] = normalizeCreateFunctionQuery(create_object_query); + object_name_to_create_object_map[object_name] = normalizeCreateFunctionQuery(create_object_query, global_context); } void UserDefinedSQLObjectsStorageBase::removeObject(const String & object_name) diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h index cab63a3bfcf..0dbc5586f08 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -13,6 +14,7 @@ namespace DB class UserDefinedSQLObjectsStorageBase : public IUserDefinedSQLObjectsStorage { public: + explicit UserDefinedSQLObjectsStorageBase(ContextPtr global_context_); ASTPtr get(const String & object_name) const override; ASTPtr tryGet(const String & object_name) const override; @@ -64,6 +66,8 @@ protected: std::unordered_map object_name_to_create_object_map; mutable std::recursive_mutex mutex; + + ContextPtr global_context; }; } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 01e7e3995fa..12c1302a3fe 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -48,7 +48,7 @@ namespace UserDefinedSQLObjectsZooKeeperStorage::UserDefinedSQLObjectsZooKeeperStorage( const ContextPtr & global_context_, const String & zookeeper_path_) - : global_context{global_context_} + : UserDefinedSQLObjectsStorageBase(global_context_) , zookeeper_getter{[global_context_]() { return global_context_->getZooKeeper(); }} , zookeeper_path{zookeeper_path_} , watch_queue{std::make_shared>>(std::numeric_limits::max())} diff --git 
a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h index 61002be2bfd..0aa9b198398 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.h @@ -68,8 +68,6 @@ private: void refreshObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type); void syncObjects(const zkutil::ZooKeeperPtr & zookeeper, UserDefinedSQLObjectType object_type); - ContextPtr global_context; - zkutil::ZooKeeperCachingGetter zookeeper_getter; String zookeeper_path; std::atomic objects_loaded = false; diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h index b2f55003da5..b642b5def91 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.h @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { diff --git a/tests/integration/test_replicated_user_defined_functions/test.py b/tests/integration/test_replicated_user_defined_functions/test.py index e5f6683b90b..92d86a8fd2c 100644 --- a/tests/integration/test_replicated_user_defined_functions/test.py +++ b/tests/integration/test_replicated_user_defined_functions/test.py @@ -141,6 +141,9 @@ def test_drop_if_exists(): def test_replication(): node1.query("CREATE FUNCTION f2 AS (x, y) -> x - y") + node1.query( + "CREATE FUNCTION f3 AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" + ) assert ( node1.query("SELECT create_query FROM system.functions WHERE name='f2'") @@ -154,7 +157,11 @@ def test_replication(): assert node1.query("SELECT f2(12,3)") == "9\n" assert node2.query("SELECT f2(12,3)") == "9\n" + assert node1.query("SELECT f3()") == "2\n" + assert node2.query("SELECT f3()") == "2\n" + node1.query("DROP FUNCTION f2") + node1.query("DROP FUNCTION f3") assert ( node1.query("SELECT 
create_query FROM system.functions WHERE name='f2'") == "" ) @@ -214,7 +221,9 @@ def test_reload_zookeeper(): ) # config reloads, but can still work - node1.query("CREATE FUNCTION f2 AS (x, y) -> x - y") + node1.query( + "CREATE FUNCTION f2 AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" + ) assert_eq_with_retry( node2, "SELECT name FROM system.functions WHERE name IN ['f1', 'f2'] ORDER BY name", @@ -269,7 +278,7 @@ def test_reload_zookeeper(): TSV(["f1", "f2", "f3"]), ) - assert node2.query("SELECT f1(12, 3), f2(12, 3), f3(12, 3)") == TSV([[15, 9, 4]]) + assert node2.query("SELECT f1(12, 3), f2(), f3(12, 3)") == TSV([[15, 2, 4]]) active_zk_connections = get_active_zk_connections() assert ( @@ -307,3 +316,13 @@ def test_start_without_zookeeper(): "CREATE FUNCTION f1 AS (x, y) -> (x + y)\n", ) node1.query("DROP FUNCTION f1") + + +def test_server_restart(): + node1.query( + "CREATE FUNCTION f1 AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" + ) + assert node1.query("SELECT f1()") == "2\n" + node1.restart_clickhouse() + assert node1.query("SELECT f1()") == "2\n" + node1.query("DROP FUNCTION f1") diff --git a/tests/integration/test_user_defined_object_persistence/test.py b/tests/integration/test_user_defined_object_persistence/test.py index 986438a4eed..bd491dfa195 100644 --- a/tests/integration/test_user_defined_object_persistence/test.py +++ b/tests/integration/test_user_defined_object_persistence/test.py @@ -18,20 +18,25 @@ def started_cluster(): def test_persistence(): create_function_query1 = "CREATE FUNCTION MySum1 AS (a, b) -> a + b" create_function_query2 = "CREATE FUNCTION MySum2 AS (a, b) -> MySum1(a, b) + b" + create_function_query3 = "CREATE FUNCTION MyUnion AS () -> (SELECT sum(s) FROM (SELECT 1 as s UNION ALL SELECT 1 as s))" instance.query(create_function_query1) instance.query(create_function_query2) + instance.query(create_function_query3) assert instance.query("SELECT MySum1(1,2)") == "3\n" assert 
instance.query("SELECT MySum2(1,2)") == "5\n" + assert instance.query("SELECT MyUnion()") == "2\n" instance.restart_clickhouse() assert instance.query("SELECT MySum1(1,2)") == "3\n" assert instance.query("SELECT MySum2(1,2)") == "5\n" + assert instance.query("SELECT MyUnion()") == "2\n" instance.query("DROP FUNCTION MySum2") instance.query("DROP FUNCTION MySum1") + instance.query("DROP FUNCTION MyUnion") instance.restart_clickhouse() @@ -48,3 +53,10 @@ def test_persistence(): or "Function with name 'MySum2' does not exist. In scope SELECT MySum2(1, 2)" in error_message ) + + error_message = instance.query_and_get_error("SELECT MyUnion()") + assert ( + "Unknown function MyUnion" in error_message + or "Function with name 'MyUnion' does not exist. In scope SELECT MyUnion" + in error_message + ) diff --git a/tests/queries/0_stateless/03215_udf_with_union.reference b/tests/queries/0_stateless/03215_udf_with_union.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03215_udf_with_union.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03215_udf_with_union.sql b/tests/queries/0_stateless/03215_udf_with_union.sql new file mode 100644 index 00000000000..00390c5d930 --- /dev/null +++ b/tests/queries/0_stateless/03215_udf_with_union.sql @@ -0,0 +1,14 @@ +DROP FUNCTION IF EXISTS 03215_udf_with_union; +CREATE FUNCTION 03215_udf_with_union AS () -> ( + SELECT sum(s) + FROM + ( + SELECT 1 AS s + UNION ALL + SELECT 1 AS s + ) +); + +SELECT 03215_udf_with_union(); + +DROP FUNCTION 03215_udf_with_union; From 06948babbb90c090dcfdd9de9590b1e9b36a8db5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 4 Aug 2024 17:09:20 +0000 Subject: [PATCH 044/121] Backport #67306 to 24.7: Fix crash of uniq and uniqTheta with empty tuple argument --- src/AggregateFunctions/UniqVariadicHash.h | 3 +++ .../queries/0_stateless/03208_uniq_with_empty_tuple.reference | 2 ++ 
tests/queries/0_stateless/03208_uniq_with_empty_tuple.sql | 4 ++++ 3 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/03208_uniq_with_empty_tuple.reference create mode 100644 tests/queries/0_stateless/03208_uniq_with_empty_tuple.sql diff --git a/src/AggregateFunctions/UniqVariadicHash.h b/src/AggregateFunctions/UniqVariadicHash.h index 5bb245397d4..279feed8bc6 100644 --- a/src/AggregateFunctions/UniqVariadicHash.h +++ b/src/AggregateFunctions/UniqVariadicHash.h @@ -67,6 +67,9 @@ struct UniqVariadicHash { static UInt64 apply(size_t num_args, const IColumn ** columns, size_t row_num) { + if (!num_args) + return 0; + UInt64 hash; const auto & tuple_columns = assert_cast(columns[0])->getColumns(); diff --git a/tests/queries/0_stateless/03208_uniq_with_empty_tuple.reference b/tests/queries/0_stateless/03208_uniq_with_empty_tuple.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03208_uniq_with_empty_tuple.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03208_uniq_with_empty_tuple.sql b/tests/queries/0_stateless/03208_uniq_with_empty_tuple.sql new file mode 100644 index 00000000000..09eeaf6f370 --- /dev/null +++ b/tests/queries/0_stateless/03208_uniq_with_empty_tuple.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest +-- https://github.com/ClickHouse/ClickHouse/issues/67303 +SELECT uniqTheta(tuple()); +SELECT uniq(tuple()); From f30656000fe58be3e55f84ca21e9b54328ef55fe Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 11:07:06 +0000 Subject: [PATCH 045/121] Backport #67359 to 24.7: Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql --- ...mic_read_subcolumns_compact_merge_tree.sql | 2 +- .../03036_dynamic_read_subcolumns_memory.sql | 2 +- ...dynamic_read_subcolumns_small.reference.j2 | 2211 +++++++++++++++++ ...03036_dynamic_read_subcolumns_small.sql.j2 | 43 + ...ynamic_read_subcolumns_wide_merge_tree.sql | 2 +- 
...merges_1_horizontal_compact_merge_tree.sql | 4 +- ..._merges_1_horizontal_compact_wide_tree.sql | 4 +- ...c_merges_1_vertical_compact_merge_tree.sql | 4 +- ...amic_merges_1_vertical_wide_merge_tree.sql | 4 +- ...merges_2_horizontal_compact_merge_tree.sql | 2 +- ...ic_merges_2_horizontal_wide_merge_tree.sql | 2 +- ...c_merges_2_vertical_compact_merge_tree.sql | 2 +- ...amic_merges_2_vertical_wide_merge_tree.sql | 2 +- .../03037_dynamic_merges_small.reference.j2 | 112 + .../03037_dynamic_merges_small.sql.j2 | 42 + ...2_dynamic_null_map_subcolumn.reference.j2} | 6 +- .../03202_dynamic_null_map_subcolumn.sh | 62 - .../03202_dynamic_null_map_subcolumn.sql.j2 | 49 + 18 files changed, 2475 insertions(+), 80 deletions(-) create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 create mode 100644 tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 create mode 100644 tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 rename tests/queries/0_stateless/{03202_dynamic_null_map_subcolumn.reference => 03202_dynamic_null_map_subcolumn.reference.j2} (60%) delete mode 100755 tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh create mode 100644 tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql index 66fbf006a8c..ddfba4418bd 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_compact_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql 
b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql index bb03bdef704..c446c31fc25 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_memory.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 new file mode 100644 index 00000000000..d6add681f51 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.reference.j2 @@ -0,0 +1,2211 @@ +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +[[0]] \N \N [] +str_10 \N str_10 [] +[[0,1]] \N \N [] +str_11 \N str_11 [] +[[0,1,2]] \N \N [] +str_12 \N str_12 [] +[[0,1,2,3]] \N \N [] +str_13 \N str_13 [] +[[0,1,2,3,4]] \N \N [] +str_14 \N str_14 [] +[[0,1,2,3,4,5]] \N \N [] +str_15 \N str_15 [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N str_16 [] +[[0,1,2,3,4,5,6,7]] \N \N [] +str_17 \N str_17 [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] +str_18 \N str_18 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] +str_19 \N str_19 [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] 
+\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N \N [] +\N str_10 [] +\N \N [] +\N str_11 [] +\N \N [] +\N str_12 [] +\N \N [] +\N str_13 [] +\N \N [] +\N str_14 [] +\N \N [] +\N str_15 [] +\N \N [] +\N str_16 [] +\N \N [] +\N str_17 [] +\N \N [] +\N str_18 [] +\N \N [] +\N str_19 [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N 
str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +[[0]] \N \N [] 0 [] +str_10 \N \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +[[0,1,2]] \N \N [] 0 [] +str_12 \N \N [] 0 [] +[[0,1,2,3]] \N \N [] 0 [] +str_13 \N \N [] 0 [] +[[0,1,2,3,4]] \N \N [] 0 [] +str_14 \N \N [] 0 [] +[[0,1,2,3,4,5]] \N \N [] 0 [] +str_15 \N \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7]] \N \N [] 0 [] +str_17 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] 0 [] +str_18 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] 0 [] +str_19 \N \N [] 0 
[] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N 
[NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N 
['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +[[0]] 0 [] [] +str_10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +[[0,1,2]] 0 [] [] +str_12 0 [] [] +[[0,1,2,3]] 0 [] [] +str_13 0 [] [] +[[0,1,2,3,4]] 0 [] [] +str_14 0 [] [] +[[0,1,2,3,4,5]] 0 [] [] +str_15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +[[0,1,2,3,4,5,6,7]] 0 [] [] +str_17 0 [] [] +[[0,1,2,3,4,5,6,7,8]] 0 [] [] +str_18 0 [] [] +[[0,1,2,3,4,5,6,7,8,9]] 0 [] [] +str_19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] 
[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] 
+[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[1] [[0]] [[[]]] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[3] [[0,1,2]] [[[],[],[]]] +[] [] [] +[4] [[0,1,2,3]] [[[],[],[],[]]] +[] [] [] +[5] [[0,1,2,3,4]] [[[],[],[],[],[]]] +[] [] [] +[6] [[0,1,2,3,4,5]] [[[],[],[],[],[],[]]] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[8] [[0,1,2,3,4,5,6,7]] [[[],[],[],[],[],[],[],[]]] +[] [] [] +[9] [[0,1,2,3,4,5,6,7,8]] [[[],[],[],[],[],[],[],[],[]]] +[] [] [] +[10] [[0,1,2,3,4,5,6,7,8,9]] [[[],[],[],[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +[[0]] \N \N [] +str_10 \N str_10 [] +[[0,1]] \N \N [] +str_11 \N str_11 [] +[[0,1,2]] \N \N [] +str_12 \N str_12 [] +[[0,1,2,3]] \N \N [] +str_13 \N str_13 [] +[[0,1,2,3,4]] \N \N [] +str_14 \N str_14 [] +[[0,1,2,3,4,5]] \N \N [] 
+str_15 \N str_15 [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N str_16 [] +[[0,1,2,3,4,5,6,7]] \N \N [] +str_17 \N str_17 [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] +str_18 \N str_18 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] +str_19 \N str_19 [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] +[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N \N [] +\N str_10 [] +\N \N [] +\N str_11 [] +\N \N 
[] +\N str_12 [] +\N \N [] +\N str_13 [] +\N \N [] +\N str_14 [] +\N \N [] +\N str_15 [] +\N \N [] +\N str_16 [] +\N \N [] +\N str_17 [] +\N \N [] +\N str_18 [] +\N \N [] +\N str_19 [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] 
+\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +[[0]] \N \N [] 0 [] +str_10 \N \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +[[0,1,2]] \N \N [] 0 [] +str_12 \N \N [] 0 [] +[[0,1,2,3]] \N \N [] 0 [] +str_13 \N \N [] 0 [] +[[0,1,2,3,4]] \N \N [] 0 [] +str_14 \N \N [] 0 [] +[[0,1,2,3,4,5]] \N \N [] 0 [] +str_15 \N \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7]] \N \N [] 0 [] +str_17 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] 0 [] +str_18 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] 0 [] +str_19 \N \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N 
['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] 
[NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 
0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +[[0]] 0 [] [] +str_10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +[[0,1,2]] 0 [] [] +str_12 0 [] [] +[[0,1,2,3]] 0 [] [] +str_13 0 [] [] +[[0,1,2,3,4]] 0 [] [] +str_14 0 [] [] +[[0,1,2,3,4,5]] 0 [] [] +str_15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +[[0,1,2,3,4,5,6,7]] 0 [] [] +str_17 0 [] [] +[[0,1,2,3,4,5,6,7,8]] 0 [] [] +str_18 0 [] [] +[[0,1,2,3,4,5,6,7,8,9]] 0 [] [] +str_19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] 
+[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[1] [[0]] [[[]]] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[3] [[0,1,2]] [[[],[],[]]] +[] [] [] +[4] [[0,1,2,3]] [[[],[],[],[]]] +[] [] [] +[5] [[0,1,2,3,4]] [[[],[],[],[],[]]] +[] [] [] +[6] [[0,1,2,3,4,5]] [[[],[],[],[],[],[]]] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[8] [[0,1,2,3,4,5,6,7]] [[[],[],[],[],[],[],[],[]]] +[] [] [] +[9] [[0,1,2,3,4,5,6,7,8]] [[[],[],[],[],[],[],[],[],[]]] +[] [] [] +[10] [[0,1,2,3,4,5,6,7,8,9]] [[[],[],[],[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] 
[] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +Array(Array(Dynamic)) +Array(Variant(String, UInt64)) +None +String +UInt64 +20 +20 +20 +20 +0 +0 +20 +20 +10 +10 +20 +0 +0 0 \N [] +1 1 \N [] +2 2 \N [] +3 3 \N [] +4 4 \N [] +5 5 \N [] +6 6 \N [] +7 7 \N [] +8 8 \N [] +9 9 \N [] +[[0]] \N \N [] +str_10 \N str_10 [] +[[0,1]] \N \N [] +str_11 \N str_11 [] +[[0,1,2]] \N \N [] +str_12 \N str_12 [] +[[0,1,2,3]] \N \N [] +str_13 \N str_13 [] +[[0,1,2,3,4]] \N \N [] +str_14 \N str_14 [] +[[0,1,2,3,4,5]] \N \N [] +str_15 \N str_15 [] +[[0,1,2,3,4,5,6]] \N \N [] +str_16 \N str_16 [] +[[0,1,2,3,4,5,6,7]] \N \N [] +str_17 \N str_17 [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] +str_18 \N str_18 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] +str_19 \N str_19 [] +[20] \N \N [20] +['str_21','str_21'] \N \N ['str_21','str_21'] +[22,22,22] \N \N [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +\N \N \N [] +[40] \N \N [40] +41 41 \N [] +\N \N \N [] +str_43 \N str_43 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] +45 45 \N [] +\N \N \N [] +str_47 \N str_47 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 49 \N [] +\N \N \N [] +str_51 \N str_51 [] 
+[52,52,52] \N \N [52,52,52] +53 53 \N [] +\N \N \N [] +str_55 \N str_55 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] +57 57 \N [] +\N \N \N [] +str_59 \N str_59 [] +[60] \N \N [60] +61 61 \N [] +\N \N \N [] +str_63 \N str_63 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] +65 65 \N [] +\N \N \N [] +str_67 \N str_67 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] +69 69 \N [] +\N \N \N [] +str_71 \N str_71 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] +73 73 \N [] +\N \N \N [] +str_75 \N str_75 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] +77 77 \N [] +\N \N \N [] +str_79 \N str_79 [] +0 \N [] +1 \N [] +2 \N [] +3 \N [] +4 \N [] +5 \N [] +6 \N [] +7 \N [] +8 \N [] +9 \N [] +\N \N [] +\N str_10 [] +\N \N [] +\N str_11 [] +\N \N [] +\N str_12 [] +\N \N [] +\N str_13 [] +\N \N [] +\N str_14 [] +\N \N [] +\N str_15 [] +\N \N [] +\N str_16 [] +\N \N [] +\N str_17 [] +\N \N [] +\N str_18 [] +\N \N [] +\N str_19 [] +\N \N [20] +\N \N ['str_21','str_21'] +\N \N [22,22,22] +\N \N [23,23,23,23] +\N \N [24,24,24,24,24] +\N \N [25,25,25,25,25,25] +\N \N [26,26,26,26,26,26,26] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] +\N \N [29,29,29,29,29,29,29,29,29,29] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [40] +41 \N [] +\N \N [] +\N str_43 [] +\N \N [44,44,44,44,44] +45 \N [] +\N \N [] +\N str_47 [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] +\N \N [] +\N str_51 [] +\N \N [52,52,52] +53 \N [] +\N \N [] +\N str_55 [] +\N \N [56,56,56,56,56,56,56] +57 \N [] +\N \N [] +\N str_59 [] +\N \N [60] +61 \N [] +\N \N [] +\N str_63 [] +\N \N [64,64,64,64,64] +65 \N [] +\N \N [] +\N str_67 [] +\N \N [68,68,68,68,68,68,68,68,68] +69 \N [] +\N \N [] +\N str_71 [] +\N \N [NULL,NULL,NULL] +73 \N [] +\N \N [] +\N str_75 [] +\N \N [76,76,76,76,76,76,76] +77 \N [] +\N \N [] +\N str_79 [] +\N \N [] +\N \N 
[] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +\N \N [] +0 0 \N [] 0 [] +1 1 \N [] 0 [] +2 2 \N [] 0 [] +3 3 \N [] 0 [] +4 4 \N [] 0 [] +5 5 \N [] 0 [] +6 6 \N [] 0 [] +7 7 \N [] 0 [] +8 8 \N [] 0 [] +9 9 \N [] 0 [] +[[0]] \N \N [] 0 [] +str_10 \N \N [] 0 [] +[[0,1]] \N \N [] 0 [] +str_11 \N \N [] 0 [] +[[0,1,2]] \N \N [] 0 [] +str_12 \N \N [] 0 [] +[[0,1,2,3]] \N \N [] 0 [] +str_13 \N \N [] 0 [] +[[0,1,2,3,4]] \N \N [] 0 [] +str_14 \N \N [] 0 [] +[[0,1,2,3,4,5]] \N \N [] 0 [] +str_15 \N \N [] 0 [] +[[0,1,2,3,4,5,6]] \N \N [] 0 [] +str_16 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7]] \N \N [] 0 [] +str_17 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7,8]] \N \N [] 0 [] +str_18 \N \N [] 0 [] +[[0,1,2,3,4,5,6,7,8,9]] \N \N [] 0 [] +str_19 \N \N [] 0 [] +[20] \N \N [20] 1 [20] +['str_21','str_21'] \N \N ['str_21','str_21'] 2 [NULL,NULL] +[22,22,22] \N \N [22,22,22] 3 [22,22,22] +[23,23,23,23] \N \N [23,23,23,23] 4 [23,23,23,23] +[24,24,24,24,24] \N \N [24,24,24,24,24] 5 [24,24,24,24,24] +[25,25,25,25,25,25] \N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] +[26,26,26,26,26,26,26] \N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] \N \N 
[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] \N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] \N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +\N \N \N [] 0 [] +[40] \N \N [40] 1 [40] +41 41 \N [] 0 [] +\N \N \N [] 0 [] +str_43 \N \N [] 0 [] +[44,44,44,44,44] \N \N [44,44,44,44,44] 5 [44,44,44,44,44] +45 45 \N [] 0 [] +\N \N \N [] 0 [] +str_47 \N \N [] 0 [] +['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] \N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 49 \N [] 0 [] +\N \N \N [] 0 [] +str_51 \N \N [] 0 [] +[52,52,52] \N \N [52,52,52] 3 [52,52,52] +53 53 \N [] 0 [] +\N \N \N [] 0 [] +str_55 \N \N [] 0 [] +[56,56,56,56,56,56,56] \N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] +57 57 \N [] 0 [] +\N \N \N [] 0 [] +str_59 \N \N [] 0 [] +[60] \N \N [60] 1 [60] +61 61 \N [] 0 [] +\N \N \N [] 0 [] +str_63 \N \N [] 0 [] +[64,64,64,64,64] \N \N [64,64,64,64,64] 5 [64,64,64,64,64] +65 65 \N [] 0 [] +\N \N \N [] 0 [] +str_67 \N \N [] 0 [] +[68,68,68,68,68,68,68,68,68] \N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] +69 69 \N [] 0 [] +\N \N \N [] 0 [] +str_71 \N \N [] 0 [] +[NULL,NULL,NULL] \N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] +73 73 \N [] 0 [] +\N \N \N [] 0 [] +str_75 \N \N [] 0 [] +[76,76,76,76,76,76,76] \N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] +77 77 \N [] 0 [] +\N \N \N [] 0 [] +str_79 \N \N [] 0 [] +0 \N [] 0 [] [] +1 \N [] 0 [] [] +2 \N [] 0 [] [] +3 \N [] 0 [] [] +4 \N [] 0 [] [] +5 \N [] 0 [] [] +6 \N [] 0 [] [] +7 \N [] 0 [] [] +8 \N [] 0 [] [] +9 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] 
+\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [20] 1 [20] [NULL] +\N \N ['str_21','str_21'] 2 [NULL,NULL] ['str_21','str_21'] +\N \N [22,22,22] 3 [22,22,22] [NULL,NULL,NULL] +\N \N [23,23,23,23] 4 [23,23,23,23] [NULL,NULL,NULL,NULL] +\N \N [24,24,24,24,24] 5 [24,24,24,24,24] [NULL,NULL,NULL,NULL,NULL] +\N \N [25,25,25,25,25,25] 6 [25,25,25,25,25,25] [NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [26,26,26,26,26,26,26] 7 [26,26,26,26,26,26,26] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 8 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [28,28,28,28,28,28,28,28,28] 9 [28,28,28,28,28,28,28,28,28] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [29,29,29,29,29,29,29,29,29,29] 10 [29,29,29,29,29,29,29,29,29,29] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [40] 1 [40] [NULL] +41 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [44,44,44,44,44] 5 [44,44,44,44,44] [NULL,NULL,NULL,NULL,NULL] +45 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 9 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] ['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] +49 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [52,52,52] 3 [52,52,52] [NULL,NULL,NULL] +53 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [56,56,56,56,56,56,56] 7 [56,56,56,56,56,56,56] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +57 \N [] 
0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [60] 1 [60] [NULL] +61 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [64,64,64,64,64] 5 [64,64,64,64,64] [NULL,NULL,NULL,NULL,NULL] +65 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [68,68,68,68,68,68,68,68,68] 9 [68,68,68,68,68,68,68,68,68] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +69 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [NULL,NULL,NULL] 3 [NULL,NULL,NULL] [NULL,NULL,NULL] +73 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [76,76,76,76,76,76,76] 7 [76,76,76,76,76,76,76] [NULL,NULL,NULL,NULL,NULL,NULL,NULL] +77 \N [] 0 [] [] +\N \N [] 0 [] [] +\N \N [] 0 [] [] +0 0 [] [] +1 0 [] [] +2 0 [] [] +3 0 [] [] +4 0 [] [] +5 0 [] [] +6 0 [] [] +7 0 [] [] +8 0 [] [] +9 0 [] [] +[[0]] 0 [] [] +str_10 0 [] [] +[[0,1]] 0 [] [] +str_11 0 [] [] +[[0,1,2]] 0 [] [] +str_12 0 [] [] +[[0,1,2,3]] 0 [] [] +str_13 0 [] [] +[[0,1,2,3,4]] 0 [] [] +str_14 0 [] [] +[[0,1,2,3,4,5]] 0 [] [] +str_15 0 [] [] +[[0,1,2,3,4,5,6]] 0 [] [] +str_16 0 [] [] +[[0,1,2,3,4,5,6,7]] 0 [] [] +str_17 0 [] [] +[[0,1,2,3,4,5,6,7,8]] 0 [] [] +str_18 0 [] [] +[[0,1,2,3,4,5,6,7,8,9]] 0 [] [] +str_19 0 [] [] +[20] 0 [] [20] +['str_21','str_21'] 0 [] [NULL,NULL] +[22,22,22] 0 [] [22,22,22] +[23,23,23,23] 0 [] [23,23,23,23] +[24,24,24,24,24] 0 [] [24,24,24,24,24] +[25,25,25,25,25,25] 0 [] [25,25,25,25,25,25] +[26,26,26,26,26,26,26] 0 [] [26,26,26,26,26,26,26] +[NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[28,28,28,28,28,28,28,28,28] 0 [] [28,28,28,28,28,28,28,28,28] +[29,29,29,29,29,29,29,29,29,29] 0 [] [29,29,29,29,29,29,29,29,29,29] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +\N 0 [] [] +[40] 0 [] [40] +41 0 [] [] +\N 0 [] [] +str_43 0 [] [] +[44,44,44,44,44] 0 [] [44,44,44,44,44] +45 0 [] [] +\N 0 [] [] +str_47 0 [] [] 
+['str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48','str_48'] 0 [] [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +49 0 [] [] +\N 0 [] [] +str_51 0 [] [] +[52,52,52] 0 [] [52,52,52] +53 0 [] [] +\N 0 [] [] +str_55 0 [] [] +[56,56,56,56,56,56,56] 0 [] [56,56,56,56,56,56,56] +57 0 [] [] +\N 0 [] [] +str_59 0 [] [] +[60] 0 [] [60] +61 0 [] [] +\N 0 [] [] +str_63 0 [] [] +[64,64,64,64,64] 0 [] [64,64,64,64,64] +65 0 [] [] +\N 0 [] [] +str_67 0 [] [] +[68,68,68,68,68,68,68,68,68] 0 [] [68,68,68,68,68,68,68,68,68] +69 0 [] [] +\N 0 [] [] +str_71 0 [] [] +[NULL,NULL,NULL] 0 [] [NULL,NULL,NULL] +73 0 [] [] +\N 0 [] [] +str_75 0 [] [] +[76,76,76,76,76,76,76] 0 [] [76,76,76,76,76,76,76] +77 0 [] [] +\N 0 [] [] +str_79 0 [] [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [20] +[] 0 [NULL,NULL] +[] 0 [22,22,22] +[] 0 [23,23,23,23] +[] 0 [24,24,24,24,24] +[] 0 [25,25,25,25,25,25] +[] 0 [26,26,26,26,26,26,26] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [28,28,28,28,28,28,28,28,28] +[] 0 [29,29,29,29,29,29,29,29,29,29] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [40] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [44,44,44,44,44] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [52,52,52] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [56,56,56,56,56,56,56] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [60] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [64,64,64,64,64] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [68,68,68,68,68,68,68,68,68] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [NULL,NULL,NULL] +[] 0 [] +[] 0 [] +[] 0 [] +[] 0 [76,76,76,76,76,76,76] +[] 0 [] +[] 0 [] +[] 0 [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] 
+[1] [[0]] [[[]]] +[] [] [] +[2] [[0,1]] [[[],[]]] +[] [] [] +[3] [[0,1,2]] [[[],[],[]]] +[] [] [] +[4] [[0,1,2,3]] [[[],[],[],[]]] +[] [] [] +[5] [[0,1,2,3,4]] [[[],[],[],[],[]]] +[] [] [] +[6] [[0,1,2,3,4,5]] [[[],[],[],[],[],[]]] +[] [] [] +[7] [[0,1,2,3,4,5,6]] [[[],[],[],[],[],[],[]]] +[] [] [] +[8] [[0,1,2,3,4,5,6,7]] [[[],[],[],[],[],[],[],[]]] +[] [] [] +[9] [[0,1,2,3,4,5,6,7,8]] [[[],[],[],[],[],[],[],[],[]]] +[] [] [] +[10] [[0,1,2,3,4,5,6,7,8,9]] [[[],[],[],[],[],[],[],[],[],[]]] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] +[] [] [] diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 new file mode 100644 index 00000000000..3253d7a6c68 --- /dev/null +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_small.sql.j2 @@ -0,0 +1,43 @@ +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; + +{% for engine in ['Memory', 'MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1'] -%} + +create table test (id UInt64, d Dynamic) engine={{ engine }}; + +insert into test select number, number from numbers(10); +insert into test select number, 'str_' || toString(number) from numbers(10, 
10); +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10); +insert into test select number, NULL from numbers(30, 10); +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40); +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10); + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.UInt64, d.String, d.`Array(Variant(String, UInt64))` from test order by id, d; +select d.Int8, d.Date, d.`Array(String)` from test order by id, d; +select d, d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.UInt64, d.Date, d.`Array(Variant(String, UInt64))`, d.`Array(Variant(String, 
UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64, d.`Array(Variant(String, UInt64))`.String from test order by id, d; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64 from test order by id, d; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test order by id, d; + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql index 00aba3a57b6..5aac5f7b72f 100644 --- a/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03036_dynamic_read_subcolumns_wide_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_variant_type = 1; set use_variant_as_common_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql index b66fe5e2187..879ce8e9e95 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type=1; drop table if exists test; @@ -30,4 +30,4 @@ system start merges test; optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); -drop table test; \ No newline at end of file +drop table test; diff --git 
a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql index 8a376b6d7d7..e46c7e93a5a 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_horizontal_compact_wide_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type=1; drop table if exists test; @@ -30,4 +30,4 @@ system start merges test; optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); -drop table test; \ No newline at end of file +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql index 127b56e727c..d596f3a1fad 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_compact_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type=1; drop table if exists test; @@ -30,4 +30,4 @@ system start merges test; optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); -drop table test; \ No newline at end of file +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql index e5c273cb592..aa5abb05be7 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_1_vertical_wide_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan 
set allow_experimental_dynamic_type=1; drop table if exists test; @@ -30,4 +30,4 @@ system start merges test; optimize table test final; select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); -drop table test; \ No newline at end of file +drop table test; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql index 6d7a0dd8c18..28e6c5823a3 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_compact_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql index 011d54d2360..6046dada689 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_horizontal_wide_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql index 1a74f9e5417..79ae2eb5aeb 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_compact_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql 
b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql index cbc834e9660..4ab957d2fb9 100644 --- a/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql +++ b/tests/queries/0_stateless/03037_dynamic_merges_2_vertical_wide_merge_tree.sql @@ -1,4 +1,4 @@ --- Tags: long +-- Tags: long, no-tsan, no-msan, no-ubsan, no-asan set allow_experimental_dynamic_type = 1; diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 new file mode 100644 index 00000000000..96a854630ed --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.reference.j2 @@ -0,0 +1,112 @@ +5 DateTime +6 Date +7 Array(UInt16) +8 String +10 None +10 UInt64 +7 Array(UInt16) +10 None +10 UInt64 +19 String +7 Array(UInt16) +10 None +10 UInt64 +19 String +20 Map(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +1 Tuple(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +10 None +10 UInt64 +20 Map(UInt64, UInt64) +27 String +5 DateTime +6 Date +7 Array(UInt16) +8 String +10 None +10 UInt64 +7 Array(UInt16) +10 None +10 UInt64 +19 String +7 Array(UInt16) +10 None +10 UInt64 +19 String +20 Map(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +1 Tuple(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +10 None +10 UInt64 +20 Map(UInt64, UInt64) +27 String +5 DateTime +6 Date +7 Array(UInt16) +8 String +10 None +10 UInt64 +7 Array(UInt16) +10 None +10 UInt64 +19 String +7 Array(UInt16) +10 None +10 UInt64 +19 String +20 Map(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +1 Tuple(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +10 None +10 UInt64 +20 Map(UInt64, UInt64) +27 String +5 DateTime +6 Date +7 Array(UInt16) +8 String +10 None +10 UInt64 +7 Array(UInt16) +10 None +10 UInt64 +19 String +7 Array(UInt16) +10 None +10 UInt64 +19 
String +20 Map(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +1 Tuple(UInt64, UInt64) +10 None +10 UInt64 +20 Map(UInt64, UInt64) +26 String +10 None +10 UInt64 +20 Map(UInt64, UInt64) +27 String diff --git a/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 new file mode 100644 index 00000000000..263e92be403 --- /dev/null +++ b/tests/queries/0_stateless/03037_dynamic_merges_small.sql.j2 @@ -0,0 +1,42 @@ +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; + +drop table if exists test; + +{% for engine in ['MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', + 'MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1', + 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1', + 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1, vertical_merge_algorithm_min_rows_to_activate=1, vertical_merge_algorithm_min_columns_to_activate=1'] -%} + +create table test (id UInt64, d Dynamic(max_types=3)) engine={{ engine }}; + +system stop merges test; +insert into test select number, number from numbers(10); +insert into test select number, 'str_' || toString(number) from numbers(8); +insert into test select number, range(number % 10 + 1) from numbers(7); +insert into test select number, toDate(number) from numbers(6); +insert into test select number, toDateTime(number) from numbers(5); +insert into test select number, NULL from numbers(10); + +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; optimize table test final;; +select count(), dynamicType(d) from test group by dynamicType(d) 
order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, map(number, number) from numbers(20); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +system stop merges test; +insert into test select number, tuple(number, number) from numbers(1); +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); +system start merges test; +optimize table test final; +select count(), dynamicType(d) from test group by dynamicType(d) order by count(), dynamicType(d); + +drop table test; + +{% endfor -%} diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference.j2 similarity index 60% rename from tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference rename to tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference.j2 index 8740726c7ef..6684d3736e4 100644 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.reference.j2 @@ -1,4 +1,4 @@ -Memory +--- Memory --- test Array(Array(Dynamic)) Array(Variant(String, UInt64)) @@ -17,7 +17,7 @@ UInt64 10 20 0 -MergeTree compact +--- MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000 --- test Array(Array(Dynamic)) Array(Variant(String, UInt64)) @@ -36,7 +36,7 @@ UInt64 10 20 0 -MergeTree wide +--- MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1 --- test Array(Array(Dynamic)) Array(Variant(String, UInt64)) diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh deleted file mode 100755 index 
968c9e5271f..00000000000 --- a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sh +++ /dev/null @@ -1,62 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# reset --log_comment -CLICKHOUSE_LOG_COMMENT= -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --use_variant_as_common_type=1 --allow_experimental_dynamic_type=1 --optimize_functions_to_subcolumns=0" - - -function test() -{ - echo "test" - $CH_CLIENT -q "insert into test select number, number from numbers(10) settings min_insert_block_size_rows=50000" - $CH_CLIENT -q "insert into test select number, 'str_' || toString(number) from numbers(10, 10) settings min_insert_block_size_rows=50000" - $CH_CLIENT -q "insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) from numbers(20, 10) settings min_insert_block_size_rows=50000" - $CH_CLIENT -q "insert into test select number, NULL from numbers(30, 10) settings min_insert_block_size_rows=50000" - $CH_CLIENT -q "insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40) settings min_insert_block_size_rows=50000" - $CH_CLIENT -q "insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10) settings min_insert_block_size_rows=50000" - - $CH_CLIENT -q "select distinct dynamicType(d) as type from test order by type" - $CH_CLIENT -q "select count() from test where dynamicType(d) == 'UInt64'" - $CH_CLIENT -q "select count() from test where d.UInt64 is not NULL" - $CH_CLIENT -q "select count() from test where dynamicType(d) == 'String'" - $CH_CLIENT -q "select 
count() from test where d.String is not NULL" - $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Date'" - $CH_CLIENT -q "select count() from test where d.Date is not NULL" - $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'" - $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Variant(String, UInt64))\`)" - $CH_CLIENT -q "select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'" - $CH_CLIENT -q "select count() from test where not empty(d.\`Array(Array(Dynamic))\`)" - $CH_CLIENT -q "select count() from test where d is NULL" - $CH_CLIENT -q "select count() from test where not empty(d.\`Tuple(a Array(Dynamic))\`.a.String)" - - $CH_CLIENT -q "select d, d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" - $CH_CLIENT -q "select d.UInt64.null, d.String.null, d.\`Array(Variant(String, UInt64))\`.null from test format Null" - $CH_CLIENT -q "select d.Int8.null, d.Date.null, d.\`Array(String)\`.null from test format Null" - $CH_CLIENT -q "select d, d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" - $CH_CLIENT -q "select d.UInt64.null, d.Date.null, d.\`Array(Variant(String, UInt64))\`.null, d.\`Array(Variant(String, UInt64))\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.String.null from test format Null" - $CH_CLIENT -q "select d, d.\`Tuple(a UInt64, b String)\`.a, d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" - $CH_CLIENT -q "select d.\`Array(Dynamic)\`.\`Variant(String, UInt64)\`.UInt64.null, d.\`Array(Dynamic)\`.size0, d.\`Array(Variant(String, UInt64))\`.UInt64.null from test format Null" - $CH_CLIENT -q "select d.\`Array(Array(Dynamic))\`.size1, 
d.\`Array(Array(Dynamic))\`.UInt64.null, d.\`Array(Array(Dynamic))\`.\`Map(String, Tuple(a UInt64))\`.values.a from test format Null" -} - -$CH_CLIENT -q "drop table if exists test;" - -echo "Memory" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=Memory" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree compact" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000;" -test -$CH_CLIENT -q "drop table test;" - -echo "MergeTree wide" -$CH_CLIENT -q "create table test (id UInt64, d Dynamic) engine=MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;" -test -$CH_CLIENT -q "drop table test;" diff --git a/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 new file mode 100644 index 00000000000..21bf738dccb --- /dev/null +++ b/tests/queries/0_stateless/03202_dynamic_null_map_subcolumn.sql.j2 @@ -0,0 +1,49 @@ +# Tags: long + +set allow_experimental_variant_type = 1; +set use_variant_as_common_type = 1; +set allow_experimental_dynamic_type = 1; +set optimize_functions_to_subcolumns = 0; + +drop table if exists test; + +{% for engine in ['Memory', 'MergeTree order by id settings min_rows_for_wide_part=1000000000, min_bytes_for_wide_part=10000000000', 'MergeTree order by id settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1'] -%} + +SELECT '--- {{ engine }} ---'; + +create table test (id UInt64, d Dynamic) engine={{ engine }}; + +select 'test'; +insert into test select number, number from numbers(10) settings min_insert_block_size_rows=50000; +insert into test select number, 'str_' || toString(number) from numbers(10, 10) settings min_insert_block_size_rows=50000; +insert into test select number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1)) 
from numbers(20, 10) settings min_insert_block_size_rows=50000; +insert into test select number, NULL from numbers(30, 10) settings min_insert_block_size_rows=50000; +insert into test select number, multiIf(number % 4 == 3, 'str_' || toString(number), number % 4 == 2, NULL, number % 4 == 1, number, arrayMap(x -> multiIf(number % 9 == 0, NULL, number % 9 == 3, 'str_' || toString(number), number), range(number % 10 + 1))) from numbers(40, 40) settings min_insert_block_size_rows=50000; +insert into test select number, [range((number % 10 + 1)::UInt64)]::Array(Array(Dynamic)) from numbers(10, 10) settings min_insert_block_size_rows=50000; + +select distinct dynamicType(d) as type from test order by type; +select count() from test where dynamicType(d) == 'UInt64'; +select count() from test where d.UInt64 is not NULL; +select count() from test where dynamicType(d) == 'String'; +select count() from test where d.String is not NULL; +select count() from test where dynamicType(d) == 'Date'; +select count() from test where d.Date is not NULL; +select count() from test where dynamicType(d) == 'Array(Variant(String, UInt64))'; +select count() from test where not empty(d.`Array(Variant(String, UInt64))`); +select count() from test where dynamicType(d) == 'Array(Array(Dynamic))'; +select count() from test where not empty(d.`Array(Array(Dynamic))`); +select count() from test where d is NULL; +select count() from test where not empty(d.`Tuple(a Array(Dynamic))`.a.String); + +select d, d.UInt64.null, d.String.null, d.`Array(Variant(String, UInt64))`.null from test format Null; +select d.UInt64.null, d.String.null, d.`Array(Variant(String, UInt64))`.null from test format Null; +select d.Int8.null, d.Date.null, d.`Array(String)`.null from test format Null; +select d, d.UInt64.null, d.Date.null, d.`Array(Variant(String, UInt64))`.null, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; +select d.UInt64.null, d.Date.null, 
d.`Array(Variant(String, UInt64))`.null, d.`Array(Variant(String, UInt64))`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null, d.`Array(Variant(String, UInt64))`.String.null from test format Null; +select d, d.`Tuple(a UInt64, b String)`.a, d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64.null, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; +select d.`Array(Dynamic)`.`Variant(String, UInt64)`.UInt64.null, d.`Array(Dynamic)`.size0, d.`Array(Variant(String, UInt64))`.UInt64.null from test format Null; +select d.`Array(Array(Dynamic))`.size1, d.`Array(Array(Dynamic))`.UInt64.null, d.`Array(Array(Dynamic))`.`Map(String, Tuple(a UInt64))`.values.a from test format Null; + +drop table test; + +{% endfor -%} From e9a86f28cce19bdd05767d76e62c8aa587997021 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 12:10:28 +0000 Subject: [PATCH 046/121] Backport #67122 to 24.7: Fix crash with Variant + AggregateFunction type --- src/Columns/ColumnAggregateFunction.cpp | 33 +++++++++- ...ant_with_aggregate_function_type.reference | 6 ++ ...0_variant_with_aggregate_function_type.sql | 60 +++++++++++++++++++ 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03210_variant_with_aggregate_function_type.reference create mode 100644 tests/queries/0_stateless/03210_variant_with_aggregate_function_type.sql diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index e26fe790a8e..4bc48c62eb4 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -330,7 +330,38 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_ void ColumnAggregateFunction::expand(const Filter & mask, bool inverted) { - expandDataByMask(data, mask, inverted); + ensureOwnership(); + Arena & arena = createOrGetArena(); + + if (mask.size() < data.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mask size should be no 
less than data size."); + + ssize_t from = data.size() - 1; + ssize_t index = mask.size() - 1; + data.resize(mask.size()); + while (index >= 0) + { + if (!!mask[index] ^ inverted) + { + if (from < 0) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many bytes in mask"); + + /// Copy only if it makes sense. + if (index != from) + data[index] = data[from]; + --from; + } + else + { + data[index] = arena.alignedAlloc(func->sizeOfData(), func->alignOfData()); + func->create(data[index]); + } + + --index; + } + + if (from != -1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough bytes in mask"); } ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const diff --git a/tests/queries/0_stateless/03210_variant_with_aggregate_function_type.reference b/tests/queries/0_stateless/03210_variant_with_aggregate_function_type.reference new file mode 100644 index 00000000000..105e8e7d8bd --- /dev/null +++ b/tests/queries/0_stateless/03210_variant_with_aggregate_function_type.reference @@ -0,0 +1,6 @@ + 500 +fail 500 + 499 +fail 500 + 500 499 +fail 500 500 diff --git a/tests/queries/0_stateless/03210_variant_with_aggregate_function_type.sql b/tests/queries/0_stateless/03210_variant_with_aggregate_function_type.sql new file mode 100644 index 00000000000..cb9cdb0b456 --- /dev/null +++ b/tests/queries/0_stateless/03210_variant_with_aggregate_function_type.sql @@ -0,0 +1,60 @@ +SET allow_experimental_variant_type = 1; + +DROP TABLE IF EXISTS source; +CREATE TABLE source +( + Name String, + Value Int64 + +) ENGINE = MergeTree ORDER BY (); + +INSERT INTO source SELECT ['fail', 'success'][number % 2] as Name, number AS Value FROM numbers(1000); + +DROP TABLE IF EXISTS test_agg_variant; +CREATE TABLE test_agg_variant +( + Name String, + Value Variant(AggregateFunction(uniqExact, Int64), AggregateFunction(avg, Int64)) +) +ENGINE = MergeTree +ORDER BY (Name); + +INSERT INTO test_agg_variant +SELECT + Name, + t AS Value +FROM +( + SELECT + Name, + 
arrayJoin([ + uniqExactState(Value)::Variant(AggregateFunction(uniqExact, Int64), AggregateFunction(avg, Int64)), + avgState(Value)::Variant(AggregateFunction(uniqExact, Int64), AggregateFunction(avg, Int64)) + ]) AS t + FROM source + GROUP BY Name +); + +SELECT + Name, + uniqExactMerge(Value.`AggregateFunction(uniqExact, Int64)`) AS Value +FROM test_agg_variant +GROUP BY Name; + +SELECT + Name, + avgMerge(Value.`AggregateFunction(avg, Int64)`) AS Value +FROM test_agg_variant +GROUP BY Name; + +SELECT + Name, + uniqExactMerge(Value.`AggregateFunction(uniqExact, Int64)`) AS ValueUniq, + avgMerge(Value.`AggregateFunction(avg, Int64)`) AS ValueAvg +FROM test_agg_variant +GROUP BY Name; + + +DROP TABLE test_agg_variant; +DROP TABLE source; + From b1fa1a64b934ebdda163db6e1f7bca4546380ff3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 12:10:50 +0000 Subject: [PATCH 047/121] Backport #66905 to 24.7: Fix possible deadlock on query cancel with parallel replicas --- src/Server/TCPHandler.cpp | 73 +++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 30 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 5bc2d09df35..29568baba58 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,48 +1,48 @@ -#include -#include -#include #include #include #include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include #include #include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include +#include +#include +#include #include -#include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include 
#include -#include #include #include @@ -61,6 +61,8 @@ #include +#include + using namespace std::literals; using namespace DB; @@ -1036,6 +1038,17 @@ void TCPHandler::processOrdinaryQuery() PullingAsyncPipelineExecutor executor(pipeline); CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread}; + /// The following may happen: + /// * current thread is holding the lock + /// * because of the exception we unwind the stack and call the destructor of `executor` + /// * the destructor calls cancel() and waits for all query threads to finish + /// * at the same time one of the query threads is trying to acquire the lock, e.g. inside `merge_tree_read_task_callback` + /// * deadlock + SCOPE_EXIT({ + if (out_lock.owns_lock()) + out_lock.unlock(); + }); + Block block; while (executor.pull(block, interactive_delay / 1000)) { @@ -1079,8 +1092,7 @@ void TCPHandler::processOrdinaryQuery() } /// This lock wasn't acquired before and we make .lock() call here - /// so everything under this line is covered even together - /// with sendProgress() out of the scope + /// so everything under this line is covered. 
out_lock.lock(); /** If data has run out, we will send the profiling data and total values to @@ -1107,6 +1119,7 @@ void TCPHandler::processOrdinaryQuery() last_sent_snapshots.clear(); } + out_lock.lock(); sendProgress(); } From ef0bef110ce0f717352fdcb3e0c88af6b641caa8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 13:09:09 +0000 Subject: [PATCH 048/121] Backport #67620 to 24.7: Try fix 03143_asof_join_ddb_long --- tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 17a67511030..0b17ade5d1c 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-random-merge-tree-settings +-- no-random-merge-tree-settings - times out in private DROP TABLE IF EXISTS build; DROP TABLE IF EXISTS skewed_probe; From dc25d31c1f7fb38eca60ad4a75df1da3747c5ac3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 14:08:27 +0000 Subject: [PATCH 049/121] Backport #67756 to 24.7: Fix stack overflow in JSONMergePatch function --- src/Common/JSONParsers/RapidJSONParser.h | 12 ++++--- src/Functions/jsonMergePatch.cpp | 34 ++++++++++++------- ..._json_merge_patch_stack_overflow.reference | 0 .../03217_json_merge_patch_stack_overflow.sql | 9 +++++ 4 files changed, 38 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.reference create mode 100644 tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.sql diff --git a/src/Common/JSONParsers/RapidJSONParser.h b/src/Common/JSONParsers/RapidJSONParser.h index 6c5ea938bfe..ad7a4cbf53a 100644 --- a/src/Common/JSONParsers/RapidJSONParser.h +++ b/src/Common/JSONParsers/RapidJSONParser.h @@ -3,10 +3,14 @@ #include "config.h" #if USE_RAPIDJSON 
-# include -# include -# include -# include "ElementTypes.h" + +/// Prevent stack overflow: +#define RAPIDJSON_PARSE_DEFAULT_FLAGS (kParseIterativeFlag) + +#include +#include +#include +#include "ElementTypes.h" namespace DB { diff --git a/src/Functions/jsonMergePatch.cpp b/src/Functions/jsonMergePatch.cpp index a83daacdbf6..3bde415aabf 100644 --- a/src/Functions/jsonMergePatch.cpp +++ b/src/Functions/jsonMergePatch.cpp @@ -10,12 +10,14 @@ #if USE_RAPIDJSON -#include "rapidjson/document.h" -#include "rapidjson/writer.h" -#include "rapidjson/stringbuffer.h" -#include "rapidjson/filewritestream.h" -#include "rapidjson/prettywriter.h" -#include "rapidjson/filereadstream.h" +/// Prevent stack overflow: +#define RAPIDJSON_PARSE_DEFAULT_FLAGS (kParseIterativeFlag) + +#include +#include +#include +#include +#include namespace DB @@ -31,17 +33,17 @@ namespace ErrorCodes namespace { - // select jsonMergePatch('{"a":1}','{"name": "joey"}','{"name": "tom"}','{"name": "zoey"}'); + // select JSONMergePatch('{"a":1}','{"name": "joey"}','{"name": "tom"}','{"name": "zoey"}'); // || // \/ // ┌───────────────────────┐ // │ {"a":1,"name":"zoey"} │ // └───────────────────────┘ - class FunctionjsonMergePatch : public IFunction + class FunctionJSONMergePatch : public IFunction { public: - static constexpr auto name = "jsonMergePatch"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = "JSONMergePatch"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return true; } @@ -98,7 +100,11 @@ namespace const char * json = str_ref.data; document.Parse(json); - if (document.HasParseError() || !document.IsObject()) + + if (document.HasParseError()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong JSON string to merge: {}", rapidjson::GetParseError_En(document.GetParseError())); + + if (!document.IsObject()) throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong JSON string to merge. Expected JSON object"); }; @@ -162,10 +168,12 @@ namespace } -REGISTER_FUNCTION(jsonMergePatch) +REGISTER_FUNCTION(JSONMergePatch) { - factory.registerFunction(FunctionDocumentation{ + factory.registerFunction(FunctionDocumentation{ .description="Returns the merged JSON object string, which is formed by merging multiple JSON objects."}); + + factory.registerAlias("jsonMergePatch", "JSONMergePatch"); } } diff --git a/tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.reference b/tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.sql b/tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.sql new file mode 100644 index 00000000000..4b366b08c6b --- /dev/null +++ b/tests/queries/0_stateless/03217_json_merge_patch_stack_overflow.sql @@ -0,0 +1,9 @@ +-- Tags: no-fasttest +-- Needs rapidjson library +SELECT JSONMergePatch(REPEAT('{"c":', 1000000)); -- { serverError BAD_ARGUMENTS } +SELECT JSONMergePatch(REPEAT('{"c":', 100000)); -- { serverError BAD_ARGUMENTS } +SELECT JSONMergePatch(REPEAT('{"c":', 10000)); -- { serverError BAD_ARGUMENTS } +SELECT JSONMergePatch(REPEAT('{"c":', 1000)); -- { serverError BAD_ARGUMENTS } +SELECT JSONMergePatch(REPEAT('{"c":', 100)); -- { serverError BAD_ARGUMENTS } +SELECT JSONMergePatch(REPEAT('{"c":', 10)); -- { serverError BAD_ARGUMENTS } +SELECT JSONMergePatch(REPEAT('{"c":', 1)); -- { serverError BAD_ARGUMENTS } From beaa87c05ca36f7ed62b410d7b177722c85d1b25 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 15:08:17 +0000 Subject: [PATCH 050/121] Backport #67522 to 24.7: Analyzer: Do not traverse unresolved subtrees From f901411673af2d9d0c6c2bac380c4ab3bad61a62 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 5 Aug 2024 18:09:35 +0000 Subject: [PATCH 051/121] Backport 
#67600 to 24.7: Fix 02434_cancel_insert_when_client_dies --- .../0_stateless/02434_cancel_insert_when_client_dies.sh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh index f85aaed7716..dca8dae22c3 100755 --- a/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh +++ b/tests/queries/0_stateless/02434_cancel_insert_when_client_dies.sh @@ -10,22 +10,23 @@ export DATA_FILE="$CLICKHOUSE_TMP/deduptest.tsv" export TEST_MARK="02434_insert_${CLICKHOUSE_DATABASE}_" $CLICKHOUSE_CLIENT -q 'select * from numbers(5000000) format TSV' > $DATA_FILE -$CLICKHOUSE_CLIENT -q 'create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000;' +$CLICKHOUSE_CLIENT -q "create table dedup_test(A Int64) Engine = MergeTree order by A settings non_replicated_deduplication_window=1000, merge_tree_clear_old_temporary_directories_interval_seconds = 1;" $CLICKHOUSE_CLIENT -q "create table dedup_dist(A Int64) Engine = Distributed('test_cluster_one_shard_two_replicas', currentDatabase(), dedup_test)" function insert_data { - SETTINGS="query_id=$ID&max_insert_block_size=110000&min_insert_block_size_rows=110000" + # send_logs_level: https://github.com/ClickHouse/ClickHouse/issues/67599 + SETTINGS="query_id=$ID&max_insert_block_size=110000&min_insert_block_size_rows=110000&send_logs_level=fatal" # max_block_size=10000, so external table will contain smaller blocks that will be squashed on insert-select (more chances to catch a bug on query cancellation) TRASH_SETTINGS="query_id=$ID&input_format_parallel_parsing=0&max_threads=1&max_insert_threads=1&max_insert_block_size=110000&max_block_size=10000&min_insert_block_size_bytes=0&min_insert_block_size_rows=110000&max_insert_block_size=110000" TYPE=$(( RANDOM % 5 )) if [[ "$TYPE" -eq 0 ]]; then # client will send 10000-rows blocks, server 
will squash them into 110000-rows blocks (more chances to catch a bug on query cancellation) - $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" \ + $CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=fatal --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" \ -q 'insert into dedup_test settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE elif [[ "$TYPE" -eq 1 ]]; then - $CLICKHOUSE_CLIENT --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" --prefer_localhost_replica="$(( RANDOM % 2))" \ + $CLICKHOUSE_CLIENT --allow_repeated_settings --send_logs_level=fatal --max_block_size=10000 --max_insert_block_size=10000 --query_id="$ID" --prefer_localhost_replica="$(( RANDOM % 2))" \ -q 'insert into dedup_dist settings max_insert_block_size=110000, min_insert_block_size_rows=110000 format TSV' < $DATA_FILE elif [[ "$TYPE" -eq 2 ]]; then $CLICKHOUSE_CURL -sS -X POST --data-binary @- "$CLICKHOUSE_URL&$SETTINGS&query=insert+into+dedup_test+format+TSV" < $DATA_FILE From ba03023b54cb1c85b28c37952ccc64a72e4c4690 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 6 Aug 2024 07:07:55 +0000 Subject: [PATCH 052/121] Backport #67433 to 24.7: Fix file/URI parsing with archive syntax --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 1 + src/IO/Archives/ArchiveUtils.cpp | 50 +++++++++++++++++ src/IO/Archives/ArchiveUtils.h | 14 +++++ src/IO/Archives/createArchiveReader.cpp | 13 ++--- src/IO/Archives/createArchiveWriter.cpp | 9 ++-- src/IO/S3/URI.cpp | 53 +++++++++++-------- src/IO/S3/URI.h | 5 +- .../ObjectStorage/S3/Configuration.cpp | 8 +-- src/Storages/StorageFile.cpp | 34 ++++++++---- src/Storages/StorageFile.h | 2 +- src/TableFunctions/TableFunctionFile.cpp | 14 +++-- .../02952_archive_parsing.reference | 0 .../0_stateless/02952_archive_parsing.sql | 1 - .../03214_parsing_archive_name_file.reference | 16 ++++++ 
.../03214_parsing_archive_name_file.sh | 27 ++++++++++ .../03215_parsing_archive_name_s3.reference | 3 ++ .../03215_parsing_archive_name_s3.sql | 7 +++ .../data_minio/::03215_archive.csv | 1 + .../data_minio/test.zip::03215_archive.csv | 1 + .../data_minio/test::03215_archive.csv | 1 + 21 files changed, 199 insertions(+), 62 deletions(-) create mode 100644 src/IO/Archives/ArchiveUtils.cpp delete mode 100644 tests/queries/0_stateless/02952_archive_parsing.reference delete mode 100644 tests/queries/0_stateless/02952_archive_parsing.sql create mode 100644 tests/queries/0_stateless/03214_parsing_archive_name_file.reference create mode 100755 tests/queries/0_stateless/03214_parsing_archive_name_file.sh create mode 100644 tests/queries/0_stateless/03215_parsing_archive_name_s3.reference create mode 100644 tests/queries/0_stateless/03215_parsing_archive_name_s3.sql create mode 100644 tests/queries/0_stateless/data_minio/::03215_archive.csv create mode 100644 tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv create mode 100644 tests/queries/0_stateless/data_minio/test::03215_archive.csv diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5e1efdbb7e0..34435cef3e6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -934,6 +934,7 @@ class IColumn; M(UInt64, parallel_replicas_min_number_of_rows_per_replica, 0, "Limit the number of replicas used in a query to (estimated rows to read / min_number_of_rows_per_replica). The max is still limited by 'max_parallel_replicas'", 0) \ M(Bool, parallel_replicas_prefer_local_join, true, "If true, and JOIN can be executed with parallel replicas algorithm, and all storages of right JOIN part are *MergeTree, local JOIN will be used instead of GLOBAL JOIN.", 0) \ M(UInt64, parallel_replicas_mark_segment_size, 128, "Parts virtually divided into segments to be distributed between replicas for parallel reading. This setting controls the size of these segments. 
Not recommended to change until you're absolutely sure in what you're doing", 0) \ + M(Bool, allow_archive_path_syntax, true, "File/S3 engines/table function will parse paths with '::' as ' :: ' if archive has correct extension", 0) \ \ M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \ M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a27b5fece0c..671ee287b04 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -123,6 +123,7 @@ static std::initializer_list + +#include +#include + +namespace DB +{ + +namespace +{ + +using namespace std::literals; +constexpr std::array tar_extensions{".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv, ".tar.bz2"sv, ".tar.lzma"sv}; +constexpr std::array zip_extensions{".zip"sv, ".zipx"sv}; +constexpr std::array sevenz_extensiosns{".7z"sv}; + +bool hasSupportedExtension(std::string_view path, const auto & supported_extensions) +{ + for (auto supported_extension : supported_extensions) + { + if (path.ends_with(supported_extension)) + return true; + } + + return false; +} + +} + +bool hasSupportedTarExtension(std::string_view path) +{ + return hasSupportedExtension(path, tar_extensions); +} + +bool hasSupportedZipExtension(std::string_view path) +{ + return hasSupportedExtension(path, zip_extensions); +} + +bool hasSupported7zExtension(std::string_view path) +{ + return hasSupportedExtension(path, sevenz_extensiosns); +} + +bool hasSupportedArchiveExtension(std::string_view path) +{ + return hasSupportedTarExtension(path) || hasSupportedZipExtension(path) || hasSupported7zExtension(path); +} + +} diff --git a/src/IO/Archives/ArchiveUtils.h b/src/IO/Archives/ArchiveUtils.h index 1b66be005a2..cdb731d1d57 100644 --- 
a/src/IO/Archives/ArchiveUtils.h +++ b/src/IO/Archives/ArchiveUtils.h @@ -10,3 +10,17 @@ #include #include #endif + +#include + +namespace DB +{ + +bool hasSupportedTarExtension(std::string_view path); +bool hasSupportedZipExtension(std::string_view path); +bool hasSupported7zExtension(std::string_view path); + +bool hasSupportedArchiveExtension(std::string_view path); + + +} diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp index 782602091ac..dfa098eede0 100644 --- a/src/IO/Archives/createArchiveReader.cpp +++ b/src/IO/Archives/createArchiveReader.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include @@ -12,7 +13,6 @@ extern const int CANNOT_UNPACK_ARCHIVE; extern const int SUPPORT_IS_DISABLED; } - std::shared_ptr createArchiveReader(const String & path_to_archive) { return createArchiveReader(path_to_archive, {}, 0); @@ -24,11 +24,7 @@ std::shared_ptr createArchiveReader( [[maybe_unused]] const std::function()> & archive_read_function, [[maybe_unused]] size_t archive_size) { - using namespace std::literals; - static constexpr std::array tar_extensions{ - ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv, ".tar.bz2"sv, ".tar.lzma"sv}; - - if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) + if (hasSupportedZipExtension(path_to_archive)) { #if USE_MINIZIP return std::make_shared(path_to_archive, archive_read_function, archive_size); @@ -36,8 +32,7 @@ std::shared_ptr createArchiveReader( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); #endif } - else if (std::any_of( - tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) + else if (hasSupportedTarExtension(path_to_archive)) { #if USE_LIBARCHIVE return std::make_shared(path_to_archive, archive_read_function); @@ -45,7 +40,7 @@ std::shared_ptr createArchiveReader( throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, 
"libarchive library is disabled"); #endif } - else if (path_to_archive.ends_with(".7z")) + else if (hasSupported7zExtension(path_to_archive)) { #if USE_LIBARCHIVE return std::make_shared(path_to_archive); diff --git a/src/IO/Archives/createArchiveWriter.cpp b/src/IO/Archives/createArchiveWriter.cpp index 9a169587088..53be0a85a10 100644 --- a/src/IO/Archives/createArchiveWriter.cpp +++ b/src/IO/Archives/createArchiveWriter.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -24,10 +25,7 @@ std::shared_ptr createArchiveWriter(const String & path_to_archi std::shared_ptr createArchiveWriter(const String & path_to_archive, [[maybe_unused]] std::unique_ptr archive_write_buffer) { - using namespace std::literals; - static constexpr std::array tar_extensions{ - ".tar"sv, ".tar.gz"sv, ".tgz"sv, ".tar.bz2"sv, ".tar.lzma"sv, ".tar.zst"sv, ".tzst"sv, ".tar.xz"sv}; - if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx")) + if (hasSupportedZipExtension(path_to_archive)) { #if USE_MINIZIP return std::make_shared(path_to_archive, std::move(archive_write_buffer)); @@ -35,8 +33,7 @@ createArchiveWriter(const String & path_to_archive, [[maybe_unused]] std::unique throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); #endif } - else if (std::any_of( - tar_extensions.begin(), tar_extensions.end(), [&](const auto extension) { return path_to_archive.ends_with(extension); })) + else if (hasSupportedTarExtension(path_to_archive)) { #if USE_LIBARCHIVE return std::make_shared(path_to_archive, std::move(archive_write_buffer)); diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 4bf7a3ddf86..fead18315d8 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -29,7 +30,7 @@ namespace ErrorCodes namespace S3 { -URI::URI(const std::string & uri_) +URI::URI(const std::string & uri_, bool allow_archive_path_syntax) { /// Case when bucket name represented in domain name of 
S3 URL. /// E.g. (https://bucket-name.s3.region.amazonaws.com/key) @@ -54,10 +55,11 @@ URI::URI(const std::string & uri_) static constexpr auto OSS = "OSS"; static constexpr auto EOS = "EOS"; - if (containsArchive(uri_)) - std::tie(uri_str, archive_pattern) = getPathToArchiveAndArchivePattern(uri_); + if (allow_archive_path_syntax) + std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_); else uri_str = uri_; + uri = Poco::URI(uri_str); std::unordered_map mapper; @@ -167,32 +169,37 @@ void URI::validateBucket(const String & bucket, const Poco::URI & uri) !uri.empty() ? " (" + uri.toString() + ")" : ""); } -bool URI::containsArchive(const std::string & source) +std::pair> URI::getURIAndArchivePattern(const std::string & source) { size_t pos = source.find("::"); - return (pos != std::string::npos); -} + if (pos == String::npos) + return {source, std::nullopt}; -std::pair URI::getPathToArchiveAndArchivePattern(const std::string & source) -{ - size_t pos = source.find("::"); - assert(pos != std::string::npos); + std::string_view path_to_archive_view = std::string_view{source}.substr(0, pos); + bool contains_spaces_around_operator = false; + while (path_to_archive_view.ends_with(' ')) + { + contains_spaces_around_operator = true; + path_to_archive_view.remove_suffix(1); + } - std::string path_to_archive = source.substr(0, pos); - while ((!path_to_archive.empty()) && path_to_archive.ends_with(' ')) - path_to_archive.pop_back(); + std::string_view archive_pattern_view = std::string_view{source}.substr(pos + 2); + while (archive_pattern_view.starts_with(' ')) + { + contains_spaces_around_operator = true; + archive_pattern_view.remove_prefix(1); + } - if (path_to_archive.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); + /// possible situations when the first part can be archive is only if one of the following is true: + /// - it contains supported extension + /// - it contains spaces after or before :: (URI cannot contain 
spaces) + /// - it contains characters that could mean glob expression + if (archive_pattern_view.empty() || path_to_archive_view.empty() + || (!contains_spaces_around_operator && !hasSupportedArchiveExtension(path_to_archive_view) + && path_to_archive_view.find_first_of("*?{") == std::string_view::npos)) + return {source, std::nullopt}; - std::string_view path_in_archive_view = std::string_view{source}.substr(pos + 2); - while (path_in_archive_view.front() == ' ') - path_in_archive_view.remove_prefix(1); - - if (path_in_archive_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); - - return {path_to_archive, std::string{path_in_archive_view}}; + return std::pair{std::string{path_to_archive_view}, std::string{archive_pattern_view}}; } } diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 363f98c46f5..80e2da96cd4 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -36,14 +36,13 @@ struct URI bool is_virtual_hosted_style; URI() = default; - explicit URI(const std::string & uri_); + explicit URI(const std::string & uri_, bool allow_archive_path_syntax = false); void addRegionToURI(const std::string & region); static void validateBucket(const std::string & bucket, const Poco::URI & uri); private: - bool containsArchive(const std::string & source); - std::pair getPathToArchiveAndArchivePattern(const std::string & source); + std::pair> getURIAndArchivePattern(const std::string & source); }; } diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 094ca069e7a..7542f59dcc4 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -142,14 +142,14 @@ ObjectStoragePtr StorageS3Configuration::createObjectStorage(ContextPtr context, void StorageS3Configuration::fromNamedCollection(const NamedCollection & collection, ContextPtr context) { - const auto settings = context->getSettingsRef(); + const auto & settings = 
context->getSettingsRef(); validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); auto filename = collection.getOrDefault("filename", ""); if (!filename.empty()) - url = S3::URI(std::filesystem::path(collection.get("url")) / filename); + url = S3::URI(std::filesystem::path(collection.get("url")) / filename, settings.allow_archive_path_syntax); else - url = S3::URI(collection.get("url")); + url = S3::URI(collection.get("url"), settings.allow_archive_path_syntax); auth_settings.access_key_id = collection.getOrDefault("access_key_id", ""); auth_settings.secret_access_key = collection.getOrDefault("secret_access_key", ""); @@ -330,7 +330,7 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ } /// This argument is always the first - url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + url = S3::URI(checkAndGetLiteralArgument(args[0], "url"), context->getSettingsRef().allow_archive_path_syntax); if (engine_args_to_idx.contains("format")) { diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6c31a8a3a71..0ccc2e1baf4 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2199,7 +2200,11 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::UInt64) source_fd = static_cast(literal->value.get()); else if (type == Field::Types::String) - StorageFile::parseFileSource(literal->value.get(), source_path, storage_args.path_to_archive); + StorageFile::parseFileSource( + literal->value.get(), + source_path, + storage_args.path_to_archive, + factory_args.getLocalContext()->getSettingsRef().allow_archive_path_syntax); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } @@ -2226,8 +2231,14 @@ SchemaCache & StorageFile::getSchemaCache(const ContextPtr & context) return schema_cache; } -void 
StorageFile::parseFileSource(String source, String & filename, String & path_to_archive) +void StorageFile::parseFileSource(String source, String & filename, String & path_to_archive, bool allow_archive_path_syntax) { + if (!allow_archive_path_syntax) + { + filename = std::move(source); + return; + } + size_t pos = source.find("::"); if (pos == String::npos) { @@ -2239,18 +2250,21 @@ void StorageFile::parseFileSource(String source, String & filename, String & pat while (path_to_archive_view.ends_with(' ')) path_to_archive_view.remove_suffix(1); - if (path_to_archive_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path to archive is empty"); - - path_to_archive = path_to_archive_view; - std::string_view filename_view = std::string_view{source}.substr(pos + 2); - while (filename_view.front() == ' ') + while (filename_view.starts_with(' ')) filename_view.remove_prefix(1); - if (filename_view.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Filename is empty"); + /// possible situations when the first part can be archive is only if one of the following is true: + /// - it contains supported extension + /// - it contains characters that could mean glob expression + if (filename_view.empty() || path_to_archive_view.empty() + || (!hasSupportedArchiveExtension(path_to_archive_view) && path_to_archive_view.find_first_of("*?{") == std::string_view::npos)) + { + filename = std::move(source); + return; + } + path_to_archive = path_to_archive_view; filename = filename_view; } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index f955889185c..1ec237c2579 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -128,7 +128,7 @@ public: static SchemaCache & getSchemaCache(const ContextPtr & context); - static void parseFileSource(String source, String & filename, String & path_to_archive); + static void parseFileSource(String source, String & filename, String & path_to_archive, bool allow_archive_path_syntax); static 
ArchiveInfo getArchiveInfo( const std::string & path_to_archive, diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 1b6d86f8fa5..af327cfe54e 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -26,7 +26,7 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr if (context->getApplicationType() != Context::ApplicationType::LOCAL) { ITableFunctionFileLike::parseFirstArguments(arg, context); - StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); + StorageFile::parseFileSource(std::move(filename), filename, path_to_archive, context->getSettingsRef().allow_archive_path_syntax); return; } @@ -42,7 +42,8 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr else if (filename == "stderr") fd = STDERR_FILENO; else - StorageFile::parseFileSource(std::move(filename), filename, path_to_archive); + StorageFile::parseFileSource( + std::move(filename), filename, path_to_archive, context->getSettingsRef().allow_archive_path_syntax); } else if (type == Field::Types::Int64 || type == Field::Types::UInt64) { @@ -63,9 +64,12 @@ std::optional TableFunctionFile::tryGetFormatFromFirstArgument() return FormatFactory::instance().tryGetFormatFromFileName(filename); } -StoragePtr TableFunctionFile::getStorage(const String & source, - const String & format_, const ColumnsDescription & columns, - ContextPtr global_context, const std::string & table_name, +StoragePtr TableFunctionFile::getStorage( + const String & source, + const String & format_, + const ColumnsDescription & columns, + ContextPtr global_context, + const std::string & table_name, const std::string & compression_method_) const { // For `file` table function, we are going to use format settings from the diff --git a/tests/queries/0_stateless/02952_archive_parsing.reference b/tests/queries/0_stateless/02952_archive_parsing.reference deleted file 
mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02952_archive_parsing.sql b/tests/queries/0_stateless/02952_archive_parsing.sql deleted file mode 100644 index 49b0223e6ec..00000000000 --- a/tests/queries/0_stateless/02952_archive_parsing.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT * FROM file('::a'); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.reference b/tests/queries/0_stateless/03214_parsing_archive_name_file.reference new file mode 100644 index 00000000000..d793d26dfc3 --- /dev/null +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.reference @@ -0,0 +1,16 @@ +::nonexistentfile.csv +1 +nonexistent::nonexistentfile.csv +1 +nonexistent :: nonexistentfile.csv +1 +nonexistent ::nonexistentfile.csv +1 +nonexistent.tar.gz :: nonexistentfile.csv +1 +nonexistent.zip:: nonexistentfile.csv +1 +nonexistent.tar.gz :: nonexistentfile.csv SETTINGS allow_archive_path_syntax=0 +1 +nonexistent.zip:: nonexistentfile.csv SETTINGS allow_archive_path_syntax=0 +1 diff --git a/tests/queries/0_stateless/03214_parsing_archive_name_file.sh b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh new file mode 100755 index 00000000000..b54cbb10aa6 --- /dev/null +++ b/tests/queries/0_stateless/03214_parsing_archive_name_file.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function try_to_read_file() +{ + file_to_read=$1 + file_argument=$2 + settings=$3 + + echo $file_argument $settings + $CLICKHOUSE_LOCAL -q "SELECT * FROM file('$file_argument') $settings" 2>&1 | grep -c "Cannot stat file.*$file_to_read" +} + +# if archive extension is not detected for part before '::', path is taken as is +try_to_read_file "::nonexistentfile.csv" "::nonexistentfile.csv" +try_to_read_file "nonexistent::nonexistentfile.csv" "nonexistent::nonexistentfile.csv" +try_to_read_file "nonexistent :: nonexistentfile.csv" "nonexistent :: nonexistentfile.csv" +try_to_read_file "nonexistent ::nonexistentfile.csv" "nonexistent ::nonexistentfile.csv" +# if archive extension is detected for part before '::', path is split into archive and filename +try_to_read_file "nonexistent.tar.gz" "nonexistent.tar.gz :: nonexistentfile.csv" +try_to_read_file "nonexistent.zip" "nonexistent.zip:: nonexistentfile.csv" +# disabling archive syntax will always parse path as is +try_to_read_file "nonexistent.tar.gz :: nonexistentfile.csv" "nonexistent.tar.gz :: nonexistentfile.csv" "SETTINGS allow_archive_path_syntax=0" +try_to_read_file "nonexistent.zip:: nonexistentfile.csv" "nonexistent.zip:: nonexistentfile.csv" "SETTINGS allow_archive_path_syntax=0" diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference new file mode 100644 index 00000000000..b27524812c7 --- /dev/null +++ b/tests/queries/0_stateless/03215_parsing_archive_name_s3.reference @@ -0,0 +1,3 @@ +::03215_archive.csv test/::03215_archive.csv +test::03215_archive.csv test/test::03215_archive.csv +test.zip::03215_archive.csv test/test.zip::03215_archive.csv diff --git a/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql new file mode 100644 index 00000000000..e34be475c5a --- /dev/null +++ 
b/tests/queries/0_stateless/03215_parsing_archive_name_s3.sql @@ -0,0 +1,7 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +SELECT _file, _path FROM s3(s3_conn, filename='::03215_archive.csv') ORDER BY (_file, _path); +SELECT _file, _path FROM s3(s3_conn, filename='test :: 03215_archive.csv') ORDER BY (_file, _path); -- { serverError S3_ERROR } +SELECT _file, _path FROM s3(s3_conn, filename='test::03215_archive.csv') ORDER BY (_file, _path); +SELECT _file, _path FROM s3(s3_conn, filename='test.zip::03215_archive.csv') ORDER BY (_file, _path) SETTINGS allow_archive_path_syntax=0; diff --git a/tests/queries/0_stateless/data_minio/::03215_archive.csv b/tests/queries/0_stateless/data_minio/::03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/::03215_archive.csv @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv b/tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/test.zip::03215_archive.csv @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/data_minio/test::03215_archive.csv b/tests/queries/0_stateless/data_minio/test::03215_archive.csv new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/test::03215_archive.csv @@ -0,0 +1 @@ +1 From e098acb4b32f764f5cf7d91ece315daf8343d8ab Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 6 Aug 2024 09:08:36 +0000 Subject: [PATCH 053/121] Backport #67800 to 24.7: Revert "Merge pull request #66510 from canhld94/fix_trivial_count_non_deterministic_func" --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 21 +++++++------------ src/Storages/VirtualColumnUtils.h | 10 +-------- ..._with_non_deterministic_function.reference | 2 -- ..._count_with_non_deterministic_function.sql | 4 
---- 5 files changed, 9 insertions(+), 30 deletions(-) delete mode 100644 tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference delete mode 100644 tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index faf2741a456..1a96e111d15 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1144,7 +1144,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_non_deterministic_functions=*/ false); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr); if (!filter_dag) return {}; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 151079154b1..27c52124e9c 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -271,8 +271,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes, - bool allow_non_deterministic_functions) + ActionsDAG::Nodes & additional_nodes) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -281,14 +280,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) + if (const auto * 
child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) node_copy.children.push_back(child_copy); - /// Expression like (now_allowed AND allowed) is not allowed if allow_non_deterministic_functions = true. This is important for - /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is - /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply - /// trivial count. - else if (!allow_non_deterministic_functions) - return nullptr; if (node_copy.children.empty()) return nullptr; @@ -314,7 +307,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_non_deterministic_functions); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) return nullptr; return &node_copy; @@ -328,7 +321,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions()->clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag->getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_non_deterministic_functions)) + if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -362,13 +355,13 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions) +ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) { if (!predicate) return nullptr; ActionsDAG::Nodes 
additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_non_deterministic_functions); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); if (!res) return nullptr; @@ -377,7 +370,7 @@ ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_non_deterministic_functions=*/ false); + auto dag = splitFilterDagForAllowedInputs(predicate, &block); if (dag) filterBlockWithDAG(dag, block, context); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index e5cfa47c8f6..9045a2f5481 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -32,15 +32,7 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -/// When allow_non_deterministic_functions is true then even if the predicate contains non-deterministic -/// functions, we still allow to extract a part of the predicate, otherwise we return nullptr. -/// allow_non_deterministic_functions must be false when we are going to use the result to filter parts in -/// MergeTreeData::totalRowsByPartitionPredicateImp. For example, if the query is -/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` -/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is -/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial -/// count optimization will be mistakenly applied to the query. 
-ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_non_deterministic_functions = true); +ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); /// Extract from the input stream a set of `name` column values template diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference deleted file mode 100644 index 6ed281c757a..00000000000 --- a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference +++ /dev/null @@ -1,2 +0,0 @@ -1 -1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql deleted file mode 100644 index bb3269da597..00000000000 --- a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql +++ /dev/null @@ -1,4 +0,0 @@ -CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; -INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; -SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; -SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; From bec7f1ab73c820dcae28a17b6badd83fc5554184 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 6 Aug 2024 11:31:17 +0200 Subject: [PATCH 054/121] Fix build (broken by bad merge) --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 1a96e111d15..78a551591a6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1144,7 +1144,7 @@ std::optional 
MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); if (!filter_dag) return {}; From b665c1f880280f202d461838e82d11264a2013a5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 7 Aug 2024 10:06:43 +0000 Subject: [PATCH 055/121] Backport #66243 to 24.7: Fix filling of missed subcolumns --- src/DataTypes/IDataType.cpp | 4 +- src/DataTypes/ObjectUtils.cpp | 31 ++++++ src/DataTypes/ObjectUtils.h | 3 + src/DataTypes/Serializations/ISerialization.h | 3 +- src/Interpreters/inplaceBlockConversions.cpp | 95 +++++++++++++++---- src/Storages/MergeTree/IMergeTreeReader.cpp | 52 +++++++--- src/Storages/MergeTree/IMergeTreeReader.h | 3 + .../MergeTree/MergeTreeReaderCompact.cpp | 74 ++++++++++----- .../MergeTree/MergeTreeReaderCompact.h | 4 +- .../MergeTreeReaderCompactSingleBuffer.cpp | 6 +- ...2026_describe_include_subcolumns.reference | 2 +- .../03203_fill_missed_subcolumns.reference | 31 ++++++ .../03203_fill_missed_subcolumns.sql | 47 +++++++++ 13 files changed, 297 insertions(+), 58 deletions(-) create mode 100644 tests/queries/0_stateless/03203_fill_missed_subcolumns.reference create mode 100644 tests/queries/0_stateless/03203_fill_missed_subcolumns.sql diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 1cb64b65d3a..824bc6e33b0 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -90,7 +90,9 @@ void IDataType::forEachSubcolumn( { auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len); auto subdata = ISerialization::createFromPath(subpath, prefix_len); - callback(subpath, name, subdata); + auto path_copy = subpath; + 
path_copy.resize(prefix_len); + callback(path_copy, name, subdata); } subpath[i].visited = true; } diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index 1d525e5987f..356e609e77a 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,36 @@ DataTypePtr getBaseTypeOfArray(const DataTypePtr & type) return last_array ? last_array->getNestedType() : type; } +DataTypePtr getBaseTypeOfArray(DataTypePtr type, const Names & tuple_elements) +{ + auto it = tuple_elements.begin(); + while (true) + { + if (const auto * type_array = typeid_cast(type.get())) + { + type = type_array->getNestedType(); + } + else if (const auto * type_tuple = typeid_cast(type.get())) + { + if (it == tuple_elements.end()) + break; + + auto pos = type_tuple->tryGetPositionByName(*it); + if (!pos) + break; + + ++it; + type = type_tuple->getElement(*pos); + } + else + { + break; + } + } + + return type; +} + ColumnPtr getBaseColumnOfArray(const ColumnPtr & column) { /// Get raw pointers to avoid extra copying of column pointers. diff --git a/src/DataTypes/ObjectUtils.h b/src/DataTypes/ObjectUtils.h index 6599d8adef1..21e5c3b2f59 100644 --- a/src/DataTypes/ObjectUtils.h +++ b/src/DataTypes/ObjectUtils.h @@ -27,6 +27,9 @@ size_t getNumberOfDimensions(const IColumn & column); /// Returns type of scalars of Array of arbitrary dimensions. DataTypePtr getBaseTypeOfArray(const DataTypePtr & type); +/// The same as above but takes into account Tuples of Nested. +DataTypePtr getBaseTypeOfArray(DataTypePtr type, const Names & tuple_elements); + /// Returns Array type with requested scalar type and number of dimensions. 
DataTypePtr createArrayOfType(DataTypePtr type, size_t num_dimensions); diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 255dbbfadd2..5d0bf60c59f 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -195,7 +195,7 @@ public: /// Types of substreams that can have arbitrary name. static const std::set named_types; - Type type; + Type type = Type::Regular; /// The name of a variant element type. String variant_element_name; @@ -212,6 +212,7 @@ public: /// Flag, that may help to traverse substream paths. mutable bool visited = false; + Substream() = default; Substream(Type type_) : type(type_) {} /// NOLINT String toString() const; }; diff --git a/src/Interpreters/inplaceBlockConversions.cpp b/src/Interpreters/inplaceBlockConversions.cpp index 239cce5b427..e48e41295f6 100644 --- a/src/Interpreters/inplaceBlockConversions.cpp +++ b/src/Interpreters/inplaceBlockConversions.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -35,8 +36,13 @@ namespace /// Add all required expressions for missing columns calculation void addDefaultRequiredExpressionsRecursively( - const Block & block, const String & required_column_name, DataTypePtr required_column_type, - const ColumnsDescription & columns, ASTPtr default_expr_list_accum, NameSet & added_columns, bool null_as_default) + const Block & block, + const String & required_column_name, + DataTypePtr required_column_type, + const ColumnsDescription & columns, + ASTPtr default_expr_list_accum, + NameSet & added_columns, + bool null_as_default) { checkStackSize(); @@ -273,6 +279,53 @@ static std::unordered_map collectOffsetsColumns( return offsets_columns; } +static ColumnPtr createColumnWithDefaultValue(const IDataType & data_type, const String & subcolumn_name, size_t num_rows) +{ + auto column = data_type.createColumnConstWithDefaultValue(num_rows); + + /// We must turn a constant 
column into a full column because the interpreter could infer + /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. + + if (subcolumn_name.empty()) + return column->convertToFullColumnIfConst(); + + /// Firstly get subcolumn from const column and then replicate. + column = assert_cast(*column).getDataColumnPtr(); + column = data_type.getSubcolumn(subcolumn_name, column); + + return ColumnConst::create(std::move(column), num_rows)->convertToFullColumnIfConst(); +} + +static bool hasDefault(const StorageMetadataPtr & metadata_snapshot, const NameAndTypePair & column) +{ + if (!metadata_snapshot) + return false; + + const auto & columns = metadata_snapshot->getColumns(); + if (columns.has(column.name)) + return columns.hasDefault(column.name); + + auto name_in_storage = column.getNameInStorage(); + return columns.hasDefault(name_in_storage); +} + +static String removeTupleElementsFromSubcolumn(String subcolumn_name, const Names & tuple_elements) +{ + /// Add a dot to the end of name for convenience. 
+ subcolumn_name += "."; + for (const auto & elem : tuple_elements) + { + auto pos = subcolumn_name.find(elem + "."); + if (pos != std::string::npos) + subcolumn_name.erase(pos, elem.size() + 1); + } + + if (subcolumn_name.ends_with(".")) + subcolumn_name.pop_back(); + + return subcolumn_name; +} + void fillMissingColumns( Columns & res_columns, size_t num_rows, @@ -298,21 +351,17 @@ void fillMissingColumns( auto requested_column = requested_columns.begin(); for (size_t i = 0; i < num_columns; ++i, ++requested_column) { - const auto & [name, type] = *requested_column; - - if (res_columns[i] && partially_read_columns.contains(name)) + if (res_columns[i] && partially_read_columns.contains(requested_column->name)) res_columns[i] = nullptr; - if (res_columns[i]) - continue; - - if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(name)) + /// Nothing to fill or default should be filled in evaluateMissingDefaults + if (res_columns[i] || hasDefault(metadata_snapshot, *requested_column)) continue; std::vector current_offsets; size_t num_dimensions = 0; - const auto * array_type = typeid_cast(type.get()); + const auto * array_type = typeid_cast(requested_column->type.get()); if (array_type && !offsets_columns.empty()) { num_dimensions = getNumberOfDimensions(*array_type); @@ -347,20 +396,34 @@ void fillMissingColumns( if (!current_offsets.empty()) { - size_t num_empty_dimensions = num_dimensions - current_offsets.size(); - auto scalar_type = createArrayOfType(getBaseTypeOfArray(type), num_empty_dimensions); + Names tuple_elements; + auto serialization = IDataType::getSerialization(*requested_column); + /// For Nested columns collect names of tuple elements and skip them while getting the base type of array. 
+ IDataType::forEachSubcolumn([&](const auto & path, const auto &, const auto &) + { + if (path.back().type == ISerialization::Substream::TupleElement) + tuple_elements.push_back(path.back().name_of_substream); + }, ISerialization::SubstreamData(serialization)); + + /// The number of dimensions that belongs to the array itself but not shared in Nested column. + /// For example for column "n Nested(a UInt64, b Array(UInt64))" this value is 0 for `n.a` and 1 for `n.b`. + size_t num_empty_dimensions = num_dimensions - current_offsets.size(); + + auto base_type = getBaseTypeOfArray(requested_column->getTypeInStorage(), tuple_elements); + auto scalar_type = createArrayOfType(base_type, num_empty_dimensions); size_t data_size = assert_cast(*current_offsets.back()).getData().back(); - res_columns[i] = scalar_type->createColumnConstWithDefaultValue(data_size)->convertToFullColumnIfConst(); + + /// Remove names of tuple elements because they are already processed by 'getBaseTypeOfArray'. + auto subcolumn_name = removeTupleElementsFromSubcolumn(requested_column->getSubcolumnName(), tuple_elements); + res_columns[i] = createColumnWithDefaultValue(*scalar_type, subcolumn_name, data_size); for (auto it = current_offsets.rbegin(); it != current_offsets.rend(); ++it) res_columns[i] = ColumnArray::create(res_columns[i], *it); } else { - /// We must turn a constant column into a full column because the interpreter could infer - /// that it is constant everywhere but in some blocks (from other parts) it can be a full column. 
- res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst(); + res_columns[i] = createColumnWithDefaultValue(*requested_column->getTypeInStorage(), requested_column->getSubcolumnName(), num_rows); } } } diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 4ad7f6ef991..47ec496b078 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ IMergeTreeReader::IMergeTreeReader( , alter_conversions(data_part_info_for_read->getAlterConversions()) /// For wide parts convert plain arrays of Nested to subcolumns /// to allow to use shared offset column from cache. + , original_requested_columns(columns_) , requested_columns(data_part_info_for_read->isWidePart() ? Nested::convertToSubcolumns(columns_) : columns_) @@ -138,25 +140,33 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns { try { - size_t num_columns = requested_columns.size(); + size_t num_columns = original_requested_columns.size(); if (res_columns.size() != num_columns) throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::fillMissingColumns. " "Expected {}, got {}", num_columns, res_columns.size()); - /// Convert columns list to block. - /// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions. - auto name_and_type = requested_columns.begin(); - for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) - { - if (res_columns[pos] == nullptr) - continue; + NameSet full_requested_columns_set; + NamesAndTypesList full_requested_columns; - additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name}); + /// Convert columns list to block. And convert subcolumns to full columns. 
+ /// Defaults should be executed on full columns to get correct values for subcolumns. + /// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions. + + auto it = original_requested_columns.begin(); + for (size_t pos = 0; pos < num_columns; ++pos, ++it) + { + auto name_in_storage = it->getNameInStorage(); + + if (full_requested_columns_set.emplace(name_in_storage).second) + full_requested_columns.emplace_back(name_in_storage, it->getTypeInStorage()); + + if (res_columns[pos]) + additional_columns.insert({res_columns[pos], it->type, it->name}); } auto dag = DB::evaluateMissingDefaults( - additional_columns, requested_columns, + additional_columns, full_requested_columns, storage_snapshot->metadata->getColumns(), data_part_info_for_read->getContext()); @@ -170,9 +180,18 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns } /// Move columns from block. - name_and_type = requested_columns.begin(); - for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) - res_columns[pos] = std::move(additional_columns.getByName(name_and_type->name).column); + it = original_requested_columns.begin(); + for (size_t pos = 0; pos < num_columns; ++pos, ++it) + { + auto name_in_storage = it->getNameInStorage(); + res_columns[pos] = additional_columns.getByName(name_in_storage).column; + + if (it->isSubcolumn()) + { + const auto & type_in_storage = it->getTypeInStorage(); + res_columns[pos] = type_in_storage->getSubcolumn(it->getSubcolumnName(), res_columns[pos]); + } + } } catch (Exception & e) { @@ -192,7 +211,12 @@ bool IMergeTreeReader::isSubcolumnOffsetsOfNested(const String & name_in_storage if (!data_part_info_for_read->isWidePart() || subcolumn_name != "size0") return false; - return Nested::isSubcolumnOfNested(name_in_storage, part_columns); + auto split = Nested::splitName(name_in_storage); + if (split.second.empty()) + return false; + + auto nested_column = 
part_columns.tryGetColumn(GetColumnsOptions::All, split.first); + return nested_column && isNested(nested_column->type); } String IMergeTreeReader::getColumnNameInPart(const NameAndTypePair & required_column) const diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index a1ec0339fd6..d799ce57b40 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -112,6 +112,9 @@ protected: private: /// Columns that are requested to read. + NamesAndTypesList original_requested_columns; + + /// The same as above but with converted Arrays to subcolumns of Nested. NamesAndTypesList requested_columns; /// Actual columns description in part. diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index a2b8f0ad96f..69dc2e4b2bb 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -60,39 +60,25 @@ void MergeTreeReaderCompact::fillColumnPositions() for (size_t i = 0; i < columns_num; ++i) { - const auto & column_to_read = columns_to_read[i]; - + auto & column_to_read = columns_to_read[i]; auto position = data_part_info_for_read->getColumnPosition(column_to_read.getNameInStorage()); - bool is_array = isArray(column_to_read.type); if (column_to_read.isSubcolumn()) { - auto storage_column_from_part = getColumnInPart( - {column_to_read.getNameInStorage(), column_to_read.getTypeInStorage()}); + NameAndTypePair column_in_storage{column_to_read.getNameInStorage(), column_to_read.getTypeInStorage()}; + auto storage_column_from_part = getColumnInPart(column_in_storage); auto subcolumn_name = column_to_read.getSubcolumnName(); if (!storage_column_from_part.type->hasSubcolumn(subcolumn_name)) position.reset(); } + column_positions[i] = std::move(position); + /// If array of Nested column is missing in part, /// we have to read its offsets if they exist. 
- if (!position && is_array) - { - auto column_to_read_with_subcolumns = getColumnConvertedToSubcolumnOfNested(column_to_read); - auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns); - - if (name_level_for_offsets.has_value()) - { - column_positions[i] = data_part_info_for_read->getColumnPosition(name_level_for_offsets->first); - columns_for_offsets[i] = name_level_for_offsets; - partially_read_columns.insert(column_to_read.name); - } - } - else - { - column_positions[i] = std::move(position); - } + if (!column_positions[i]) + findPositionForMissedNested(i); } } @@ -115,7 +101,7 @@ NameAndTypePair MergeTreeReaderCompact::getColumnConvertedToSubcolumnOfNested(co if (!storage_columns_with_collected_nested) { - auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); + auto options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects(); auto storage_columns_list = Nested::collect(storage_snapshot->getColumns(options)); storage_columns_with_collected_nested = ColumnsDescription(std::move(storage_columns_list)); } @@ -125,11 +111,44 @@ NameAndTypePair MergeTreeReaderCompact::getColumnConvertedToSubcolumnOfNested(co Nested::concatenateName(name_in_storage, subcolumn_name)); } +void MergeTreeReaderCompact::findPositionForMissedNested(size_t pos) +{ + auto & column = columns_to_read[pos]; + + bool is_array = isArray(column.type); + bool is_offsets_subcolumn = isArray(column.getTypeInStorage()) && column.getSubcolumnName() == "size0"; + + if (!is_array && !is_offsets_subcolumn) + return; + + NameAndTypePair column_in_storage{column.getNameInStorage(), column.getTypeInStorage()}; + + auto column_to_read_with_subcolumns = getColumnConvertedToSubcolumnOfNested(column_in_storage); + auto name_level_for_offsets = findColumnForOffsets(column_to_read_with_subcolumns); + + if (!name_level_for_offsets) + return; + + column_positions[pos] = 
data_part_info_for_read->getColumnPosition(name_level_for_offsets->first); + + if (is_offsets_subcolumn) + { + /// Read offsets from antoher array from the same Nested column. + column = {name_level_for_offsets->first, column.getSubcolumnName(), column.getTypeInStorage(), column.type}; + } + else + { + columns_for_offsets[pos] = std::move(name_level_for_offsets); + partially_read_columns.insert(column.name); + } +} + void MergeTreeReaderCompact::readData( const NameAndTypePair & name_and_type, ColumnPtr & column, size_t rows_to_read, - const InputStreamGetter & getter) + const InputStreamGetter & getter, + ISerialization::SubstreamsCache & cache) { try { @@ -140,6 +159,13 @@ void MergeTreeReaderCompact::readData( deserialize_settings.getter = getter; deserialize_settings.avg_value_size_hint = avg_value_size_hints[name]; + auto it = cache.find(name); + if (it != cache.end() && it->second != nullptr) + { + column = it->second; + return; + } + if (name_and_type.isSubcolumn()) { const auto & type_in_storage = name_and_type.getTypeInStorage(); @@ -163,6 +189,8 @@ void MergeTreeReaderCompact::readData( serialization->deserializeBinaryBulkWithMultipleStreams(column, rows_to_read, deserialize_settings, deserialize_binary_bulk_state_map[name], nullptr); } + cache[name] = column; + size_t read_rows_in_column = column->size() - column_size_before_reading; if (read_rows_in_column != rows_to_read) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index a783e595af5..1c6bd1474e3 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -36,6 +36,7 @@ public: protected: void fillColumnPositions(); NameAndTypePair getColumnConvertedToSubcolumnOfNested(const NameAndTypePair & column); + void findPositionForMissedNested(size_t pos); using InputStreamGetter = ISerialization::InputStreamGetter; @@ -43,7 +44,8 @@ 
protected: const NameAndTypePair & name_and_type, ColumnPtr & column, size_t rows_to_read, - const InputStreamGetter & getter); + const InputStreamGetter & getter, + ISerialization::SubstreamsCache & cache); void readPrefix( const NameAndTypePair & name_and_type, diff --git a/src/Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.cpp b/src/Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.cpp index 2b2cf493bb5..649bcce1188 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompactSingleBuffer.cpp @@ -26,6 +26,10 @@ try { size_t rows_to_read = data_part_info_for_read->getIndexGranularity().getMarkRows(from_mark); + /// Use cache to avoid reading the column with the same name twice. + /// It may happen if there are empty array Nested in the part. + ISerialization::SubstreamsCache cache; + for (size_t pos = 0; pos < num_columns; ++pos) { if (!res_columns[pos]) @@ -52,7 +56,7 @@ try }; readPrefix(columns_to_read[pos], buffer_getter, buffer_getter_for_prefix, columns_for_offsets[pos]); - readData(columns_to_read[pos], column, rows_to_read, buffer_getter); + readData(columns_to_read[pos], column, rows_to_read, buffer_getter, cache); } ++from_mark; diff --git a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference index dec65f62748..62efafceaad 100644 --- a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference +++ b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference @@ -26,7 +26,7 @@ 10. │ t.s │ String │ │ │ │ ZSTD(1) │ │ 1 │ 11. │ t.a │ Array(Tuple( a UInt32, - b UInt32)) │ │ │ │ │ │ 1 │ + b UInt32)) │ │ │ │ ZSTD(1) │ │ 1 │ 12. │ t.a.size0 │ UInt64 │ │ │ │ │ │ 1 │ 13. │ t.a.a │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ 14. 
│ t.a.b │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ diff --git a/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference b/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference new file mode 100644 index 00000000000..872eb7da3c8 --- /dev/null +++ b/tests/queries/0_stateless/03203_fill_missed_subcolumns.reference @@ -0,0 +1,31 @@ +0 +2 +4 +6 +8 +0 +2 +4 +6 +8 +1 ['aaa',NULL] [NULL,NULL] +2 ['ccc'] [NULL] +3 [NULL] [NULL] +4 [NULL,'bbb'] ['ddd',NULL] +5 [NULL] [NULL] +1 2 2 +2 1 1 +3 1 1 +4 2 2 +5 1 1 +1 [0,1] [1,1] +2 [0] [1] +3 [1] [1] +4 [1,0] [0,1] +5 [1] [1] +1 ('foo','bar') [1,NULL,3] +2 ('aaa','bbb') [1,NULL,3] +3 ('ccc','ddd') [4,5,6] +1 foo bar 3 [0,1,0] +2 foo bar 3 [0,1,0] +3 ccc ddd 3 [0,0,0] diff --git a/tests/queries/0_stateless/03203_fill_missed_subcolumns.sql b/tests/queries/0_stateless/03203_fill_missed_subcolumns.sql new file mode 100644 index 00000000000..2789c9de35c --- /dev/null +++ b/tests/queries/0_stateless/03203_fill_missed_subcolumns.sql @@ -0,0 +1,47 @@ +DROP TABLE IF EXISTS t_missed_subcolumns; + +-- .null subcolumn + +CREATE TABLE t_missed_subcolumns (x UInt32) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO t_missed_subcolumns SELECT * FROM numbers(10); + +ALTER TABLE t_missed_subcolumns ADD COLUMN `y` Nullable(UInt32); + +INSERT INTO t_missed_subcolumns SELECT number, if(number % 2, NULL, number) FROM numbers(10); + +SELECT x FROM t_missed_subcolumns WHERE y IS NOT NULL SETTINGS optimize_functions_to_subcolumns = 1; +SELECT x FROM t_missed_subcolumns WHERE y IS NOT NULL SETTINGS optimize_functions_to_subcolumns = 0; + +DROP TABLE IF EXISTS t_missed_subcolumns; + +-- .null and .size0 subcolumn in array + +CREATE TABLE t_missed_subcolumns (id UInt64, `n.a` Array(Nullable(String))) ENGINE = MergeTree ORDER BY id; + +INSERT INTO t_missed_subcolumns VALUES (1, ['aaa', NULL]) (2, ['ccc']) (3, [NULL]); +ALTER TABLE t_missed_subcolumns ADD COLUMN `n.b` Array(Nullable(String)); +INSERT INTO t_missed_subcolumns VALUES (4, [NULL, 
'bbb'], ['ddd', NULL]), (5, [NULL], [NULL]); + +SELECT id, n.a, n.b FROM t_missed_subcolumns ORDER BY id; +SELECT id, n.a.size0, n.b.size0 FROM t_missed_subcolumns ORDER BY id; +SELECT id, n.a.null, n.b.null FROM t_missed_subcolumns ORDER BY id; + +DROP TABLE IF EXISTS t_missed_subcolumns; + +-- subcolumns and custom defaults + +CREATE TABLE t_missed_subcolumns (id UInt64) ENGINE = MergeTree ORDER BY id; +SYSTEM STOP MERGES t_missed_subcolumns; + +INSERT INTO t_missed_subcolumns VALUES (1); + +ALTER TABLE t_missed_subcolumns ADD COLUMN t Tuple(a String, b String) DEFAULT ('foo', 'bar'); +INSERT INTO t_missed_subcolumns VALUES (2, ('aaa', 'bbb')); + +ALTER TABLE t_missed_subcolumns ADD COLUMN arr Array(Nullable(UInt64)) DEFAULT [1, NULL, 3]; +INSERT INTO t_missed_subcolumns VALUES (3, ('ccc', 'ddd'), [4, 5, 6]); + +SELECT id, t, arr FROM t_missed_subcolumns ORDER BY id; +SELECT id, t.a, t.b, arr.size0, arr.null FROM t_missed_subcolumns ORDER BY id; + +DROP TABLE t_missed_subcolumns; From 799c3c87c8e04f7414930946178a9503635b2472 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 7 Aug 2024 14:08:28 +0000 Subject: [PATCH 056/121] Backport #67911 to 24.7: Validate data types in ALTER ADD/MODIFY COLUMN --- src/Storages/AlterCommands.cpp | 5 +++++ .../02910_object-json-crash-add-column.sql | 2 ++ ..._type_in_alter_add_modify_column.reference | 0 ...lidate_type_in_alter_add_modify_column.sql | 19 +++++++++++++++++++ 4 files changed, 26 insertions(+) create mode 100644 tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.reference create mode 100644 tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.sql diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 7891042bb96..dfb388ffdb2 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -1316,6 +1317,8 @@ void 
AlterCommands::validate(const StoragePtr & table, ContextPtr context) const throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type have to be specified for column {} to add", backQuote(column_name)); + validateDataType(command.data_type, DataTypeValidationSettings(context->getSettingsRef())); + /// FIXME: Adding a new column of type Object(JSON) is broken. /// Looks like there is something around default expression for this column (method `getDefault` is not implemented for the data type Object). /// But after ALTER TABLE ADD COLUMN we need to fill existing rows with something (exactly the default value). @@ -1395,6 +1398,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const /// So we don't allow to do it for now. if (command.data_type) { + validateDataType(command.data_type, DataTypeValidationSettings(context->getSettingsRef())); + const GetColumnsOptions options(GetColumnsOptions::All); const auto old_data_type = all_columns.getColumn(options, column_name).type; diff --git a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql index b2d64be1676..bda5e958453 100644 --- a/tests/queries/0_stateless/02910_object-json-crash-add-column.sql +++ b/tests/queries/0_stateless/02910_object-json-crash-add-column.sql @@ -1,3 +1,5 @@ +SET allow_experimental_object_type=1; + DROP TABLE IF EXISTS test02910; CREATE TABLE test02910 diff --git a/tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.reference b/tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.sql b/tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.sql new file mode 100644 index 00000000000..267bc7111f4 --- /dev/null +++ b/tests/queries/0_stateless/03215_validate_type_in_alter_add_modify_column.sql 
@@ -0,0 +1,19 @@ +set allow_experimental_variant_type = 0; +set allow_experimental_dynamic_type = 0; +set allow_suspicious_low_cardinality_types = 0; +set allow_suspicious_fixed_string_types = 0; + +drop table if exists test; +create table test (id UInt64) engine=MergeTree order by id; +alter table test add column bad Variant(UInt32, String); -- {serverError ILLEGAL_COLUMN} +alter table test add column bad Dynamic; -- {serverError ILLEGAL_COLUMN} +alter table test add column bad LowCardinality(UInt8); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +alter table test add column bad FixedString(10000); -- {serverError ILLEGAL_COLUMN} + +alter table test modify column id Variant(UInt32, String); -- {serverError ILLEGAL_COLUMN} +alter table test modify column id Dynamic; -- {serverError ILLEGAL_COLUMN} +alter table test modify column id LowCardinality(UInt8); -- {serverError SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY} +alter table test modify column id FixedString(10000); -- {serverError ILLEGAL_COLUMN} + +drop table test; + From ddf72aba43feae2076d23bbe20b4c51465086823 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 8 Aug 2024 13:12:18 +0000 Subject: [PATCH 057/121] Backport #67661 to 24.7: Fix crash on `percent_rank` --- .../window-functions/lagInFrame.md | 2 +- .../window-functions/leadInFrame.md | 2 +- .../window-functions/percent_rank.md | 72 +++++++++++ src/AggregateFunctions/WindowFunction.h | 117 ++++++++++++++++++ src/Interpreters/ExpressionAnalyzer.cpp | 22 +++- src/Interpreters/ExpressionAnalyzer.h | 7 +- src/Planner/PlannerActionsVisitor.cpp | 65 +++------- src/Planner/PlannerActionsVisitor.h | 14 +-- src/Planner/PlannerWindowFunctions.cpp | 26 ++-- src/Planner/Utils.cpp | 19 +++ src/Planner/Utils.h | 7 ++ src/Processors/Transforms/WindowTransform.cpp | 103 ++------------- src/Processors/Transforms/WindowTransform.h | 25 +--- .../01592_window_functions.reference | 13 -- .../0_stateless/01592_window_functions.sql | 23 +--- 
.../0_stateless/03037_precent_rank.reference | 22 ++++ .../0_stateless/03037_precent_rank.sql | 52 ++++++++ 17 files changed, 363 insertions(+), 228 deletions(-) create mode 100644 docs/en/sql-reference/window-functions/percent_rank.md create mode 100644 src/AggregateFunctions/WindowFunction.h create mode 100644 tests/queries/0_stateless/03037_precent_rank.reference create mode 100644 tests/queries/0_stateless/03037_precent_rank.sql diff --git a/docs/en/sql-reference/window-functions/lagInFrame.md b/docs/en/sql-reference/window-functions/lagInFrame.md index 049e095c10f..fc5e2185b6a 100644 --- a/docs/en/sql-reference/window-functions/lagInFrame.md +++ b/docs/en/sql-reference/window-functions/lagInFrame.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/lagInFrame sidebar_label: lagInFrame -sidebar_position: 8 +sidebar_position: 9 --- # lagInFrame diff --git a/docs/en/sql-reference/window-functions/leadInFrame.md b/docs/en/sql-reference/window-functions/leadInFrame.md index fc1b92cc266..29b054c46aa 100644 --- a/docs/en/sql-reference/window-functions/leadInFrame.md +++ b/docs/en/sql-reference/window-functions/leadInFrame.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/window-functions/leadInFrame sidebar_label: leadInFrame -sidebar_position: 9 +sidebar_position: 10 --- # leadInFrame diff --git a/docs/en/sql-reference/window-functions/percent_rank.md b/docs/en/sql-reference/window-functions/percent_rank.md new file mode 100644 index 00000000000..2e348f2a333 --- /dev/null +++ b/docs/en/sql-reference/window-functions/percent_rank.md @@ -0,0 +1,72 @@ +--- +slug: /en/sql-reference/window-functions/percent_rank +sidebar_label: percent_rank +sidebar_position: 8 +--- + +# percent_rank + +Returns the relative rank (i.e. percentile) of rows within a window partition. 
+ +**Syntax** + +Alias: `percentRank` (case-sensitive) + +```sql +percent_rank (column_name) + OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column] + [RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] | [window_name]) +FROM table_name +WINDOW window_name as ([PARTITION BY grouping_column] [ORDER BY sorting_column] RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +``` + +The default and required window frame definition is `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`. + +For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax). + +**Example** + + +Query: + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'), + ('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'), + ('South Hampton Seagulls', 'James Henderson', 140000, 'M'); +``` + +```sql +SELECT player, salary, + percent_rank() OVER (ORDER BY salary DESC) AS percent_rank +FROM salaries; +``` + +Result: + +```response + + ┌─player──────────┬─salary─┬───────percent_rank─┐ +1. │ Gary Chen │ 195000 │ 0 │ +2. │ Robert George │ 195000 │ 0 │ +3. │ Charles Juarez │ 190000 │ 0.3333333333333333 │ +4. │ Michael Stanley │ 150000 │ 0.5 │ +5. │ Scott Harrison │ 150000 │ 0.5 │ +6. │ Douglas Benson │ 150000 │ 0.5 │ +7. 
│ James Henderson │ 140000 │ 1 │ + └─────────────────┴────────┴────────────────────┘ + +``` diff --git a/src/AggregateFunctions/WindowFunction.h b/src/AggregateFunctions/WindowFunction.h new file mode 100644 index 00000000000..f7fbd7389ea --- /dev/null +++ b/src/AggregateFunctions/WindowFunction.h @@ -0,0 +1,117 @@ +#pragma once +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} +class WindowTransform; + + +// Interface for true window functions. It's not much of an interface, they just +// accept the guts of WindowTransform and do 'something'. Given a small number of +// true window functions, and the fact that the WindowTransform internals are +// pretty much well-defined in domain terms (e.g. frame boundaries), this is +// somewhat acceptable. +class IWindowFunction +{ +public: + virtual ~IWindowFunction() = default; + + // Must insert the result for current_row. + virtual void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const = 0; + + virtual std::optional getDefaultFrame() const { return {}; } + + virtual ColumnPtr castColumn(const Columns &, const std::vector &) { return nullptr; } + + /// Is the frame type supported by this function. + virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } +}; + +// Runtime data for computing one window function. +struct WindowFunctionWorkspace +{ + AggregateFunctionPtr aggregate_function; + + // Cached value of aggregate function isState virtual method + bool is_aggregate_function_state = false; + + // This field is set for pure window functions. When set, we ignore the + // window_function.aggregate_function, and work through this interface + // instead. + IWindowFunction * window_function_impl = nullptr; + + std::vector argument_column_indices; + + // Will not be initialized for a pure window function. + mutable AlignedBuffer aggregate_function_state; + + // Argument columns. 
Be careful, this is a per-block cache. + std::vector argument_columns; + UInt64 cached_block_number = std::numeric_limits::max(); +}; + +// A basic implementation for a true window function. It pretends to be an +// aggregate function, but refuses to work as such. +struct WindowFunction : public IAggregateFunctionHelper, public IWindowFunction +{ + std::string name; + + WindowFunction( + const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : IAggregateFunctionHelper(argument_types_, parameters_, result_type_), name(name_) + { + } + + bool isOnlyWindowFunction() const override { return true; } + + [[noreturn]] void fail() const + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "The function '{}' can only be used as a window function, not as an aggregate function", getName()); + } + + String getName() const override { return name; } + void create(AggregateDataPtr __restrict) const override { } + void destroy(AggregateDataPtr __restrict) const noexcept override { } + bool hasTrivialDestructor() const override { return true; } + size_t sizeOfData() const override { return 0; } + size_t alignOfData() const override { return 1; } + void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); } + void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); } + void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional) const override { fail(); } + void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional, Arena *) const override { fail(); } + void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); } +}; + +template +struct StatefulWindowFunction : public WindowFunction +{ + StatefulWindowFunction( + const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) + : WindowFunction(name_, 
argument_types_, parameters_, result_type_) + { + } + + size_t sizeOfData() const override { return sizeof(State); } + size_t alignOfData() const override { return 1; } + + void create(AggregateDataPtr __restrict place) const override { new (place) State(); } + + void destroy(AggregateDataPtr __restrict place) const noexcept override { reinterpret_cast(place)->~State(); } + + bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v; } + + State & getState(const WindowFunctionWorkspace & workspace) const + { + return *reinterpret_cast(workspace.aggregate_function_state.data()); + } +}; + +} diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 16d0eb71278..31090070742 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -38,6 +38,7 @@ #include #include +#include #include #include @@ -590,6 +591,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAG & actions, Aggrega void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, const WindowDescriptions & existing_descriptions, + AggregateFunctionPtr aggregate_function, WindowDescription & desc, const IAST * ast) { const auto & definition = ast->as(); @@ -698,7 +700,21 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_, ast->formatForErrorMessage()); } + const auto * window_function = aggregate_function ? dynamic_cast(aggregate_function.get()) : nullptr; desc.frame.is_default = definition.frame_is_default; + if (desc.frame.is_default && window_function) + { + auto default_window_frame_opt = window_function->getDefaultFrame(); + if (default_window_frame_opt) + { + desc.frame = *default_window_frame_opt; + /// Append the default frame description to window_name, make sure it will be put into + /// a proper window description. 
+ desc.window_name += " " + desc.frame.toString(); + return; + } + } + desc.frame.type = definition.frame_type; desc.frame.begin_type = definition.frame_begin_type; desc.frame.begin_preceding = definition.frame_begin_preceding; @@ -734,7 +750,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) WindowDescription desc; desc.window_name = elem.name; makeWindowDescriptionFromAST(*current_context, window_descriptions, - desc, elem.definition.get()); + nullptr, desc, elem.definition.get()); auto [it, inserted] = window_descriptions.insert( {elem.name, std::move(desc)}); @@ -821,10 +837,10 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions) WindowDescription desc; desc.window_name = default_window_name; makeWindowDescriptionFromAST(*current_context, window_descriptions, - desc, &definition); + window_function.aggregate_function, desc, &definition); auto [it, inserted] = window_descriptions.insert( - {default_window_name, desc}); + {desc.window_name, std::move(desc)}); if (!inserted) { diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 12d6dce8f72..ff3f185bea9 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -138,7 +138,12 @@ public: /// A list of windows for window functions. const WindowDescriptions & windowDescriptions() const { return window_descriptions; } - void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast); + void makeWindowDescriptionFromAST( + const Context & context, + const WindowDescriptions & existing_descriptions, + AggregateFunctionPtr aggregate_function, + WindowDescription & desc, + const IAST * ast); void makeWindowDescriptions(ActionsDAG & actions); /** Checks if subquery is not a plain StorageSet. 
diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 6fd37df11c6..0b1be32d987 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -237,7 +238,7 @@ public: if (function_node.isWindowFunction()) { buffer << " OVER ("; - buffer << calculateWindowNodeActionName(function_node.getWindowNode()); + buffer << calculateWindowNodeActionName(node, function_node.getWindowNode()); buffer << ')'; } @@ -298,21 +299,22 @@ public: return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal)); } - String calculateWindowNodeActionName(const QueryTreeNodePtr & node) + String calculateWindowNodeActionName(const QueryTreeNodePtr & function_nodew_node_, const QueryTreeNodePtr & window_node_) { - auto & window_node = node->as(); + const auto & function_node = function_nodew_node_->as(); + const auto & window_node = window_node_->as(); WriteBufferFromOwnString buffer; if (window_node.hasPartitionBy()) { buffer << "PARTITION BY "; - auto & partition_by_nodes = window_node.getPartitionBy().getNodes(); + const auto & partition_by_nodes = window_node.getPartitionBy().getNodes(); size_t partition_by_nodes_size = partition_by_nodes.size(); for (size_t i = 0; i < partition_by_nodes_size; ++i) { - auto & partition_by_node = partition_by_nodes[i]; + const auto & partition_by_node = partition_by_nodes[i]; buffer << calculateActionNodeName(partition_by_node); if (i + 1 != partition_by_nodes_size) buffer << ", "; @@ -326,7 +328,7 @@ public: buffer << "ORDER BY "; - auto & order_by_nodes = window_node.getOrderBy().getNodes(); + const auto & order_by_nodes = window_node.getOrderBy().getNodes(); size_t order_by_nodes_size = order_by_nodes.size(); for (size_t i = 0; i < order_by_nodes_size; ++i) @@ -364,44 +366,14 @@ public: } } - auto & window_frame = window_node.getWindowFrame(); - if 
(!window_frame.is_default) + auto window_frame_opt = extractWindowFrame(function_node); + if (window_frame_opt) { + auto & window_frame = *window_frame_opt; if (window_node.hasPartitionBy() || window_node.hasOrderBy()) buffer << ' '; - buffer << window_frame.type << " BETWEEN "; - if (window_frame.begin_type == WindowFrame::BoundaryType::Current) - { - buffer << "CURRENT ROW"; - } - else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded) - { - buffer << "UNBOUNDED"; - buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); - } - else - { - buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode()); - buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING"); - } - - buffer << " AND "; - - if (window_frame.end_type == WindowFrame::BoundaryType::Current) - { - buffer << "CURRENT ROW"; - } - else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded) - { - buffer << "UNBOUNDED"; - buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING"); - } - else - { - buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode()); - buffer << " " << (window_frame.end_preceding ? 
"PRECEDING" : "FOLLOWING"); - } + window_frame.toString(buffer); } return buffer.str(); @@ -1074,20 +1046,11 @@ String calculateConstantActionNodeName(const Field & constant_literal) return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal); } -String calculateWindowNodeActionName(const QueryTreeNodePtr & node, - const PlannerContext & planner_context, - QueryTreeNodeToName & node_to_name, - bool use_column_identifier_as_action_node_name) -{ - ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name); - return helper.calculateWindowNodeActionName(node); -} - -String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name) +String calculateWindowNodeActionName(const QueryTreeNodePtr & function_node, const QueryTreeNodePtr & window_node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name) { QueryTreeNodeToName empty_map; ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name); - return helper.calculateWindowNodeActionName(node); + return helper.calculateWindowNodeActionName(function_node, window_node); } } diff --git a/src/Planner/PlannerActionsVisitor.h b/src/Planner/PlannerActionsVisitor.h index 1dbd149bc4b..45d21ce68a5 100644 --- a/src/Planner/PlannerActionsVisitor.h +++ b/src/Planner/PlannerActionsVisitor.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -8,6 +9,7 @@ #include #include +#include namespace DB { @@ -80,16 +82,8 @@ String calculateConstantActionNodeName(const Field & constant_literal); * Window node action name can only be part of window function action name. * For column node column node identifier from planner context is used, if use_column_identifier_as_action_node_name = true. 
*/ -String calculateWindowNodeActionName(const QueryTreeNodePtr & node, - const PlannerContext & planner_context, - QueryTreeNodeToName & node_to_name, - bool use_column_identifier_as_action_node_name = true); - -/** Calculate action node name for window node. - * Window node action name can only be part of window function action name. - * For column node column node identifier from planner context is used, if use_column_identifier_as_action_node_name = true. - */ -String calculateWindowNodeActionName(const QueryTreeNodePtr & node, +String calculateWindowNodeActionName(const QueryTreeNodePtr & function_node, + const QueryTreeNodePtr & window_node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name = true); diff --git a/src/Planner/PlannerWindowFunctions.cpp b/src/Planner/PlannerWindowFunctions.cpp index 225852de5a7..f91cf644cf0 100644 --- a/src/Planner/PlannerWindowFunctions.cpp +++ b/src/Planner/PlannerWindowFunctions.cpp @@ -1,5 +1,7 @@ +#include #include +#include #include #include #include @@ -8,8 +10,9 @@ #include -#include #include +#include +#include namespace DB { @@ -22,27 +25,33 @@ namespace ErrorCodes namespace { -WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr & node, const PlannerContext & planner_context) +WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr & func_node_, const PlannerContext & planner_context) { + const auto & func_node = func_node_->as(); + auto node = func_node.getWindowNode(); auto & window_node = node->as(); WindowDescription window_description; - window_description.window_name = calculateWindowNodeActionName(node, planner_context); + window_description.window_name = calculateWindowNodeActionName(func_node_, node, planner_context); for (const auto & partition_by_node : window_node.getPartitionBy().getNodes()) { auto partition_by_node_action_name = calculateActionNodeName(partition_by_node, planner_context); - auto 
partition_by_sort_column_description = SortColumnDescription(partition_by_node_action_name, 1 /* direction */, 1 /* nulls_direction */); + auto partition_by_sort_column_description + = SortColumnDescription(partition_by_node_action_name, 1 /* direction */, 1 /* nulls_direction */); window_description.partition_by.push_back(std::move(partition_by_sort_column_description)); } window_description.order_by = extractSortDescription(window_node.getOrderByNode(), planner_context); window_description.full_sort_description = window_description.partition_by; - window_description.full_sort_description.insert(window_description.full_sort_description.end(), window_description.order_by.begin(), window_description.order_by.end()); + window_description.full_sort_description.insert( + window_description.full_sort_description.end(), window_description.order_by.begin(), window_description.order_by.end()); /// WINDOW frame is validated during query analysis stage - window_description.frame = window_node.getWindowFrame(); + auto window_frame = extractWindowFrame(func_node); + window_description.frame = window_frame ? 
*window_frame : window_node.getWindowFrame(); + auto node_frame = window_node.getWindowFrame(); const auto & query_context = planner_context.getQueryContext(); const auto & query_context_settings = query_context->getSettingsRef(); @@ -64,7 +73,8 @@ WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr } -std::vector extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context) +std::vector +extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context) { std::unordered_map window_name_to_description; @@ -72,7 +82,7 @@ std::vector extractWindowDescriptions(const QueryTreeNodes & { auto & window_function_node_typed = window_function_node->as(); - auto function_window_description = extractWindowDescriptionFromWindowNode(window_function_node_typed.getWindowNode(), planner_context); + auto function_window_description = extractWindowDescriptionFromWindowNode(window_function_node, planner_context); auto frame_type = function_window_description.frame.type; if (frame_type != WindowFrame::FrameType::ROWS && frame_type != WindowFrame::FrameType::RANGE) diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index 3c54c57a28c..1cfb878d338 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -20,6 +20,8 @@ #include +#include + #include #include #include @@ -32,6 +34,7 @@ #include #include #include +#include #include @@ -477,4 +480,20 @@ ASTPtr parseAdditionalResultFilter(const Settings & settings) return additional_result_filter_ast; } +std::optional extractWindowFrame(const FunctionNode & node) +{ + if (!node.isWindowFunction()) + return {}; + auto & window_node = node.getWindowNode()->as(); + const auto & window_frame = window_node.getWindowFrame(); + if (!window_frame.is_default) + return window_frame; + auto aggregate_function = node.getAggregateFunction(); + if (const auto * win_func = dynamic_cast(aggregate_function.get())) + { + 
return win_func->getDefaultFrame(); + } + return {}; +} + } diff --git a/src/Planner/Utils.h b/src/Planner/Utils.h index 3172847f053..e33ff4cc4e2 100644 --- a/src/Planner/Utils.h +++ b/src/Planner/Utils.h @@ -19,6 +19,8 @@ #include +#include + namespace DB { @@ -88,4 +90,9 @@ FilterDAGInfo buildFilterInfo(QueryTreeNodePtr filter_query_tree, ASTPtr parseAdditionalResultFilter(const Settings & settings); +/// If the window frame is not set in SQL, try to use the default frame of the window function, +/// if it has one. Otherwise return empty. +/// If the window frame is set in SQL, it is used as is. +std::optional extractWindowFrame(const FunctionNode & node); + } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 86421adf4fb..d1e8cf66e2d 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -59,26 +59,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -// Interface for true window functions. It's not much of an interface, they just -// accept the guts of WindowTransform and do 'something'. Given a small number of -// true window functions, and the fact that the WindowTransform internals are -// pretty much well-defined in domain terms (e.g. frame boundaries), this is -// somewhat acceptable. -class IWindowFunction -{ -public: - virtual ~IWindowFunction() = default; - - // Must insert the result for current_row. - virtual void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) const = 0; - - virtual std::optional getDefaultFrame() const { return {}; } - - /// Is the frame type supported by this function. - virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; } -}; - // Compares ORDER BY column values at given rows to find the boundaries of frame: // [compared] with [reference] +/- offset. 
Return value is -1/0/+1, like in // sorting predicates -- -1 means [compared] is less than [reference] +/- offset. @@ -1512,41 +1492,6 @@ void WindowTransform::work() } } -// A basic implementation for a true window function. It pretends to be an -// aggregate function, but refuses to work as such. -struct WindowFunction - : public IAggregateFunctionHelper - , public IWindowFunction -{ - std::string name; - - WindowFunction(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) - : IAggregateFunctionHelper(argument_types_, parameters_, result_type_) - , name(name_) - {} - - bool isOnlyWindowFunction() const override { return true; } - - [[noreturn]] void fail() const - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "The function '{}' can only be used as a window function, not as an aggregate function", - getName()); - } - - String getName() const override { return name; } - void create(AggregateDataPtr __restrict) const override {} - void destroy(AggregateDataPtr __restrict) const noexcept override {} - bool hasTrivialDestructor() const override { return true; } - size_t sizeOfData() const override { return 0; } - size_t alignOfData() const override { return 1; } - void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); } - void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); } - void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional) const override { fail(); } - void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional, Arena *) const override { fail(); } - void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); } -}; - struct WindowFunctionRank final : public WindowFunction { WindowFunctionRank(const std::string & name_, @@ -1658,36 +1603,6 @@ struct WindowFunctionHelpers } }; -template -struct StatefulWindowFunction : public 
WindowFunction -{ - StatefulWindowFunction(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_) - : WindowFunction(name_, argument_types_, parameters_, result_type_) - { - } - - size_t sizeOfData() const override { return sizeof(State); } - size_t alignOfData() const override { return 1; } - - void create(AggregateDataPtr __restrict place) const override - { - new (place) State(); - } - - void destroy(AggregateDataPtr __restrict place) const noexcept override - { - reinterpret_cast(place)->~State(); - } - - bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v; } - - State & getState(const WindowFunctionWorkspace & workspace) const - { - return *reinterpret_cast(workspace.aggregate_function_state.data()); - } -}; - struct ExponentialTimeDecayedSumState { Float64 previous_time; @@ -2267,14 +2182,13 @@ public: bool checkWindowFrameType(const WindowTransform * transform) const override { - if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE - || transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded - || transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current) - { - LOG_ERROR( - getLogger("WindowFunctionPercentRank"), - "Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'"); - return false; + auto default_window_frame = getDefaultFrame(); + if (transform->window_description.frame != default_window_frame) + { + LOG_ERROR( + getLogger("WindowFunctionPercentRank"), + "Window frame for function 'percent_rank' should be '{}'", default_window_frame->toString()); + return false; } return true; } @@ -2284,7 +2198,7 @@ public: WindowFrame frame; frame.type = WindowFrame::FrameType::RANGE; frame.begin_type = WindowFrame::BoundaryType::Unbounded; - frame.end_type = WindowFrame::BoundaryType::Current; + frame.end_type = 
WindowFrame::BoundaryType::Unbounded; return frame; } @@ -2802,5 +2716,4 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) name, argument_types, parameters); }, properties}); } - } diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 43fa6b28019..f8ad3c92ac7 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -21,30 +22,6 @@ using ExpressionActionsPtr = std::shared_ptr; class Arena; -class IWindowFunction; - -// Runtime data for computing one window function. -struct WindowFunctionWorkspace -{ - AggregateFunctionPtr aggregate_function; - - // Cached value of aggregate function isState virtual method - bool is_aggregate_function_state = false; - - // This field is set for pure window functions. When set, we ignore the - // window_function.aggregate_function, and work through this interface - // instead. - IWindowFunction * window_function_impl = nullptr; - - std::vector argument_column_indices; - - // Will not be initialized for a pure window function. - mutable AlignedBuffer aggregate_function_state; - - // Argument columns. Be careful, this is a per-block cache. 
- std::vector argument_columns; - UInt64 cached_block_number = std::numeric_limits::max(); -}; struct WindowTransformBlock { diff --git a/tests/queries/0_stateless/01592_window_functions.reference b/tests/queries/0_stateless/01592_window_functions.reference index 06ec67ee82d..ec957dd7a02 100644 --- a/tests/queries/0_stateless/01592_window_functions.reference +++ b/tests/queries/0_stateless/01592_window_functions.reference @@ -79,16 +79,3 @@ iPhone 900 Smartphone 500 500 Kindle Fire 150 Tablet 150 350 Samsung Galaxy Tab 200 Tablet 175 350 iPad 700 Tablet 350 350 ----- Q8 ---- -Lenovo Thinkpad Laptop 700 1 0 -Sony VAIO Laptop 700 1 0 -Dell Vostro Laptop 800 3 0.6666666666666666 -HP Elite Laptop 1200 4 1 -Microsoft Lumia Smartphone 200 1 0 -HTC One Smartphone 400 2 0.3333333333333333 -Nexus Smartphone 500 3 0.6666666666666666 -iPhone Smartphone 900 4 1 -Kindle Fire Tablet 150 1 0 -Samsung Galaxy Tab Tablet 200 2 0.5 -iPad Tablet 700 3 1 -Others Unknow 200 1 0 diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index a660fcca7b2..c6bb23bc7cf 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -101,26 +101,7 @@ SELECT FROM products INNER JOIN product_groups USING (group_id)) t order by group_name, product_name, price; -select '---- Q8 ----'; -INSERT INTO product_groups VALUES (4, 'Unknow'); -INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200); - -SELECT * -FROM -( - SELECT - product_name, - group_name, - price, - rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, - percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent - FROM products - INNER JOIN product_groups USING (group_id) -) AS t -ORDER BY - group_name ASC, - price ASC, - product_name ASC; - drop table product_groups; drop table products; + + diff --git 
a/tests/queries/0_stateless/03037_precent_rank.reference b/tests/queries/0_stateless/03037_precent_rank.reference new file mode 100644 index 00000000000..6a23f3884cd --- /dev/null +++ b/tests/queries/0_stateless/03037_precent_rank.reference @@ -0,0 +1,22 @@ +Lenovo Thinkpad Laptop 700 1 0 +Sony VAIO Laptop 700 1 0 +Dell Vostro Laptop 800 3 0.6666666666666666 +HP Elite Laptop 1200 4 1 +Microsoft Lumia Smartphone 200 1 0 +HTC One Smartphone 400 2 0.3333333333333333 +Nexus Smartphone 500 3 0.6666666666666666 +iPhone Smartphone 900 4 1 +Kindle Fire Tablet 150 1 0 +Samsung Galaxy Tab Tablet 200 2 0.5 +iPad Tablet 700 3 1 +Others Unknow 200 1 0 +0 1 0 +1 2 1 +2 3 2 +3 4 3 +4 5 4 +5 6 5 +6 7 6 +7 8 7 +8 9 8 +9 10 9 diff --git a/tests/queries/0_stateless/03037_precent_rank.sql b/tests/queries/0_stateless/03037_precent_rank.sql new file mode 100644 index 00000000000..b0f83fa3340 --- /dev/null +++ b/tests/queries/0_stateless/03037_precent_rank.sql @@ -0,0 +1,52 @@ +drop table if exists product_groups; +drop table if exists products; + +CREATE TABLE product_groups ( + group_id Int64, + group_name String +) Engine = Memory; + + +CREATE TABLE products ( + product_id Int64, + product_name String, + price DECIMAL(11, 2), + group_id Int64 +) Engine = Memory; + +INSERT INTO product_groups VALUES (1, 'Smartphone'),(2, 'Laptop'),(3, 'Tablet'); + +INSERT INTO products (product_id,product_name, group_id,price) VALUES (1, 'Microsoft Lumia', 1, 200), (2, 'HTC One', 1, 400), (3, 'Nexus', 1, 500), (4, 'iPhone', 1, 900),(5, 'HP Elite', 2, 1200),(6, 'Lenovo Thinkpad', 2, 700),(7, 'Sony VAIO', 2, 700),(8, 'Dell Vostro', 2, 800),(9, 'iPad', 3, 700),(10, 'Kindle Fire', 3, 150),(11, 'Samsung Galaxy Tab', 3, 200); + +INSERT INTO product_groups VALUES (4, 'Unknow'); +INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200); + +SELECT * +FROM +( + SELECT + product_name, + group_name, + price, + rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank, + 
percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent + FROM products + INNER JOIN product_groups USING (group_id) +) AS t +ORDER BY + group_name ASC, + price ASC, + product_name ASC; + +drop table product_groups; +drop table products; + +select number, row_number, cast(percent_rank * 10000 as Int32) as percent_rank +from ( + select number, row_number() over () as row_number, percent_rank() over (order by number) as percent_rank + from numbers(10000) + order by number + limit 10 +) +settings max_block_size=100; + From 43dffca285896ff447bb65ef8b75b7c389e9889d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 8 Aug 2024 13:12:42 +0000 Subject: [PATCH 058/121] Backport #67587 to 24.7: Fix: creation of view with recursive CTE --- src/Interpreters/AddDefaultDatabaseVisitor.h | 9 ++++ .../03215_view_with_recursive.reference | 2 + .../0_stateless/03215_view_with_recursive.sql | 43 +++++++++++++++++++ 3 files changed, 54 insertions(+) create mode 100644 tests/queries/0_stateless/03215_view_with_recursive.reference create mode 100644 tests/queries/0_stateless/03215_view_with_recursive.sql diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 356bffa75e9..a28c7c1bff3 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -100,6 +101,7 @@ private: const String database_name; std::set external_tables; + mutable std::unordered_set with_aliases; bool only_replace_current_database_function = false; bool only_replace_in_join = false; @@ -117,6 +119,10 @@ private: void visit(ASTSelectQuery & select, ASTPtr &) const { + if (select.recursive_with) + for (const auto & child : select.with()->children) + with_aliases.insert(child->as()->name); + if (select.tables()) tryVisit(select.refTables()); @@ -165,6 +171,9 @@ private: /// There is temporary table with such name, 
should not be rewritten. if (external_tables.contains(identifier.shortName())) return; + /// This is WITH RECURSIVE alias. + if (with_aliases.contains(identifier.name())) + return; auto qualified_identifier = std::make_shared(database_name, identifier.name()); if (!identifier.alias.empty()) diff --git a/tests/queries/0_stateless/03215_view_with_recursive.reference b/tests/queries/0_stateless/03215_view_with_recursive.reference new file mode 100644 index 00000000000..c3ca8065a70 --- /dev/null +++ b/tests/queries/0_stateless/03215_view_with_recursive.reference @@ -0,0 +1,2 @@ +5050 +8 diff --git a/tests/queries/0_stateless/03215_view_with_recursive.sql b/tests/queries/0_stateless/03215_view_with_recursive.sql new file mode 100644 index 00000000000..5d93ccc5438 --- /dev/null +++ b/tests/queries/0_stateless/03215_view_with_recursive.sql @@ -0,0 +1,43 @@ +SET allow_experimental_analyzer = 1; + +CREATE VIEW 03215_test_v +AS WITH RECURSIVE test_table AS + ( + SELECT 1 AS number + UNION ALL + SELECT number + 1 + FROM test_table + WHERE number < 100 + ) +SELECT sum(number) +FROM test_table; + +SELECT * FROM 03215_test_v; + +CREATE VIEW 03215_multi_v +AS WITH RECURSIVE + task AS + ( + SELECT + number AS task_id, + number - 1 AS parent_id + FROM numbers(10) + ), + rtq AS + ( + SELECT + task_id, + parent_id + FROM task AS t + WHERE t.parent_id = 1 + UNION ALL + SELECT + t.task_id, + t.parent_id + FROM task AS t, rtq AS r + WHERE t.parent_id = r.task_id + ) +SELECT count() +FROM rtq; + +SELECT * FROM 03215_multi_v; From 3996f322a0f874b3635737003e1e82b4440dd192 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 8 Aug 2024 13:42:38 +0000 Subject: [PATCH 059/121] Update autogenerated version to 24.7.3.42 and contributors --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 3be3b9fe60b..8ccccb7e3f9 100644 --- a/cmake/autogenerated_versions.txt 
+++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54488) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) SET(VERSION_PATCH 3) -SET(VERSION_GITHASH 6e41f601b2f025b0741da55d13287922eb28cf37) -SET(VERSION_DESCRIBE v24.7.3.1-stable) -SET(VERSION_STRING 24.7.3.1) +SET(VERSION_GITHASH 63730bc42939f76cb2b03be385ff08051ea2e3fe) +SET(VERSION_DESCRIBE v24.7.3.42-stable) +SET(VERSION_STRING 24.7.3.42) # end of autochange From 0e7b15c3ad908bf00b545ae11037d8dc2c1b7c2c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 8 Aug 2024 21:10:58 +0000 Subject: [PATCH 060/121] Backport #66282 to 24.7: Properly convert boolean literals in query tree --- src/Interpreters/convertFieldToType.cpp | 4 ++++ .../gtest_transform_query_for_external_database.cpp | 12 +++++++++++- .../02952_conjunction_optimization.reference | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 184c263dbdb..d87d4a73e37 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -214,6 +214,10 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID } else if (type.isValueRepresentedByNumber() && src.getType() != Field::Types::String) { + /// Bool is not represented in which_type, so we need to type it separately + if (isInt64OrUInt64orBoolFieldType(src.getType()) && type.getName() == "Bool") + return bool(src.safeGet()); + if (which_type.isUInt8()) return convertNumericType(src, type); if (which_type.isUInt16()) return convertNumericType(src, type); if (which_type.isUInt32()) return convertNumericType(src, type); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 6765e112bb9..5a63c118e2d 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ 
b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -74,6 +75,7 @@ private: {"a", std::make_shared()}, {"b", std::make_shared()}, {"foo", std::make_shared()}, + {"is_value", DataTypeFactory::instance().get("Bool")}, }), TableWithColumnNamesAndTypes( createDBAndTable("table2"), @@ -411,6 +413,14 @@ TEST(TransformQueryForExternalDatabase, Analyzer) R"(SELECT "column" FROM "test"."table")"); check(state, 1, {"column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo"}, - "SELECT * FROM table WHERE (column) IN (1)", + "SELECT * EXCEPT (is_value) FROM table WHERE (column) IN (1)", R"(SELECT "column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo" FROM "test"."table" WHERE "column" IN (1))"); + + check(state, 1, {"is_value"}, + "SELECT is_value FROM table WHERE is_value = true", + R"(SELECT "is_value" FROM "test"."table" WHERE "is_value" = true)"); + + check(state, 1, {"is_value"}, + "SELECT is_value FROM table WHERE is_value = 1", + R"(SELECT "is_value" FROM "test"."table" WHERE "is_value" = 1)"); } diff --git a/tests/queries/0_stateless/02952_conjunction_optimization.reference b/tests/queries/0_stateless/02952_conjunction_optimization.reference index eeadfaae21d..8af0abefd3a 100644 --- a/tests/queries/0_stateless/02952_conjunction_optimization.reference +++ b/tests/queries/0_stateless/02952_conjunction_optimization.reference @@ -32,7 +32,7 @@ QUERY id: 0 FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: Bool ARGUMENTS LIST id: 6, nodes: 2 - CONSTANT id: 7, constant_value: UInt64_1, constant_value_type: Bool + CONSTANT id: 7, constant_value: Bool_1, constant_value_type: Bool FUNCTION id: 8, function_name: notIn, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 9, nodes: 2 From 8404a8e0af357c1a4cc1b75d4624053660048ad3 Mon Sep 17 00:00:00 2001 From: 
robot-clickhouse Date: Fri, 9 Aug 2024 01:36:01 +0000 Subject: [PATCH 061/121] Backport #67953 to 24.7: Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++++ .../02884_create_view_with_sql_security_option.reference | 1 + .../0_stateless/02884_create_view_with_sql_security_option.sh | 2 ++ 3 files changed, 7 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 78a551591a6..9d9ee4a3564 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3346,6 +3346,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER MODIFY REFRESH is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_SQL_SECURITY) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "ALTER MODIFY SQL SECURITY is not supported by MergeTree engines family"); + if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { throw Exception(ErrorCodes::BAD_ARGUMENTS, diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference index a03343c8cb3..39e7aad87e0 100644 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.reference @@ -27,6 +27,7 @@ OK OK 100 100 +OK ===== TestGrants ===== OK OK diff --git a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh index dd869cd9988..bc14df13b2c 100755 --- a/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh +++ b/tests/queries/0_stateless/02884_create_view_with_sql_security_option.sh @@ -199,6 +199,8 @@ ${CLICKHOUSE_CLIENT} --user 
$user2 --query "INSERT INTO source SELECT * FROM gen ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination1" ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination2" +(( $(${CLICKHOUSE_CLIENT} --query "ALTER TABLE test_table MODIFY SQL SECURITY INVOKER" 2>&1 | grep -c "is not supported") >= 1 )) && echo "OK" || echo "UNEXPECTED" + echo "===== TestGrants =====" ${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1" ${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_table TO $user1, $user2" From 2a43c433f94e48137c8cf56acaf89f2523af73c0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 9 Aug 2024 09:07:56 +0000 Subject: [PATCH 062/121] Backport #67963 to 24.7: Fix CPU count detection for `concurrent_threads_soft_limit_ratio` in containers --- programs/server/Server.cpp | 9 +++++---- src/Common/getNumberOfPhysicalCPUCores.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 16888015f8b..29c330f53b1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -806,10 +806,11 @@ try const size_t physical_server_memory = getMemoryAmount(); - LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.", + LOG_INFO(log, "Available RAM: {}; logical cores: {}; used cores: {}.", formatReadableSizeWithBinarySuffix(physical_server_memory), - getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores - std::thread::hardware_concurrency()); + std::thread::hardware_concurrency(), + getNumberOfPhysicalCPUCores() // on ARM processors it can show only enabled at current moment cores + ); #if defined(__x86_64__) String cpu_info; @@ -1608,7 +1609,7 @@ try concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num; if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0) { - auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores *
std::thread::hardware_concurrency(); + auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * getNumberOfPhysicalCPUCores(); if (value > 0 && value < concurrent_threads_soft_limit) concurrent_threads_soft_limit = value; } diff --git a/src/Common/getNumberOfPhysicalCPUCores.h b/src/Common/getNumberOfPhysicalCPUCores.h index 827e95e1bea..9e3412fdcba 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.h +++ b/src/Common/getNumberOfPhysicalCPUCores.h @@ -1,4 +1,5 @@ #pragma once /// Get number of CPU cores without hyper-threading. +/// The calculation respects possible cgroups limits. unsigned getNumberOfPhysicalCPUCores(); From 3b25ade49a42d0c375ca9b1a31a6cd9d56373d1a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 9 Aug 2024 15:08:39 +0000 Subject: [PATCH 063/121] Backport #68052 to 24.7: Fix skip of parts in mutation with analyzer --- src/Interpreters/MutationsInterpreter.cpp | 11 +++++----- src/Interpreters/MutationsInterpreter.h | 1 - src/Storages/MergeTree/MutateTask.cpp | 2 +- ...3221_mutation_analyzer_skip_part.reference | 4 ++++ .../03221_mutation_analyzer_skip_part.sql | 21 +++++++++++++++++++ 5 files changed, 31 insertions(+), 8 deletions(-) create mode 100644 tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference create mode 100644 tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 480c6736bc5..30813d5e5af 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -146,7 +146,6 @@ ColumnDependencies getAllColumnDependencies( bool isStorageTouchedByMutations( - MergeTreeData & storage, MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, @@ -155,7 +154,9 @@ bool isStorageTouchedByMutations( if (commands.empty()) return false; + auto storage_from_part = std::make_shared(source_part); 
bool all_commands_can_be_skipped = true; + for (const auto & command : commands) { if (command.type == MutationCommand::APPLY_DELETED_MASK) @@ -170,7 +171,7 @@ bool isStorageTouchedByMutations( if (command.partition) { - const String partition_id = storage.getPartitionIDFromQuery(command.partition, context); + const String partition_id = storage_from_part->getPartitionIDFromQuery(command.partition, context); if (partition_id == source_part->info.partition_id) all_commands_can_be_skipped = false; } @@ -184,20 +185,18 @@ bool isStorageTouchedByMutations( if (all_commands_can_be_skipped) return false; - auto storage_from_part = std::make_shared(source_part); - std::optional interpreter_select_query; BlockIO io; if (context->getSettingsRef().allow_experimental_analyzer) { - auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage.shared_from_this(), context); + auto select_query_tree = prepareQueryAffectedQueryTree(commands, storage_from_part, context); InterpreterSelectQueryAnalyzer interpreter(select_query_tree, context, SelectQueryOptions().ignoreLimits()); io = interpreter.execute(); } else { - ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context); + ASTPtr select_query = prepareQueryAffectedAST(commands, storage_from_part, context); /// Interpreter must be alive, when we use result of execute() method. /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 6aaa233cda3..57863e9ae73 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -19,7 +19,6 @@ using QueryPipelineBuilderPtr = std::unique_ptr; /// Return false if the data isn't going to be changed by mutations. 
bool isStorageTouchedByMutations( - MergeTreeData & storage, MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 9a775db73e2..5fcf699de59 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2134,7 +2134,7 @@ bool MutateTask::prepare() ctx->commands_for_part.emplace_back(command); if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) + ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, context_for_reading)) { NameSet files_to_copy_instead_of_hardlinks; auto settings_ptr = ctx->data->getSettings(); diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference new file mode 100644 index 00000000000..68f8708dbaf --- /dev/null +++ b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.reference @@ -0,0 +1,4 @@ +1_1_1_0_3 10000 +1_1_1_0_4 0 +2_2_2_0_3 0 +2_2_2_0_4 10000 diff --git a/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql new file mode 100644 index 00000000000..bf9a10e2af4 --- /dev/null +++ b/tests/queries/0_stateless/03221_mutation_analyzer_skip_part.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS t_mutate_skip_part; + +CREATE TABLE t_mutate_skip_part (key UInt64, id UInt64, v1 UInt64, v2 UInt64) ENGINE = MergeTree ORDER BY id PARTITION BY key; + +INSERT INTO t_mutate_skip_part SELECT 1, number, number, number FROM numbers(10000); +INSERT INTO t_mutate_skip_part SELECT 2, number, number, number FROM numbers(10000); + +SET mutations_sync = 2; + +ALTER TABLE t_mutate_skip_part UPDATE v1 = 1000 WHERE key = 1; +ALTER TABLE t_mutate_skip_part DELETE WHERE 
key = 2 AND v2 % 10 = 0; + +SYSTEM FLUSH LOGS; + +-- If part is skipped in mutation and hardlinked then read_rows must be 0. +SELECT part_name, read_rows +FROM system.part_log +WHERE database = currentDatabase() AND table = 't_mutate_skip_part' AND event_type = 'MutatePart' +ORDER BY part_name; + +DROP TABLE IF EXISTS t_mutate_skip_part; From 23a385dcd5fcab63f6823dcd12e1cd520aa880ce Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 12 Aug 2024 01:36:48 +0000 Subject: [PATCH 064/121] Backport #68099 to 24.7: Do not apply redundant sorting removal when there's an offset --- .../Optimizations/removeRedundantSorting.cpp | 8 +++-- .../02496_remove_redundant_sorting.reference | 31 +++++++++++++++++++ .../02496_remove_redundant_sorting.sh | 21 +++++++++++++ ...emove_redundant_sorting_analyzer.reference | 30 ++++++++++++++++++ 4 files changed, 87 insertions(+), 3 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp index 632eba6ab5f..e35cd20f102 100644 --- a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -59,9 +60,10 @@ public: if (typeid_cast(current_step) || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable - || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable - || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting - || typeid_cast(current_step)) /// (4) aggregation change order + || typeid_cast(current_step) /// (2) OFFSET on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (3) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (4) ORDER BY will change order of previous sorting + || 
typeid_cast(current_step)) /// (5) aggregation change order { logStep("nodes_affect_order/push", current_node); nodes_affect_order.push_back(current_node); diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 77ef213b36d..4d004f2f78f 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -465,6 +465,37 @@ Expression ((Projection + Before ORDER BY)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Projection + Before ORDER BY)) + Aggregating + Expression (Before GROUP BY) + Offset + Expression (Projection) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh index 661b32fce72..953f1257365 100755 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -302,6 +302,27 @@ FROM )" run_query "$query" +echo "-- presence of an inner OFFSET retains the ORDER BY" +query="WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) 
t2" +run_query "$query" + echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference index b6a2e3182df..dd5ac7bf706 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting_analyzer.reference @@ -464,6 +464,36 @@ Expression ((Project names + Projection)) ReadFromStorage (SystemOne) -- execute Float64 9007199254740994 +-- presence of an inner OFFSET retains the ORDER BY +-- query +WITH + t1 AS ( + SELECT a, b + FROM + VALUES ( + 'b UInt32, a Int32', + (1, 1), + (2, 0) + ) + ) +SELECT + SUM(a) +FROM ( + SELECT a, b + FROM t1 + ORDER BY 1 DESC, 2 + OFFSET 1 +) t2 +-- explain +Expression ((Project names + Projection)) + Aggregating + Expression ((Before GROUP BY + (Change column names to column identifiers + Project names))) + Offset + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers)))))) + ReadFromStorage (Values) +-- execute +0 -- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function -- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order -- query From 5d173546e5886319621a8148674f8e1f8c51c967 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 12 Aug 2024 13:13:17 +0000 Subject: [PATCH 065/121] Backport #68098 to 24.7: Fix UB in hopEnd, hopStart, tumbleEnd, 
and tumbleStart --- src/Functions/FunctionsTimeWindow.cpp | 14 ++++++++++++-- .../01049_window_view_window_functions.reference | 4 ++++ .../01049_window_view_window_functions.sql | 5 +++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index f93a885ee65..bb78e49a5ea 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -268,7 +268,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else @@ -625,7 +630,12 @@ struct TimeWindowImpl { auto type = WhichDataType(arguments[0].type); if (type.isTuple()) - return std::static_pointer_cast(arguments[0].type)->getElement(0); + { + const auto & tuple_elems = std::static_pointer_cast(arguments[0].type)->getElements(); + if (tuple_elems.empty()) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Tuple passed to {} should not be empty", function_name); + return tuple_elems[0]; + } else if (type.isUInt32()) return std::make_shared(); else diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index 2d49664b280..073301104d2 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -67,3 +67,7 @@ SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 2020-01-10 00:00:00 SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL 
'3' DAY, 'US/Samoa')); 2019-01-10 00:00:00 +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 617019bd2c6..fb2b4b4949a 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -36,3 +36,8 @@ SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, I SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); + +SELECT hopStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT hopEnd(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleStart(tuple()); -- { serverError ILLEGAL_COLUMN } +SELECT tumbleEnd(tuple()); -- { serverError ILLEGAL_COLUMN } From ab0f06230079c915083e498c27b5d0263154c96f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 12 Aug 2024 14:06:54 +0000 Subject: [PATCH 066/121] Backport #67922 to 24.7: Fix wrong `count()` result when there is non-deterministic function in predicate --- src/Storages/MergeTree/MergeTreeData.cpp | 5 +- src/Storages/VirtualColumnUtils.cpp | 87 ++++++++++--------- src/Storages/VirtualColumnUtils.h | 25 +++++- ..._with_non_deterministic_function.reference | 2 + ..._count_with_non_deterministic_function.sql | 4 + ...03217_filtering_in_storage_merge.reference | 6 ++ .../03217_filtering_in_storage_merge.sql | 16 ++++ 
...03217_filtering_in_system_tables.reference | 6 ++ .../03217_filtering_in_system_tables.sql | 30 +++++++ 9 files changed, 134 insertions(+), 47 deletions(-) create mode 100644 tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference create mode 100644 tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql create mode 100644 tests/queries/0_stateless/03217_filtering_in_storage_merge.reference create mode 100644 tests/queries/0_stateless/03217_filtering_in_storage_merge.sql create mode 100644 tests/queries/0_stateless/03217_filtering_in_system_tables.reference create mode 100644 tests/queries/0_stateless/03217_filtering_in_system_tables.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9d9ee4a3564..0635fbfba5e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1144,7 +1144,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false); if (!filter_dag) return {}; @@ -6904,7 +6904,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( const auto * predicate = filter_dag->getOutputs().at(0); // Generate valid expressions for filtering - VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context); + VirtualColumnUtils::filterBlockWithPredicate( + predicate, virtual_columns_block, query_context, /*allow_filtering_with_partial_predicate =*/true); rows = virtual_columns_block.rows(); part_name_column = virtual_columns_block.getByName("_part").column; diff --git 
a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 27c52124e9c..3c2a34bb475 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,51 +1,46 @@ -#include +#include + #include #include +#include +#include +#include +#include +#include #include #include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include #include +#include #include +#include #include - -#include #include -#include #include +#include +#include #include #include - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include +#include +#include #include #include +#include #include -#include #include - -#include -#include #include -#include "Functions/FunctionsLogical.h" -#include "Functions/IFunction.h" -#include "Functions/IFunctionAdaptors.h" -#include "Functions/indexHint.h" -#include -#include -#include -#include namespace DB @@ -269,9 +264,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) } static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( - const ActionsDAG::Node * node, - const Block * allowed_inputs, - ActionsDAG::Nodes & additional_nodes) + const ActionsDAG::Node * node, const Block * allowed_inputs, ActionsDAG::Nodes & additional_nodes, bool allow_partial_result) { if (node->type == ActionsDAG::ActionType::FUNCTION) { @@ -280,8 +273,15 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto & node_copy = additional_nodes.emplace_back(*node); node_copy.children.clear(); for (const auto * child : node->children) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes)) + if (const auto * child_copy + = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result)) node_copy.children.push_back(child_copy); + /// Expression like (now_allowed AND allowed) is not 
allowed if allow_partial_result = false. This is important for + /// trivial count optimization, otherwise we can get incorrect results. For example, if the query is + /// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply + /// trivial count. + else if (!allow_partial_result) + return nullptr; if (node_copy.children.empty()) return nullptr; @@ -289,7 +289,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( if (node_copy.children.size() == 1) { const ActionsDAG::Node * res = node_copy.children.front(); - /// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires + /// Expression like (not_allowed AND 256) can't be reduced to (and(256)) because AND requires /// at least two arguments; also it can't be reduced to (256) because result type is different. if (!res->result_type->equals(*node->result_type)) { @@ -307,7 +307,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( { auto & node_copy = additional_nodes.emplace_back(*node); for (auto & child : node_copy.children) - if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child) + if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result); !child) return nullptr; return &node_copy; @@ -321,7 +321,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( auto index_hint_dag = index_hint->getActions()->clone(); ActionsDAG::NodeRawConstPtrs atoms; for (const auto & output : index_hint_dag->getOutputs()) - if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes)) + if (const auto * child_copy + = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_partial_result)) atoms.push_back(child_copy); if (!atoms.empty()) @@ -355,22 +356,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( return node; } -ActionsDAGPtr
splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs) +ActionsDAGPtr +splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result) { if (!predicate) return nullptr; ActionsDAG::Nodes additional_nodes; - const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes); + const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_partial_result); if (!res) return nullptr; return ActionsDAG::cloneSubDAG({res}, true); } -void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context) +void filterBlockWithPredicate( + const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate) { - auto dag = splitFilterDagForAllowedInputs(predicate, &block); + auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_partial_result=*/allow_filtering_with_partial_predicate); if (dag) filterBlockWithDAG(dag, block, context); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 9045a2f5481..c6a167c845c 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -18,9 +18,20 @@ class NamesAndTypesList; namespace VirtualColumnUtils { -/// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate. +/// The filtering functions are tricky to use correctly. +/// There are 2 ways: +/// 1. Call filterBlockWithPredicate() or filterBlockWithExpression() inside SourceStepWithFilter::applyFilters(). +/// 2. Call splitFilterDagForAllowedInputs() and buildSetsForDAG() inside SourceStepWithFilter::applyFilters(). +/// Then call filterBlockWithPredicate() or filterBlockWithExpression() in initializePipeline(). +/// +/// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed" +/// if there are subqueries. 
+/// +/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))). Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate. /// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs). -void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context); +/// If allow_filtering_with_partial_predicate is true, then the filtering will be done even if some part of the predicate +/// cannot be evaluated using the columns from the block. +void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate = true); /// Just filters block. Block should contain all the required columns. void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); @@ -32,7 +43,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context); bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); /// Extract a part of predicate that can be evaluated using only columns from input_names. -ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); +/// When allow_partial_result is false, then the result will be empty if any part of it cannot be evaluated deterministically +/// on the given inputs. +/// allow_partial_result must be false when we are going to use the result to filter parts in +/// MergeTreeData::totalRowsByPartitionPredicateImpl. For example, if the query is +/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1` +/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is +/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial +/// count optimization will be mistakenly applied to the query.
+ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result = true); /// Extract from the input stream a set of `name` column values template diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql new file mode 100644 index 00000000000..bb3269da597 --- /dev/null +++ b/tests/queries/0_stateless/03203_count_with_non_deterministic_function.sql @@ -0,0 +1,4 @@ +CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x; +INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0; +SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference b/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference new file mode 100644 index 00000000000..d366ad04c39 --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_storage_merge.reference @@ -0,0 +1,6 @@ +Expression ((Project names + Projection)) + Aggregating + Expression (Before GROUP BY) + ReadFromMerge + Filter (( + ( + ))) + ReadFromMergeTree (default.test_03217_merge_replica_1) diff --git a/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql new file mode 100644 index 00000000000..42d31e95f9c --- /dev/null +++ 
b/tests/queries/0_stateless/03217_filtering_in_storage_merge.sql @@ -0,0 +1,16 @@ +CREATE TABLE test_03217_merge_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r1') + ORDER BY x; +CREATE TABLE test_03217_merge_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r2') + ORDER BY x; + + +CREATE TABLE test_03217_all_replicas (x UInt32) + ENGINE = Merge(currentDatabase(), 'test_03217_merge_replica_*'); + +INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10); +SYSTEM SYNC REPLICA test_03217_merge_replica_2; + +-- If the filter on _table is not applied, then the plan will show both replicas +EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table SETTINGS allow_experimental_analyzer=1; diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.reference b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference new file mode 100644 index 00000000000..c0761c3f689 --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.reference @@ -0,0 +1,6 @@ +information_schema tables +both default test_03217_system_tables_replica_1 r1 +both default test_03217_system_tables_replica_2 r2 +default test_03217_system_tables_replica_1 r1 +1 +1 diff --git a/tests/queries/0_stateless/03217_filtering_in_system_tables.sql b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql new file mode 100644 index 00000000000..eb506dfe39a --- /dev/null +++ b/tests/queries/0_stateless/03217_filtering_in_system_tables.sql @@ -0,0 +1,30 @@ +-- If filtering is not done correctly on databases, then this query report to read 3 rows, which are: `system.tables`, `information_schema.tables` and `INFORMATION_SCHEMA.tables` +SELECT database, table FROM system.tables WHERE database = 'information_schema' AND table = 'tables'; + +CREATE TABLE 
test_03217_system_tables_replica_1(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r1') + ORDER BY x; +CREATE TABLE test_03217_system_tables_replica_2(x UInt32) + ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r2') + ORDER BY x; + +-- Make sure we can read both replicas +-- The replica name might be altered because of `_functional_tests_helper_database_replicated_replace_args_macros`, +-- thus we need to use `left` +SELECT 'both', database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase(); +-- If filtering is not done correctly on database-table column, then this query report to read 2 rows, which are the above tables +SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name LIKE 'r1%'; +SYSTEM FLUSH LOGS; +-- argMax is necessary to make the test repeatable + +-- StorageSystemTables +SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND current_database = currentDatabase() + AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';' + AND type = 'QueryFinish'; + +-- StorageSystemReplicas +SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1 + AND current_database = currentDatabase() + AND query LIKE '%SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name LIKE \'r1\%\';' + AND type = 'QueryFinish'; From adb2710bc9fb1e7ba699a2381f4951d840320182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 12 Aug 2024 17:35:55 +0200 Subject: [PATCH 067/121] Fix build --- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0635fbfba5e..d56b7ac06d4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1144,7 +1144,7 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( auto metadata_snapshot = getInMemoryMetadataPtr(); auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]}); - auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false); + auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr, /*allow_partial_result=*/ false); if (!filter_dag) return {}; From c38dda47b07a5b541b73eafd463a4901f4d635f6 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 3 Jul 2024 02:02:24 +0000 Subject: [PATCH 068/121] Fix 'Not-ready Set is passed' in system tables --- src/Interpreters/PreparedSets.h | 15 ++++++++++--- src/Planner/Planner.cpp | 2 +- src/Storages/RocksDB/StorageSystemRocksDB.cpp | 8 +++++++ src/Storages/RocksDB/StorageSystemRocksDB.h | 1 + src/Storages/StorageMergeTreeIndex.cpp | 22 ++++++++++++++----- src/Storages/StorageMergeTreeIndex.h | 2 +- .../System/IStorageSystemOneBlock.cpp | 18 ++++++++++++--- src/Storages/System/IStorageSystemOneBlock.h | 8 +++++++ src/Storages/System/StorageSystemColumns.cpp | 20 +++++++++++++---- .../StorageSystemDataSkippingIndices.cpp | 17 +++++++++++--- .../System/StorageSystemDatabases.cpp | 8 +++++++ src/Storages/System/StorageSystemDatabases.h | 1 + .../System/StorageSystemDistributionQueue.cpp | 7 ++++++ .../System/StorageSystemDistributionQueue.h | 1 + .../System/StorageSystemMutations.cpp | 7 ++++++ src/Storages/System/StorageSystemMutations.h | 1 + .../StorageSystemPartMovesBetweenShards.cpp | 8 +++++++ .../StorageSystemPartMovesBetweenShards.h | 1 + src/Storages/System/StorageSystemReplicas.cpp 
| 19 +++++++++++++--- .../System/StorageSystemReplicationQueue.cpp | 8 +++++++ .../System/StorageSystemReplicationQueue.h | 1 + .../0_stateless/02841_not_ready_set_bug.sh | 17 ++++++++++++++ 22 files changed, 168 insertions(+), 24 deletions(-) diff --git a/src/Interpreters/PreparedSets.h b/src/Interpreters/PreparedSets.h index bf99a8ece3c..a6aee974d0e 100644 --- a/src/Interpreters/PreparedSets.h +++ b/src/Interpreters/PreparedSets.h @@ -90,9 +90,18 @@ private: using FutureSetFromTuplePtr = std::shared_ptr; -/// Set from subquery can be built inplace for PK or in CreatingSet step. -/// If use_index_for_in_with_subqueries_max_values is reached, set for PK won't be created, -/// but ordinary set would be created instead. +/// Set from subquery can be filled (by running the subquery) in one of two ways: +/// 1. During query analysis. Specifically, inside `SourceStepWithFilter::applyFilters()`. +/// Useful if the query plan depends on the set contents, e.g. to determine which files to read. +/// 2. During query execution. This is the preferred way. +/// Sets are created by CreatingSetStep, which runs before other steps. +/// Be careful: to build the set during query analysis, the `buildSetInplace()` call must happen +/// inside `SourceStepWithFilter::applyFilters()`. Calling it later, e.g. from `initializePipeline()` +/// will result in LOGICAL_ERROR "Not-ready Set is passed" (because a CreatingSetStep was already +/// added to pipeline but hasn't executed yet). +/// +/// If use_index_for_in_with_subqueries_max_values is reached, the built set won't be suitable for +/// key analysis, but will work with function IN (the set will contain only hashes of elements). 
class FutureSetFromSubquery final : public FutureSet { public: diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index a35ba69d459..9b3c75a5b12 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1111,7 +1111,7 @@ void addBuildSubqueriesForSetsStepIfNeeded( auto query_tree = subquery->detachQueryTree(); auto subquery_options = select_query_options.subquery(); /// I don't know if this is a good decision, - /// But for now it is done in the same way as in old analyzer. + /// but for now it is done in the same way as in old analyzer. /// This would not ignore limits for subqueries (affects mutations only). /// See test_build_sets_from_multiple_threads-analyzer. subquery_options.ignore_limits = false; diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.cpp b/src/Storages/RocksDB/StorageSystemRocksDB.cpp index 744d43e1c77..ea5852de830 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.cpp +++ b/src/Storages/RocksDB/StorageSystemRocksDB.cpp @@ -41,6 +41,14 @@ ColumnsDescription StorageSystemRocksDB::getColumnsDescription() } +Block StorageSystemRocksDB::getFilterSampleBlock() const +{ + return { + { {}, std::make_shared(), "database" }, + { {}, std::make_shared(), "table" }, + }; +} + void StorageSystemRocksDB::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); diff --git a/src/Storages/RocksDB/StorageSystemRocksDB.h b/src/Storages/RocksDB/StorageSystemRocksDB.h index ec351c75446..be3bfaa860c 100644 --- a/src/Storages/RocksDB/StorageSystemRocksDB.h +++ b/src/Storages/RocksDB/StorageSystemRocksDB.h @@ -22,6 +22,7 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; + Block getFilterSampleBlock() const override; }; } diff --git a/src/Storages/StorageMergeTreeIndex.cpp 
b/src/Storages/StorageMergeTreeIndex.cpp index 0b1ad02f8c9..90d01d356e9 100644 --- a/src/Storages/StorageMergeTreeIndex.cpp +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -275,7 +275,7 @@ public: private: std::shared_ptr storage; Poco::Logger * log; - const ActionsDAG::Node * predicate = nullptr; + ActionsDAGPtr virtual_columns_filter; }; void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) @@ -283,7 +283,17 @@ void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); + { + Block block_to_filter + { + { {}, std::make_shared(), StorageMergeTreeIndex::part_name_column.name }, + }; + + virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); + + if (virtual_columns_filter) + VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + } } void StorageMergeTreeIndex::read( @@ -335,7 +345,7 @@ void StorageMergeTreeIndex::read( void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - auto filtered_parts = storage->getFilteredDataParts(predicate, context); + auto filtered_parts = storage->getFilteredDataParts(virtual_columns_filter, context); LOG_DEBUG(log, "Reading index{}from {} parts of table {}", storage->with_marks ? 
" with marks " : " ", @@ -345,9 +355,9 @@ void ReadFromMergeTreeIndex::initializePipeline(QueryPipelineBuilder & pipeline, pipeline.init(Pipe(std::make_shared(getOutputStream().header, storage->key_sample_block, std::move(filtered_parts), context, storage->with_marks))); } -MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const +MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(ActionsDAGPtr virtual_columns_filter, const ContextPtr & context) const { - if (!predicate) + if (!virtual_columns_filter) return data_parts; auto all_part_names = ColumnString::create(); @@ -355,7 +365,7 @@ MergeTreeData::DataPartsVector StorageMergeTreeIndex::getFilteredDataParts(const all_part_names->insert(part->name); Block filtered_block{{std::move(all_part_names), std::make_shared(), part_name_column.name}}; - VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); + VirtualColumnUtils::filterBlockWithDAG(virtual_columns_filter, filtered_block, context); if (!filtered_block.rows()) return {}; diff --git a/src/Storages/StorageMergeTreeIndex.h b/src/Storages/StorageMergeTreeIndex.h index a1fb61d5a56..652a2d6eeaf 100644 --- a/src/Storages/StorageMergeTreeIndex.h +++ b/src/Storages/StorageMergeTreeIndex.h @@ -36,7 +36,7 @@ public: private: friend class ReadFromMergeTreeIndex; - MergeTreeData::DataPartsVector getFilteredDataParts(const ActionsDAG::Node * predicate, const ContextPtr & context) const; + MergeTreeData::DataPartsVector getFilteredDataParts(ActionsDAGPtr virtual_columns_filter, const ContextPtr & context) const; StoragePtr source_table; bool with_marks; diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp index 456b7c4f90b..7cde31905aa 100644 --- a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -5,6 +5,7 @@ // #include #include 
#include +#include #include #include #include @@ -44,7 +45,7 @@ public: private: std::shared_ptr storage; std::vector columns_mask; - const ActionsDAG::Node * predicate = nullptr; + ActionsDAGPtr filter; }; void IStorageSystemOneBlock::read( @@ -81,6 +82,7 @@ void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, { const auto & sample_block = getOutputStream().header; MutableColumns res_columns = sample_block.cloneEmptyColumns(); + auto predicate = filter ? filter->getOutputs().at(0) : nullptr; storage->fillData(res_columns, context, predicate, std::move(columns_mask)); UInt64 num_rows = res_columns.at(0)->size(); @@ -93,8 +95,18 @@ void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) { SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); + if (!filter_actions_dag) + return; + + Block sample = storage->getFilterSampleBlock(); + if (sample.columns() == 0) + return; + + filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &sample); + + /// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery. + if (filter) + VirtualColumnUtils::buildSetsForDAG(filter, context); } } diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index a20434fd97e..a47875c2537 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -22,8 +22,16 @@ class Context; class IStorageSystemOneBlock : public IStorage { protected: + /// If this method uses `predicate`, getFilterSampleBlock() must list all columns to which + /// it's applied. (Otherwise there'll be a LOGICAL_ERROR "Not-ready Set is passed" on subqueries.) 
virtual void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector columns_mask) const = 0; + /// Columns to which fillData() applies the `predicate`. + virtual Block getFilterSampleBlock() const + { + return {}; + } + virtual bool supportsColumnsMask() const { return false; } friend class ReadFromSystemOneBlock; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6a7810b97f9..6b3e0094562 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -338,7 +338,7 @@ private: std::shared_ptr storage; std::vector columns_mask; const size_t max_block_size; - const ActionsDAG::Node * predicate = nullptr; + ActionsDAGPtr virtual_columns_filter; }; void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) @@ -346,7 +346,17 @@ void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); + { + Block block_to_filter; + block_to_filter.insert(ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "database")); + block_to_filter.insert(ColumnWithTypeAndName(ColumnString::create(), std::make_shared(), "table")); + + virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); + + /// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery. 
+ if (virtual_columns_filter) + VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + } } void StorageSystemColumns::read( @@ -408,7 +418,8 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared(), "database")); /// Filter block with `database` column. - VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); + if (virtual_columns_filter) + VirtualColumnUtils::filterBlockWithPredicate(virtual_columns_filter->getOutputs().at(0), block_to_filter, context); if (!block_to_filter.rows()) { @@ -456,7 +467,8 @@ void ReadFromSystemColumns::initializePipeline(QueryPipelineBuilder & pipeline, } /// Filter block with `database` and `table` columns. - VirtualColumnUtils::filterBlockWithPredicate(predicate, block_to_filter, context); + if (virtual_columns_filter) + VirtualColumnUtils::filterBlockWithDAG(virtual_columns_filter, block_to_filter, context); if (!block_to_filter.rows()) { diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 093adc59cc6..a6bba44e257 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -214,7 +214,7 @@ private: std::shared_ptr storage; std::vector columns_mask; const size_t max_block_size; - const ActionsDAG::Node * predicate = nullptr; + ActionsDAGPtr virtual_columns_filter; }; void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter_nodes) @@ -222,7 +222,17 @@ void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); + { + Block block_to_filter + { + { ColumnString::create(), std::make_shared(), "database" }, + }; + + 
virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); + + if (virtual_columns_filter) + VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + } } void StorageSystemDataSkippingIndices::read( @@ -268,7 +278,8 @@ void ReadFromSystemDataSkippingIndices::initializePipeline(QueryPipelineBuilder /// Condition on "database" in a query acts like an index. Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + if (virtual_columns_filter) + VirtualColumnUtils::filterBlockWithDAG(virtual_columns_filter, block, context); ColumnPtr & filtered_databases = block.getByPosition(0).column; pipeline.init(Pipe(std::make_shared( diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index 1dbb187c418..0585506a661 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -73,6 +73,14 @@ static String getEngineFull(const ContextPtr & ctx, const DatabasePtr & database return engine_full; } +Block StorageSystemDatabases::getFilterSampleBlock() const +{ + return { + { {}, std::make_shared(), "engine" }, + { {}, std::make_shared(), "uuid" }, + }; +} + static ColumnPtr getFilteredDatabases(const Databases & databases, const ActionsDAG::Node * predicate, ContextPtr context) { MutableColumnPtr name_column = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemDatabases.h b/src/Storages/System/StorageSystemDatabases.h index fa55f0aea32..d10b350435b 100644 --- a/src/Storages/System/StorageSystemDatabases.h +++ b/src/Storages/System/StorageSystemDatabases.h @@ -27,6 +27,7 @@ protected: bool supportsColumnsMask() const override { return true; } void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector 
columns_mask) const override; + Block getFilterSampleBlock() const override; }; } diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index e2058448904..dab318a9c1c 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -107,6 +107,13 @@ ColumnsDescription StorageSystemDistributionQueue::getColumnsDescription() }; } +Block StorageSystemDistributionQueue::getFilterSampleBlock() const +{ + return { + { {}, std::make_shared(), "database" }, + { {}, std::make_shared(), "table" }, + }; +} void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { diff --git a/src/Storages/System/StorageSystemDistributionQueue.h b/src/Storages/System/StorageSystemDistributionQueue.h index 159a86bf082..27d777a4762 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.h +++ b/src/Storages/System/StorageSystemDistributionQueue.h @@ -22,6 +22,7 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; + Block getFilterSampleBlock() const override; }; } diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index 94656008029..df9a71310e5 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -46,6 +46,13 @@ ColumnsDescription StorageSystemMutations::getColumnsDescription() }; } +Block StorageSystemMutations::getFilterSampleBlock() const +{ + return { + { {}, std::make_shared(), "database" }, + { {}, std::make_shared(), "table" }, + }; +} void StorageSystemMutations::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { diff --git 
a/src/Storages/System/StorageSystemMutations.h b/src/Storages/System/StorageSystemMutations.h index c60157cd853..5341838a65e 100644 --- a/src/Storages/System/StorageSystemMutations.h +++ b/src/Storages/System/StorageSystemMutations.h @@ -22,6 +22,7 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; + Block getFilterSampleBlock() const override; }; } diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp index 9cba92bca12..ab74b205a96 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.cpp @@ -43,6 +43,14 @@ ColumnsDescription StorageSystemPartMovesBetweenShards::getColumnsDescription() } +Block StorageSystemPartMovesBetweenShards::getFilterSampleBlock() const +{ + return { + { {}, std::make_shared(), "database" }, + { {}, std::make_shared(), "table" }, + }; +} + void StorageSystemPartMovesBetweenShards::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemPartMovesBetweenShards.h b/src/Storages/System/StorageSystemPartMovesBetweenShards.h index 6a859d4de80..bc6133fcaaa 100644 --- a/src/Storages/System/StorageSystemPartMovesBetweenShards.h +++ b/src/Storages/System/StorageSystemPartMovesBetweenShards.h @@ -20,6 +20,7 @@ protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; + Block getFilterSampleBlock() const override; }; } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 3bd5fd290db..9fb4dc5ed6f 100644 --- 
a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -285,7 +285,7 @@ private: const bool with_zk_fields; const size_t max_block_size; std::shared_ptr impl; - const ActionsDAG::Node * predicate = nullptr; + ActionsDAGPtr virtual_columns_filter; }; void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) @@ -293,7 +293,19 @@ void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); + { + Block block_to_filter + { + { ColumnString::create(), std::make_shared(), "database" }, + { ColumnString::create(), std::make_shared(), "table" }, + { ColumnString::create(), std::make_shared(), "engine" }, + }; + + virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); + + if (virtual_columns_filter) + VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + } } void StorageSystemReplicas::read( @@ -430,7 +442,8 @@ void ReadFromSystemReplicas::initializePipeline(QueryPipelineBuilder & pipeline, { col_engine, std::make_shared(), "engine" }, }; - VirtualColumnUtils::filterBlockWithPredicate(predicate, filtered_block, context); + if (virtual_columns_filter) + VirtualColumnUtils::filterBlockWithDAG(virtual_columns_filter, filtered_block, context); if (!filtered_block.rows()) { diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index 14b641f46c7..a50982de5f0 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -62,6 +62,14 @@ ColumnsDescription StorageSystemReplicationQueue::getColumnsDescription() } +Block StorageSystemReplicationQueue::getFilterSampleBlock() const +{ + return { + { {}, std::make_shared(), "database" 
}, + { {}, std::make_shared(), "table" }, + }; +} + void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const { const auto access = context->getAccess(); diff --git a/src/Storages/System/StorageSystemReplicationQueue.h b/src/Storages/System/StorageSystemReplicationQueue.h index a9e57851be1..bcf351381ee 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.h +++ b/src/Storages/System/StorageSystemReplicationQueue.h @@ -21,6 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; + Block getFilterSampleBlock() const override; }; } diff --git a/tests/queries/0_stateless/02841_not_ready_set_bug.sh b/tests/queries/0_stateless/02841_not_ready_set_bug.sh index 9b2f3b0698e..556e2f52de2 100755 --- a/tests/queries/0_stateless/02841_not_ready_set_bug.sh +++ b/tests/queries/0_stateless/02841_not_ready_set_bug.sh @@ -11,3 +11,20 @@ $CLICKHOUSE_CLIENT --max_threads=2 --max_result_rows=1 --result_overflow_mode=br $CLICKHOUSE_CLIENT -q "SELECT * FROM system.tables WHERE 1 in (SELECT number from numbers(2)) AND database = currentDatabase() format Null" $CLICKHOUSE_CLIENT -q "SELECT xor(1, 0) FROM system.parts WHERE 1 IN (SELECT 1) FORMAT Null" + +# (Not all of these tests are effective because some of these tables are empty.) 
+$CLICKHOUSE_CLIENT -nq " + select * from system.columns where table in (select '123'); + select * from system.replicas where database in (select '123'); + select * from system.data_skipping_indices where database in (select '123'); + select * from system.databases where name in (select '123'); + select * from system.mutations where table in (select '123'); + select * from system.part_moves_between_shards where database in (select '123'); + select * from system.replication_queue where database in (select '123'); + select * from system.distribution_queue where database in (select '123'); +" +$CLICKHOUSE_CLIENT -nq " + create table a (x Int8) engine MergeTree order by x; + insert into a values (1); + select * from mergeTreeIndex(currentDatabase(), 'a') where part_name in (select '123'); +" From 7f3fe3f5aa9cffe857e4d8d7eefb737a89bb46fc Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 3 Jul 2024 03:19:31 +0000 Subject: [PATCH 069/121] Style --- src/Storages/System/StorageSystemReplicationQueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemReplicationQueue.h b/src/Storages/System/StorageSystemReplicationQueue.h index bcf351381ee..82a4d68f300 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.h +++ b/src/Storages/System/StorageSystemReplicationQueue.h @@ -21,7 +21,7 @@ public: protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node * predicate, std::vector) const override; - Block getFilterSampleBlock() const override; + Block getFilterSampleBlock() const override; }; } From e15e32f69b3e5e9458755216f144347f128b45cf Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 4 Jul 2024 18:30:55 +0000 Subject: [PATCH 070/121] Style --- src/Storages/System/IStorageSystemOneBlock.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp 
b/src/Storages/System/IStorageSystemOneBlock.cpp index 7cde31905aa..308b34510ea 100644 --- a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -80,9 +80,9 @@ void IStorageSystemOneBlock::read( void ReadFromSystemOneBlock::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - const auto & sample_block = getOutputStream().header; + const Block & sample_block = getOutputStream().header; MutableColumns res_columns = sample_block.cloneEmptyColumns(); - auto predicate = filter ? filter->getOutputs().at(0) : nullptr; + const ActionsDAG::Node * predicate = filter ? filter->getOutputs().at(0) : nullptr; storage->fillData(res_columns, context, predicate, std::move(columns_mask)); UInt64 num_rows = res_columns.at(0)->size(); From e9b2fac4ece5aa49a21a5d5de287d9cc7c0efff5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Aug 2024 09:10:17 +0000 Subject: [PATCH 071/121] Backport #68203 to 24.7: [Green CI] Fix test 01903_correct_block_size_prediction_with_default --- ...ock_size_prediction_with_default.reference | 6 +++ ...rect_block_size_prediction_with_default.sh | 37 +++++++++++++++++++ ...ect_block_size_prediction_with_default.sql | 13 ------- 3 files changed, 43 insertions(+), 13 deletions(-) create mode 100755 tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh delete mode 100644 tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference index b70a1cb7c75..2c66db91737 100644 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.reference @@ -1,3 +1,9 @@ 8 +8 +1 4 4 +1 +4 +4 +1 diff --git 
a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh new file mode 100755 index 00000000000..1482730af2c --- /dev/null +++ b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Tags: no-random-merge-tree-settings, no-random-settings + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +sql="toUInt16OrNull(arrayFirst((v, k) -> (k = '4Id'), arr[2], arr[1]))" + +# Create the table and fill it +$CLICKHOUSE_CLIENT -n --query=" + CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=(\"[^\"]*?\"|[^\",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); + INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); + ALTER TABLE test_extract ADD COLUMN 15Id Nullable(UInt16) DEFAULT $sql;" + +function test() +{ + # Execute two queries and compare if they have similar memory usage: + # The first query uses the default column value, while the second explicitly uses the same SQL as the default value. 
+ # Follow https://github.com/ClickHouse/ClickHouse/issues/17317 for more info about the issue + where=$1 + + uuid_1=$(cat /proc/sys/kernel/random/uuid) + $CLICKHOUSE_CLIENT --query="SELECT uniq(15Id) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_1 + uuid_2=$(cat /proc/sys/kernel/random/uuid) + $CLICKHOUSE_CLIENT --query="SELECT uniq($sql) FROM test_extract $where SETTINGS max_threads=1" --query_id=$uuid_2 + $CLICKHOUSE_CLIENT -n --query=" + SYSTEM FLUSH LOGS; + WITH memory_1 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_1' AND type = 'QueryFinish' as memory_1), + memory_2 AS (SELECT memory_usage FROM system.query_log WHERE current_database = currentDatabase() AND query_id='$uuid_2' AND type = 'QueryFinish' as memory_2) + SELECT memory_1.memory_usage <= 1.2 * memory_2.memory_usage OR + memory_2.memory_usage <= 1.2 * memory_1.memory_usage FROM memory_1, memory_2;" +} + +test "" +test "PREWHERE 15Id < 4" +test "WHERE 15Id < 4" diff --git a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql b/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql deleted file mode 100644 index 2eec08635eb..00000000000 --- a/tests/queries/0_stateless/01903_correct_block_size_prediction_with_default.sql +++ /dev/null @@ -1,13 +0,0 @@ --- Tags: no-random-merge-tree-settings - -CREATE TABLE test_extract(str String, arr Array(Array(String)) ALIAS extractAllGroupsHorizontal(str, '\\W(\\w+)=("[^"]*?"|[^",}]*)')) ENGINE=MergeTree() PARTITION BY tuple() ORDER BY tuple(); - -INSERT INTO test_extract (str) WITH range(8) as range_arr, arrayMap(x-> concat(toString(x),'Id'), range_arr) as key, arrayMap(x -> rand() % 8, range_arr) as val, arrayStringConcat(arrayMap((x,y) -> concat(x,'=',toString(y)), key, val),',') as str SELECT str FROM numbers(500000); - -ALTER TABLE test_extract ADD COLUMN `15Id` Nullable(UInt16) DEFAULT toUInt16OrNull(arrayFirst((v, k) -> (k = 
'4Id'), arr[2], arr[1])); - -SELECT uniq(15Id) FROM test_extract SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract PREWHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; - -SELECT uniq(15Id) FROM test_extract WHERE 15Id < 4 SETTINGS max_threads=1, max_memory_usage=100000000; From 1251e31857bb7a7a01c701bd915ab6eeedae9def Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 13 Aug 2024 12:10:23 +0000 Subject: [PATCH 072/121] Backport #67554 to 24.7: Fix message queue sink from http interface --- src/Storages/MessageQueueSink.cpp | 17 +- tests/ci/integration_tests_runner.py | 4 +- tests/integration/parallel_skip.json | 75 +++++- .../format_schemas/string_key_value.capnp | 6 + .../format_schemas/string_key_value.format | 1 + .../format_schemas/string_key_value.proto | 6 + .../test_produce_http_interface.py | 243 ++++++++++++++++++ 7 files changed, 347 insertions(+), 5 deletions(-) create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto create mode 100644 tests/integration/test_storage_kafka/test_produce_http_interface.py diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 36899011e33..ab9b19793ec 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -46,6 +46,8 @@ void MessageQueueSink::consume(Chunk & chunk) if (columns.empty()) return; + /// The formatter might hold pointers to buffer (e.g. if PeekableWriteBuffer is used), which means the formatter + /// needs to be reset after buffer might reallocate its memory. In this exact case after restarting the buffer. 
if (row_format) { size_t row = 0; @@ -60,12 +62,12 @@ void MessageQueueSink::consume(Chunk & chunk) row_format->writeRow(columns, row); } row_format->finalize(); - row_format->resetFormatter(); producer->produce(buffer->str(), i, columns, row - 1); /// Reallocate buffer if it's capacity is large then DBMS_DEFAULT_BUFFER_SIZE, /// because most likely in this case we serialized abnormally large row /// and won't need this large allocated buffer anymore. buffer->restart(DBMS_DEFAULT_BUFFER_SIZE); + row_format->resetFormatter(); } } else @@ -73,10 +75,21 @@ void MessageQueueSink::consume(Chunk & chunk) format->write(getHeader().cloneWithColumns(chunk.detachColumns())); format->finalize(); producer->produce(buffer->str(), chunk.getNumRows(), columns, chunk.getNumRows() - 1); - format->resetFormatter(); buffer->restart(); + format->resetFormatter(); } } +void MessageQueueSink::onCancel() noexcept +{ + try + { + onFinish(); + } + catch (...) + { + tryLogCurrentException(getLogger("MessageQueueSink"), "Error occurs on cancellation."); + } +} } diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index 21f16d995a4..501ac882f6f 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -67,9 +67,9 @@ def get_changed_tests_to_run(pr_info, repo_path): return [] for fpath in changed_files: - if "tests/integration/test_" in fpath: + if re.search(r"tests/integration/test_.*/test.*\.py", fpath) is not None: logging.info("File %s changed and seems like integration test", fpath) - result.add(fpath.split("/")[2]) + result.add("/".join(fpath.split("/")[2:])) return filter_existing_tests(result, repo_path) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 3c3d1b6cc96..e6fdf27f54c 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -97,5 +97,78 @@ 
"test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_stop_moves_query", "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_table_detach", - "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_zookeeper_disconnect" + "test_ttl_move/test.py::TestCancelBackgroundMoving::test_cancel_background_moving_on_zookeeper_disconnect", + + "test_storage_kafka/test.py::test_kafka_column_types", + "test_storage_kafka/test.py::test_kafka_settings_old_syntax", + "test_storage_kafka/test.py::test_kafka_settings_new_syntax", + "test_storage_kafka/test.py::test_kafka_settings_predefined_macros", + "test_storage_kafka/test.py::test_kafka_json_as_string", + "test_storage_kafka/test.py::test_kafka_formats", + "test_storage_kafka/test.py::test_kafka_issue11308", + "test_storage_kafka/test.py::test_kafka_issue4116", + "test_storage_kafka/test.py::test_kafka_consumer_hang", + "test_storage_kafka/test.py::test_kafka_consumer_hang2", + "test_storage_kafka/test.py::test_kafka_read_consumers_in_parallel", + "test_storage_kafka/test.py::test_kafka_csv_with_delimiter", + "test_storage_kafka/test.py::test_kafka_tsv_with_delimiter", + "test_storage_kafka/test.py::test_kafka_select_empty", + "test_storage_kafka/test.py::test_kafka_json_without_delimiter", + "test_storage_kafka/test.py::test_kafka_protobuf", + "test_storage_kafka/test.py::test_kafka_string_field_on_first_position_in_protobuf", + "test_storage_kafka/test.py::test_kafka_protobuf_no_delimiter", + "test_storage_kafka/test.py::test_kafka_materialized_view", + "test_storage_kafka/test.py::test_kafka_recreate_kafka_table", + "test_storage_kafka/test.py::test_librdkafka_compression", + "test_storage_kafka/test.py::test_kafka_materialized_view_with_subquery", + "test_storage_kafka/test.py::test_kafka_many_materialized_views", + "test_storage_kafka/test.py::test_kafka_flush_on_big_message", + 
"test_storage_kafka/test.py::test_kafka_virtual_columns", + "test_storage_kafka/test.py::test_kafka_virtual_columns_with_materialized_view", + "test_storage_kafka/test.py::test_kafka_insert", + "test_storage_kafka/test.py::test_kafka_produce_consume", + "test_storage_kafka/test.py::test_kafka_commit_on_block_write", + "test_storage_kafka/test.py::test_kafka_virtual_columns2", + "test_storage_kafka/test.py::test_kafka_producer_consumer_separate_settings", + "test_storage_kafka/test.py::test_kafka_produce_key_timestamp", + "test_storage_kafka/test.py::test_kafka_insert_avro", + "test_storage_kafka/test.py::test_kafka_produce_consume_avro", + "test_storage_kafka/test.py::test_kafka_flush_by_time", + "test_storage_kafka/test.py::test_kafka_flush_by_block_size", + "test_storage_kafka/test.py::test_kafka_lot_of_partitions_partial_commit_of_bulk", + "test_storage_kafka/test.py::test_kafka_rebalance", + "test_storage_kafka/test.py::test_kafka_no_holes_when_write_suffix_failed", + "test_storage_kafka/test.py::test_exception_from_destructor", + "test_storage_kafka/test.py::test_commits_of_unprocessed_messages_on_drop", + "test_storage_kafka/test.py::test_bad_reschedule", + "test_storage_kafka/test.py::test_kafka_duplicates_when_commit_failed", + "test_storage_kafka/test.py::test_premature_flush_on_eof", + "test_storage_kafka/test.py::test_kafka_unavailable", + "test_storage_kafka/test.py::test_kafka_issue14202", + "test_storage_kafka/test.py::test_kafka_csv_with_thread_per_consumer", + "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream", + "test_storage_kafka/test.py::test_kafka_engine_put_errors_to_stream_with_random_malformed_json", + "test_storage_kafka/test.py::test_kafka_formats_with_broken_message", + "test_storage_kafka/test.py::test_kafka_consumer_failover", + "test_storage_kafka/test.py::test_kafka_predefined_configuration", + "test_storage_kafka/test.py::test_issue26643", + "test_storage_kafka/test.py::test_num_consumers_limit", + 
"test_storage_kafka/test.py::test_format_with_prefix_and_suffix", + "test_storage_kafka/test.py::test_max_rows_per_message", + "test_storage_kafka/test.py::test_row_based_formats", + "test_storage_kafka/test.py::test_block_based_formats_1", + "test_storage_kafka/test.py::test_block_based_formats_2", + "test_storage_kafka/test.py::test_system_kafka_consumers", + "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance", + "test_storage_kafka/test.py::test_system_kafka_consumers_rebalance_mv", + "test_storage_kafka/test.py::test_formats_errors", + "test_storage_kafka/test.py::test_multiple_read_in_materialized_views", + "test_storage_kafka/test.py::test_kafka_null_message", + + "test_storage_kafka/test_produce_http_interface.py::test_kafka_produce_http_interface_row_based_format", + + "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string", + "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_request_new_ticket_after_expiration", + "test_storage_kerberized_kafka/test.py::test_kafka_json_as_string_no_kdc", + "test_storage_kerberized_kafka/test.py::test_kafka_config_from_sql_named_collection" ] diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp new file mode 100644 index 00000000000..4f3eabe22f0 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.capnp @@ -0,0 +1,6 @@ +@0x99f75f775fe63dae; + +struct StringKeyValuePair { + key@0 : Text; + value@1 : Text; +} diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format new file mode 100644 index 00000000000..83dff6ce401 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.format @@ -0,0 +1 @@ +(key = 
${key:CSV}, value = ${value:CSV}) diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto new file mode 100644 index 00000000000..71905c63bdf --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/string_key_value.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message StringKeyValuePair { + string key = 1; + string value = 2; +} diff --git a/tests/integration/test_storage_kafka/test_produce_http_interface.py b/tests/integration/test_storage_kafka/test_produce_http_interface.py new file mode 100644 index 00000000000..fc10a07f239 --- /dev/null +++ b/tests/integration/test_storage_kafka/test_produce_http_interface.py @@ -0,0 +1,243 @@ +import time +import logging + +import pytest +from helpers.cluster import ClickHouseCluster, is_arm +from helpers.test_tools import TSV +from kafka import KafkaAdminClient +from kafka.admin import NewTopic + +if is_arm(): + pytestmark = pytest.mark.skip + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kafka.xml", "configs/named_collection.xml"], + user_configs=["configs/users.xml"], + with_kafka=True, + with_zookeeper=True, # For Replicated Table + macros={ + "kafka_broker": "kafka1", + "kafka_topic_old": "old", + "kafka_group_name_old": "old", + "kafka_topic_new": "new", + "kafka_group_name_new": "new", + "kafka_client_id": "instance", + "kafka_format_json_each_row": "JSONEachRow", + }, + clickhouse_path_dir="clickhouse_path", +) + + +@pytest.fixture(scope="module") +def kafka_cluster(): + try: + cluster.start() + kafka_id = instance.cluster.kafka_docker_id + print(("kafka_id is {}".format(kafka_id))) + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kafka_setup_teardown(): + instance.query("DROP DATABASE IF EXISTS test; CREATE DATABASE test;") + # 
logging.debug("kafka is available - running test") + yield # run test + + +def kafka_create_topic( + admin_client, + topic_name, + num_partitions=1, + replication_factor=1, + max_retries=50, + config=None, +): + logging.debug( + f"Kafka create topic={topic_name}, num_partitions={num_partitions}, replication_factor={replication_factor}" + ) + topics_list = [ + NewTopic( + name=topic_name, + num_partitions=num_partitions, + replication_factor=replication_factor, + topic_configs=config, + ) + ] + retries = 0 + while True: + try: + admin_client.create_topics(new_topics=topics_list, validate_only=False) + logging.debug("Admin client succeed") + return + except Exception as e: + retries += 1 + time.sleep(0.5) + if retries < max_retries: + logging.warning(f"Failed to create topic {e}") + else: + raise + + +def kafka_delete_topic(admin_client, topic, max_retries=50): + result = admin_client.delete_topics([topic]) + for topic, e in result.topic_error_codes: + if e == 0: + logging.debug(f"Topic {topic} deleted") + else: + logging.error(f"Failed to delete topic {topic}: {e}") + + retries = 0 + while True: + topics_listed = admin_client.list_topics() + logging.debug(f"TOPICS LISTED: {topics_listed}") + if topic not in topics_listed: + return + else: + retries += 1 + time.sleep(0.5) + if retries > max_retries: + raise Exception(f"Failed to delete topics {topic}, {result}") + + +def test_kafka_produce_http_interface_row_based_format(kafka_cluster): + # reproduction of #61060 with validating the written messages + admin_client = KafkaAdminClient( + bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) + ) + + topic_prefix = "http_row_" + + # It is important to have: + # - long enough messages + # - enough messages + # I don't know the exact requirement for message sizes, but it doesn't reproduce with short messages + # For the number of messages it seems like at least 3 messages is necessary + expected_key = "01234567890123456789" + expected_value = "aaaaabbbbbccccc" 
+ + insert_query_end = f"(key, value) VALUES ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}'), ('{expected_key}', '{expected_value}')" + insert_query_template = "INSERT INTO {table_name} " + insert_query_end + + extra_settings = { + "Protobuf": ", kafka_schema = 'string_key_value.proto:StringKeyValuePair'", + "CapnProto": ", kafka_schema='string_key_value:StringKeyValuePair'", + "Template": ", format_template_row='string_key_value.format'", + } + + # Only the formats that can be used both and input and output format are tested + # Reasons to exclude following formats: + # - JSONStrings: not actually an input format + # - ProtobufSingle: I cannot make it work to parse the messages. Probably something is broken, + # because the producer can write multiple rows into a same message, which makes them impossible to parse properly. Should added after #67549 is fixed. + # - ProtobufList: I didn't want to deal with the envelope and stuff + # - Npy: supports only single column + # - LineAsString: supports only single column + # - RawBLOB: supports only single column + formats_to_test = [ + "TabSeparated", + "TabSeparatedRaw", + "TabSeparatedWithNames", + "TabSeparatedWithNamesAndTypes", + "TabSeparatedRawWithNames", + "TabSeparatedRawWithNamesAndTypes", + "Template", + "CSV", + "CSVWithNames", + "CSVWithNamesAndTypes", + "CustomSeparated", + "CustomSeparatedWithNames", + "CustomSeparatedWithNamesAndTypes", + "Values", + "JSON", + "JSONColumns", + "JSONColumnsWithMetadata", + "JSONCompact", + "JSONCompactColumns", + "JSONEachRow", + "JSONStringsEachRow", + "JSONCompactEachRow", + "JSONCompactEachRowWithNames", + "JSONCompactEachRowWithNamesAndTypes", + "JSONCompactStringsEachRow", + "JSONCompactStringsEachRowWithNames", + "JSONCompactStringsEachRowWithNamesAndTypes", + "JSONObjectEachRow", + "BSONEachRow", + "TSKV", + "Protobuf", + "Avro", + "Parquet", + "Arrow", + "ArrowStream", + "ORC", + "RowBinary", + "RowBinaryWithNames", + 
"RowBinaryWithNamesAndTypes", + "Native", + "CapnProto", + "MsgPack", + ] + for format in formats_to_test: + logging.debug(f"Creating tables and writing messages to {format}") + topic = topic_prefix + format + kafka_create_topic(admin_client, topic) + + extra_setting = extra_settings.get(format, "") + + # kafka_max_rows_per_message is set to 2 to make sure every format produces at least 2 messages, thus increasing the chance of catching a bug + instance.query( + f""" + DROP TABLE IF EXISTS test.view_{topic}; + DROP TABLE IF EXISTS test.consumer_{topic}; + CREATE TABLE test.kafka_writer_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}', + kafka_max_rows_per_message = 2 {extra_setting}; + + CREATE TABLE test.kafka_{topic} (key String, value String) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = '{topic}', + kafka_group_name = '{topic}', + kafka_format = '{format}' {extra_setting}; + + CREATE MATERIALIZED VIEW test.view_{topic} Engine=Log AS + SELECT key, value FROM test.kafka_{topic}; + """ + ) + instance.http_query( + insert_query_template.format(table_name="test.kafka_writer_" + topic), + method="POST", + ) + + expected = f"""\ +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +{expected_key}\t{expected_value} +""" + # give some times for the readers to read the messages + for format in formats_to_test: + logging.debug(f"Checking result for {format}") + topic = topic_prefix + format + + result = instance.query_with_retry( + f"SELECT * FROM test.view_{topic}", + check_callback=lambda res: res.count("\n") == 3, + ) + + assert TSV(result) == TSV(expected) + + kafka_delete_topic(admin_client, topic) + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() From 76532ddd8e1cfc8263dcc4b260919627102792ee Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 13 Aug 2024 14:37:02 +0200 Subject: [PATCH 073/121] Fix build --- src/Storages/MessageQueueSink.cpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index ab9b19793ec..c3b33440569 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -80,16 +80,5 @@ void MessageQueueSink::consume(Chunk & chunk) } } -void MessageQueueSink::onCancel() noexcept -{ - try - { - onFinish(); - } - catch (...) - { - tryLogCurrentException(getLogger("MessageQueueSink"), "Error occurs on cancellation."); - } -} } From 39056b2e29c5fe6419e925c3e294b86c7f0ad424 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 13 Aug 2024 22:12:23 +0000 Subject: [PATCH 074/121] buildSetsForDAG() arg pointerness --- src/Storages/StorageMergeTreeIndex.cpp | 2 +- src/Storages/System/IStorageSystemOneBlock.cpp | 2 +- src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemDataSkippingIndices.cpp | 2 +- src/Storages/System/StorageSystemReplicas.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageMergeTreeIndex.cpp b/src/Storages/StorageMergeTreeIndex.cpp index 90d01d356e9..af9759ad3f7 100644 --- a/src/Storages/StorageMergeTreeIndex.cpp +++ b/src/Storages/StorageMergeTreeIndex.cpp @@ -292,7 +292,7 @@ void ReadFromMergeTreeIndex::applyFilters(ActionDAGNodes added_filter_nodes) virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); if (virtual_columns_filter) - VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + VirtualColumnUtils::buildSetsForDAG(*virtual_columns_filter, context); } } diff --git a/src/Storages/System/IStorageSystemOneBlock.cpp b/src/Storages/System/IStorageSystemOneBlock.cpp index 308b34510ea..d932dc902d4 100644 --- 
a/src/Storages/System/IStorageSystemOneBlock.cpp +++ b/src/Storages/System/IStorageSystemOneBlock.cpp @@ -106,7 +106,7 @@ void ReadFromSystemOneBlock::applyFilters(ActionDAGNodes added_filter_nodes) /// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery. if (filter) - VirtualColumnUtils::buildSetsForDAG(filter, context); + VirtualColumnUtils::buildSetsForDAG(*filter, context); } } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6b3e0094562..f48aeba026b 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -355,7 +355,7 @@ void ReadFromSystemColumns::applyFilters(ActionDAGNodes added_filter_nodes) /// Must prepare sets here, initializePipeline() would be too late, see comment on FutureSetFromSubquery. if (virtual_columns_filter) - VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + VirtualColumnUtils::buildSetsForDAG(*virtual_columns_filter, context); } } diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index a6bba44e257..e0a6ad1347f 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -231,7 +231,7 @@ void ReadFromSystemDataSkippingIndices::applyFilters(ActionDAGNodes added_filter virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); if (virtual_columns_filter) - VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + VirtualColumnUtils::buildSetsForDAG(*virtual_columns_filter, context); } } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 9fb4dc5ed6f..b6969487d07 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ 
b/src/Storages/System/StorageSystemReplicas.cpp @@ -304,7 +304,7 @@ void ReadFromSystemReplicas::applyFilters(ActionDAGNodes added_filter_nodes) virtual_columns_filter = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), &block_to_filter); if (virtual_columns_filter) - VirtualColumnUtils::buildSetsForDAG(virtual_columns_filter, context); + VirtualColumnUtils::buildSetsForDAG(*virtual_columns_filter, context); } } From ec82d7cfb3b1e448855928b095b39195b1206939 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 14 Aug 2024 12:14:32 +0000 Subject: [PATCH 075/121] Backport #68288 to 24.7: Fix postgres crash --- src/Processors/Sources/PostgreSQLSource.cpp | 12 ++++++------ src/Processors/Sources/PostgreSQLSource.h | 14 +++++++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index a3d6fd691d8..b9bda46bd10 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -35,9 +35,9 @@ PostgreSQLSource::PostgreSQLSource( const Block & sample_block, UInt64 max_block_size_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) , max_block_size(max_block_size_) , connection_holder(std::move(connection_holder_)) + , query_str(query_str_) { init(sample_block); } @@ -51,10 +51,10 @@ PostgreSQLSource::PostgreSQLSource( UInt64 max_block_size_, bool auto_commit_) : ISource(sample_block.cloneEmpty()) - , query_str(query_str_) - , tx(std::move(tx_)) , max_block_size(max_block_size_) , auto_commit(auto_commit_) + , query_str(query_str_) + , tx(std::move(tx_)) { init(sample_block); } @@ -204,15 +204,15 @@ PostgreSQLSource::~PostgreSQLSource() */ stream->close(); } - - stream.reset(); - tx.reset(); } catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); } + stream.reset(); + tx.reset(); + if (connection_holder) connection_holder->setBroken(); } diff --git a/src/Processors/Sources/PostgreSQLSource.h b/src/Processors/Sources/PostgreSQLSource.h index 8a648ae8bb5..319c5d8d7c2 100644 --- a/src/Processors/Sources/PostgreSQLSource.h +++ b/src/Processors/Sources/PostgreSQLSource.h @@ -38,14 +38,12 @@ protected: UInt64 max_block_size_, bool auto_commit_); - String query_str; - std::shared_ptr tx; - std::unique_ptr stream; - Status prepare() override; - void onStart(); Chunk generate() override; + + void onStart(); + void onFinish(); private: @@ -61,6 +59,12 @@ private: postgres::ConnectionHolderPtr connection_holder; std::unordered_map array_info; + +protected: + String query_str; + /// tx and stream must be destroyed before connection_holder. + std::shared_ptr tx; + std::unique_ptr stream; }; From a265329f8ccb2b2fe1482fb2b80d0b9d9b2ef8f0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 15 Aug 2024 11:19:02 +0000 Subject: [PATCH 076/121] Backport #68326 to 24.7: Fix missing sync replica mode in query `SYSTEM SYNC REPLICA` --- src/Parsers/ASTSystemQuery.cpp | 45 ++++++++++--------- ...03205_system_sync_replica_format.reference | 1 + .../03205_system_sync_replica_format.sql | 1 + 3 files changed, 25 insertions(+), 22 deletions(-) create mode 100644 tests/queries/0_stateless/03205_system_sync_replica_format.reference create mode 100644 tests/queries/0_stateless/03205_system_sync_replica_format.sql diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a730ea0ba3d..7780544d5c2 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -198,6 +198,29 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s print_database_table(); } + if (sync_replica_mode != SyncReplicaMode::DEFAULT) + { + settings.ostr << ' '; + print_keyword(magic_enum::enum_name(sync_replica_mode)); + + // If the mode is 
LIGHTWEIGHT and specific source replicas are specified + if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) + { + settings.ostr << ' '; + print_keyword("FROM"); + settings.ostr << ' '; + + bool first = true; + for (const auto & src : src_replicas) + { + if (!first) + settings.ostr << ", "; + first = false; + settings.ostr << quoteString(src); + } + } + } + if (query_settings) { settings.ostr << (settings.hilite ? hilite_keyword : "") << settings.nl_or_ws << "SETTINGS " << (settings.hilite ? hilite_none : ""); @@ -233,28 +256,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState & s print_identifier(disk); } - if (sync_replica_mode != SyncReplicaMode::DEFAULT) - { - settings.ostr << ' '; - print_keyword(magic_enum::enum_name(sync_replica_mode)); - - // If the mode is LIGHTWEIGHT and specific source replicas are specified - if (sync_replica_mode == SyncReplicaMode::LIGHTWEIGHT && !src_replicas.empty()) - { - settings.ostr << ' '; - print_keyword("FROM"); - settings.ostr << ' '; - - bool first = true; - for (const auto & src : src_replicas) - { - if (!first) - settings.ostr << ", "; - first = false; - settings.ostr << quoteString(src); - } - } - } break; } case Type::SYNC_DATABASE_REPLICA: diff --git a/tests/queries/0_stateless/03205_system_sync_replica_format.reference b/tests/queries/0_stateless/03205_system_sync_replica_format.reference new file mode 100644 index 00000000000..aad51dd90b0 --- /dev/null +++ b/tests/queries/0_stateless/03205_system_sync_replica_format.reference @@ -0,0 +1 @@ +SYSTEM SYNC REPLICA db.`table` LIGHTWEIGHT diff --git a/tests/queries/0_stateless/03205_system_sync_replica_format.sql b/tests/queries/0_stateless/03205_system_sync_replica_format.sql new file mode 100644 index 00000000000..329bce80afc --- /dev/null +++ b/tests/queries/0_stateless/03205_system_sync_replica_format.sql @@ -0,0 +1 @@ +SELECT formatQuery('SYSTEM SYNC REPLICA db.table LIGHTWEIGHT'); From 
47df81aeb17b5fb365d25591c123c337971f1a03 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 15 Aug 2024 21:07:23 +0000 Subject: [PATCH 077/121] Backport #68265 to 24.7: tests: make 01600_parts_states_metrics_long better --- .../01600_parts_states_metrics_long.sh | 63 ++++++++++++------- 1 file changed, 40 insertions(+), 23 deletions(-) diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 2e47034e528..a0ed2b6bfeb 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,40 +1,57 @@ #!/usr/bin/env bash - +# Tags: long, no-parallel CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +function query() +{ + # NOTE: database_atomic_wait_for_drop_and_detach_synchronously needed only for local env, CI has it ON + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*" +} -# NOTE: database = $CLICKHOUSE_DATABASE is unwanted -verify_sql="SELECT - (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) - = (SELECT sum(active), sum(NOT active) FROM - (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts UNION ALL SELECT 1 FROM system.dropped_tables_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition. But it should get expected result eventually. # In case of test failure, this code will do infinite loop and timeout. 
verify() { - while true - do - result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) - [ "$result" = "1" ] && break - sleep 0.1 + local result + + for _ in {1..100}; do + # NOTE: database = $CLICKHOUSE_DATABASE is unwanted + result=$( query "SELECT + (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) + = + (SELECT sum(active), sum(NOT active) FROM ( + SELECT active FROM system.parts + UNION ALL SELECT active FROM system.projection_parts + UNION ALL SELECT 1 FROM system.dropped_tables_parts + ))" + ) + + if [ "$result" = "1" ]; then + echo "$result" + return + fi + + sleep 0.5 done - echo 1 + + $CLICKHOUSE_CLIENT -q " + SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics; + SELECT sum(active), sum(NOT active) FROM system.parts; + SELECT sum(active), sum(NOT active) FROM system.projection_parts; + SELECT count() FROM system.dropped_tables_parts; + " } -$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table" -$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table(data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" - -$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')" +query "DROP TABLE IF EXISTS test_table" +query "CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;" +query "INSERT INTO test_table VALUES ('1992-01-01')" verify - -$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')" +query "INSERT INTO test_table VALUES ('1992-01-02')" verify - -$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL" -verify - -$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table" +query "OPTIMIZE TABLE test_table FINAL" verify +query "DROP TABLE test_table" +verify \ No newline at end of file From 17a54f10fde95fc2869a58bd7f2a1c25f319cae0 Mon Sep 17 
00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Aug 2024 12:07:50 +0000 Subject: [PATCH 078/121] Backport #68457 to 24.7: CI: Native build for package_aarch64 --- cmake/limit_jobs.cmake | 16 +++------------- tests/ci/ci_config.py | 3 ++- tests/ci/ci_definitions.py | 1 + tests/ci/test_ci_config.py | 14 ++++++++++---- 4 files changed, 16 insertions(+), 18 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 17d8dd42a2c..8e48fc9b9d8 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,19 +42,9 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) - if (ARCH_AARCH64) - # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") - set (PARALLEL_LINK_JOBS 1) - if (LINKER_NAME MATCHES "lld") - math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") - endif() - elseif (PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) - endif () +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a44b15f34c1..a84d39cf191 
100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -87,7 +87,8 @@ class CI: package_type="deb", static_binary_name="aarch64", additional_pkgs=True, - ) + ), + runner_type=Runners.BUILDER_ARM, ), BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index a8d9793f1d3..78de91bb578 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -79,6 +79,7 @@ class Runners(metaclass=WithIter): """ BUILDER = "builder" + BUILDER_ARM = "builder-aarch64" STYLE_CHECKER = "style-checker" STYLE_CHECKER_ARM = "style-checker-aarch64" FUNC_TESTER = "func-tester" diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 44142050821..aff49ae0d62 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -35,10 +35,16 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] must have style-checker(-aarch64) runner", ) elif "binary_" in job.lower() or "package_" in job.lower(): - self.assertTrue( - CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, - f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", - ) + if job.lower() == CI.BuildNames.PACKAGE_AARCH64: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER_ARM,), + f"Job [{job}] must have [{CI.Runners.BUILDER_ARM}] runner", + ) + else: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER,), + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) elif "aarch64" in job.lower(): self.assertTrue( "aarch" in CI.JOB_CONFIGS[job].runner_type, From 9001a302a261a8d8053176b32c84abe93ef7553b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 19 Aug 2024 15:08:19 +0000 Subject: [PATCH 079/121] Backport #68536 to 24.7: CI: Minor release workflow fix --- .github/workflows/release_branches.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/release_branches.yml 
b/.github/workflows/release_branches.yml index b79208b03a6..ff40515ffa6 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -485,12 +485,11 @@ jobs: # update mergeable check python3 merge_pr.py --set-ci-status --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} # update overall ci report - python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} + python3 ./tests/ci/finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }} - name: Check Workflow results run: | export WORKFLOW_RESULT_FILE="/tmp/workflow_results.json" cat > "$WORKFLOW_RESULT_FILE" << 'EOF' ${{ toJson(needs) }} EOF - python3 ./tests/ci/ci_buddy.py --check-wf-status From 2f92b735461d57bf0e608c57c0c19a0e8de21a67 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 31 Jul 2024 14:48:33 +0200 Subject: [PATCH 080/121] Backport #67522 to 24.7: Analyzer: Do not traverse unresolved subtrees --- src/Planner/findParallelReplicasQuery.cpp | 14 ++++++-------- src/Planner/findQueryForParallelReplicas.h | 2 +- ...5_analyzer_replace_with_dummy_tables.reference | 0 .../03215_analyzer_replace_with_dummy_tables.sql | 15 +++++++++++++++ 4 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference create mode 100644 tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql diff --git a/src/Planner/findParallelReplicasQuery.cpp b/src/Planner/findParallelReplicasQuery.cpp index c89a70be541..39edb1e6516 100644 --- a/src/Planner/findParallelReplicasQuery.cpp +++ b/src/Planner/findParallelReplicasQuery.cpp @@ -113,13 +113,13 @@ std::stack getSupportingParallelReplicasQuery(const IQueryTre return res; } -class ReplaceTableNodeToDummyVisitor : public InDepthQueryTreeVisitor +class ReplaceTableNodeToDummyVisitor : public InDepthQueryTreeVisitorWithContext { public: - 
using Base = InDepthQueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(const QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * table_node = node->as(); auto * table_function_node = node->as(); @@ -134,21 +134,19 @@ public: ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot); - auto dummy_table_node = std::make_shared(std::move(storage_dummy), context); + auto dummy_table_node = std::make_shared(std::move(storage_dummy), getContext()); dummy_table_node->setAlias(node->getAlias()); replacement_map.emplace(node.get(), std::move(dummy_table_node)); } } - ContextPtr context; std::unordered_map replacement_map; }; -QueryTreeNodePtr replaceTablesWithDummyTables(const QueryTreeNodePtr & query, const ContextPtr & context) +QueryTreeNodePtr replaceTablesWithDummyTables(QueryTreeNodePtr query, const ContextPtr & context) { - ReplaceTableNodeToDummyVisitor visitor; - visitor.context = context; + ReplaceTableNodeToDummyVisitor visitor(context); visitor.visit(query); return query->cloneAndReplace(visitor.replacement_map); diff --git a/src/Planner/findQueryForParallelReplicas.h b/src/Planner/findQueryForParallelReplicas.h index f5dc69dfa0e..cdce4ad0b47 100644 --- a/src/Planner/findQueryForParallelReplicas.h +++ b/src/Planner/findQueryForParallelReplicas.h @@ -13,7 +13,7 @@ using QueryTreeNodePtr = std::shared_ptr; struct SelectQueryOptions; -/// Find a qury which can be executed with parallel replicas up to WithMergableStage. +/// Find a query which can be executed with parallel replicas up to WithMergableStage. /// Returned query will always contain some (>1) subqueries, possibly with joins. 
const QueryNode * findQueryForParallelReplicas(const QueryTreeNodePtr & query_tree_node, SelectQueryOptions & select_query_options); diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql new file mode 100644 index 00000000000..6d084c2ac50 --- /dev/null +++ b/tests/queries/0_stateless/03215_analyzer_replace_with_dummy_tables.sql @@ -0,0 +1,15 @@ +create table t (number UInt64) engine MergeTree order by number; + +SELECT 1 +FROM +( + SELECT number IN ( + SELECT number + FROM view( + SELECT number + FROM numbers(1) + ) + ) + FROM t +) +SETTINGS allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, allow_experimental_analyzer = 1; -- { serverError CLUSTER_DOESNT_EXIST } From 9c95f8cb565564029452b8d4fefde31736bedc8f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 21 Aug 2024 15:08:56 +0000 Subject: [PATCH 081/121] Backport #68630 to 24.7: Fix `LOGICAL_ERROR`s with functions `sipHash(64/128)Keyed` --- src/Functions/FunctionsHashing.h | 60 ++++++++++--------- .../0_stateless/02534_keyed_siphash.reference | 9 ++- .../0_stateless/02534_keyed_siphash.sql | 18 ++++-- .../02552_siphash128_reference.sql | 4 +- 4 files changed, 56 insertions(+), 35 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 95c54ac9528..0cf4246fd66 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -77,64 +77,70 @@ namespace impl ColumnPtr key0; ColumnPtr key1; bool is_const; - const ColumnArray::Offsets * offsets{}; + const ColumnArray::Offsets * offsets = nullptr; size_t size() const { assert(key0 && key1); assert(key0->size() == key1->size()); - assert(offsets 
== nullptr || offsets->size() == key0->size()); - if (offsets != nullptr) + if (offsets != nullptr && !offsets->empty()) return offsets->back(); return key0->size(); } + SipHashKey getKey(size_t i) const { if (is_const) i = 0; + assert(key0->size() == key1->size()); if (offsets != nullptr) { - const auto *const begin = offsets->begin(); + const auto * const begin = offsets->begin(); const auto * upper = std::upper_bound(begin, offsets->end(), i); - if (upper == offsets->end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "offset {} not found in function SipHashKeyColumns::getKey", i); - i = upper - begin; + if (upper != offsets->end()) + i = upper - begin; } const auto & key0data = assert_cast(*key0).getData(); const auto & key1data = assert_cast(*key1).getData(); + assert(key0->size() > i); return {key0data[i], key1data[i]}; } }; static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key) { - const ColumnTuple * tuple = nullptr; - const auto * column = key.column.get(); - bool is_const = false; - if (isColumnConst(*column)) + const auto * col_key = key.column.get(); + + bool is_const; + const ColumnTuple * col_key_tuple; + if (isColumnConst(*col_key)) { is_const = true; - tuple = checkAndGetColumnConstData(column); + col_key_tuple = checkAndGetColumnConstData(col_key); } else - tuple = checkAndGetColumn(column); - if (!tuple) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); - if (tuple->tupleSize() != 2) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); + { + is_const = false; + col_key_tuple = checkAndGetColumn(col_key); + } - SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const}; - assert(ret.key0); - if (!checkColumn(*ret.key0)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); - assert(ret.key1); - if (!checkColumn(*ret.key1)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the 
key tuple is not UInt64"); + if (!col_key_tuple || col_key_tuple->tupleSize() != 2) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The key must be of type Tuple(UInt64, UInt64)"); - if (ret.size() == 1) - ret.is_const = true; + SipHashKeyColumns result{.key0 = col_key_tuple->getColumnPtr(0), .key1 = col_key_tuple->getColumnPtr(1), .is_const = is_const}; - return ret; + assert(result.key0); + assert(result.key1); + + if (!checkColumn(*result.key0)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 1st element of the key tuple is not of type UInt64"); + if (!checkColumn(*result.key1)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The 2nd element of the key tuple is not of type UInt64"); + + if (result.size() == 1) + result.is_const = true; + + return result; } } diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 3f478218ff1..31c0cae8981 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -236,6 +236,13 @@ Check asan bug 0 Check bug found fuzzing 9042C6691B1A75F0EA3314B6F55728BB -Check bug 2 found fuzzing +Test arrays and maps 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B +Test emtpy arrays and maps +4761183170873013810 +0AD04BFD000000000000000000000000 +4761183170873013810 +0AD04BFD000000000000000000000000 +16734549324845627102 +D675BB3D687973A238AB891DD99C7047 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index fb707109c83..b499d8ef02b 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -263,10 +263,10 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 
31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash64Keyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128Keyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } select hex(sipHash64()); SELECT hex(sipHash128()); @@ -339,9 +339,17 @@ SELECT 'Check bug found fuzzing'; SELECT [(255, 1048575)], sipHash128ReferenceKeyed((toUInt64(2147483646), toUInt64(9223372036854775807)), ([(NULL, 100), (NULL, NULL), (1024, 10)], toUInt64(2), toUInt64(1024)), ''), hex(sipHash128ReferenceKeyed((-9223372036854775807, 1.), '-1', NULL)), ('', toUInt64(65535), [(9223372036854775807, 9223372036854775806)], toUInt64(65536)), 
arrayJoin((NULL, 65537, 255), [(NULL, NULL)]) GROUP BY tupleElement((NULL, NULL, NULL, -1), toUInt64(2), 2) = NULL; -- { serverError NOT_IMPLEMENTED } SELECT hex(sipHash128ReferenceKeyed((0::UInt64, 0::UInt64), ([1, 1]))); -SELECT 'Check bug 2 found fuzzing'; +SELECT 'Test arrays and maps'; DROP TABLE IF EXISTS sipHashKeyed_keys; CREATE TABLE sipHashKeyed_keys (`a` Map(String, String)) ENGINE = Memory; INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g':'h'}); SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; + +SELECT 'Test emtpy arrays and maps'; +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); +SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index f7324ed0ee4..46f292d667d 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -200,8 +200,8 @@ select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError BAD_ARGUMENTS } +select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError BAD_ARGUMENTS } SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; From 7e1bf34d98190841cd9aca99bd6a1df0f365f8de Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 22 Aug 2024 15:41:36 +0000 Subject: [PATCH 082/121] Update autogenerated version to 24.7.3.47 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 8ccccb7e3f9..823f8bca1ca 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54488) +SET(VERSION_REVISION 54489) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 3) -SET(VERSION_GITHASH 63730bc42939f76cb2b03be385ff08051ea2e3fe) -SET(VERSION_DESCRIBE v24.7.3.42-stable) -SET(VERSION_STRING 24.7.3.42) +SET(VERSION_PATCH 4) +SET(VERSION_GITHASH 2e50fe27a14f0f08eb3dab142a7a5b1c7014b4e9) +SET(VERSION_DESCRIBE v24.7.4.1-stable) +SET(VERSION_STRING 24.7.4.1) # end of autochange From ec5812c6b2a2e0ee17fcdec892c988278500882e Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 22 Aug 2024 21:47:25 +0200 Subject: [PATCH 083/121] retrigger builds From 6862ccf8bededd75831edd3b8aed2f1e0ac54bac Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 01:36:01 +0000 Subject: [PATCH 084/121] Backport #68750 to 24.7: CI: Force package_debug build on release branches --- .github/workflows/release_branches.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index ff40515ffa6..297927d1dbd 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -130,6 +130,7 @@ jobs: with: build_name: package_debug data: ${{ needs.RunConfig.outputs.data }} + force: true BuilderBinDarwin: needs: [RunConfig, BuildDockers] if: ${{ !failure() && !cancelled() }} From 36e8645c6b85db03d052b73497d293d0383c88fa Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 08:05:09 +0000 Subject: [PATCH 085/121] Update autogenerated version to 24.7.4.51 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 823f8bca1ca..3317dc649e5 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented 
on protocol changes. -SET(VERSION_REVISION 54489) +SET(VERSION_REVISION 54490) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 4) -SET(VERSION_GITHASH 2e50fe27a14f0f08eb3dab142a7a5b1c7014b4e9) -SET(VERSION_DESCRIBE v24.7.4.1-stable) -SET(VERSION_STRING 24.7.4.1) +SET(VERSION_PATCH 5) +SET(VERSION_GITHASH 70fe2f6fa527ed19c350e9adacccd073204e031f) +SET(VERSION_DESCRIBE v24.7.5.1-stable) +SET(VERSION_STRING 24.7.5.1) # end of autochange From a7e09f20e36b4ee6a778e4e63441305d7f41f16d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 10:09:56 +0000 Subject: [PATCH 086/121] Backport #68737 to 24.7: Fix flaky test 00989_parallel_parts_loading --- tests/queries/0_stateless/00989_parallel_parts_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/00989_parallel_parts_loading.sql b/tests/queries/0_stateless/00989_parallel_parts_loading.sql index 407e124f137..3b73e6a0e3c 100644 --- a/tests/queries/0_stateless/00989_parallel_parts_loading.sql +++ b/tests/queries/0_stateless/00989_parallel_parts_loading.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings, no-random-merge-tree-settings +-- small number of insert threads can make insert terribly slow, especially with some build like msan DROP TABLE IF EXISTS mt; CREATE TABLE mt (x UInt64) ENGINE = MergeTree ORDER BY x SETTINGS parts_to_delay_insert = 100000, parts_to_throw_insert = 100000; From beb328988ba767d476143d12c9fe55ea312bee5c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 10:11:45 +0000 Subject: [PATCH 087/121] Backport #68731 to 24.7: Fix regression in `sipHash(64/128)Keyed` --- src/Functions/FunctionsHashing.h | 4 ++-- tests/queries/0_stateless/02534_keyed_siphash.reference | 9 +++++++-- tests/queries/0_stateless/02534_keyed_siphash.sql | 7 +++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 0cf4246fd66..3da0b2cd9be 100644 --- 
a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -93,9 +93,9 @@ namespace impl if (is_const) i = 0; assert(key0->size() == key1->size()); - if (offsets != nullptr) + if (offsets != nullptr && i > 0) { - const auto * const begin = offsets->begin(); + const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1); const auto * upper = std::upper_bound(begin, offsets->end(), i); if (upper != offsets->end()) i = upper - begin; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 31c0cae8981..a05446a494e 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -239,10 +239,15 @@ Check bug found fuzzing Test arrays and maps 608E1FF030C9E206185B112C2A25F1A7 ABB65AE97711A2E053E324ED88B1D08B -Test emtpy arrays and maps +Test empty arrays and maps 4761183170873013810 0AD04BFD000000000000000000000000 4761183170873013810 0AD04BFD000000000000000000000000 +Test maps with arrays as keys 16734549324845627102 -D675BB3D687973A238AB891DD99C7047 +1D03941D808D04810D2363A6C107D622 +16734549324845627102 +16734549324845627102 +1D03941D808D04810D2363A6C107D622 +1D03941D808D04810D2363A6C107D622 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index b499d8ef02b..7cfc82512bd 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -346,10 +346,13 @@ INSERT INTO sipHashKeyed_keys FORMAT VALUES ({'a':'b', 'c':'d'}), ({'e':'f', 'g' SELECT hex(sipHash128ReferenceKeyed((0::UInt64, materialize(0::UInt64)), a)) FROM sipHashKeyed_keys ORDER BY a; DROP TABLE sipHashKeyed_keys; -SELECT 'Test emtpy arrays and maps'; +SELECT 'Test empty arrays and maps'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), []); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), [])); SELECT 
sipHash64Keyed((1::UInt64, 2::UInt64), mapFromArrays([], [])); SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), mapFromArrays([], []))); +SELECT 'Test maps with arrays as keys'; SELECT sipHash64Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3)); -SELECT hex(sipHash128Keyed((0::UInt64, 0::UInt64), map([0], 1, [2], 3))); +SELECT hex(sipHash128Keyed((1::UInt64, 2::UInt64), map([0], 1, [2], 3))); +SELECT sipHash64Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3)) FROM numbers(2); +SELECT hex(sipHash128Keyed((materialize(1::UInt64), 2::UInt64), map([0], 1, [2], 3))) FROM numbers(2); From dbd71f43f025c39dc2e8d353610aaa2f149dca35 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 14:17:57 +0000 Subject: [PATCH 088/121] Backport #68131 to 24.7: Fix crash on parquet column type mismatch --- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 9 ++ .../Formats/Impl/ParquetBlockInputFormat.cpp | 130 +++++++++++++----- ...arquet_big_integer_compatibility.reference | 1 + ...02786_parquet_big_integer_compatibility.sh | 3 + .../02841_parquet_filter_pushdown.reference | 2 + .../02841_parquet_filter_pushdown.sql | 6 + 6 files changed, 114 insertions(+), 37 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ed91913de4d..5e7f763dfbc 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -743,6 +743,15 @@ static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( case TypeIndex::IPv6: return readIPv6ColumnFromBinaryData(arrow_column, column_name); /// ORC format outputs big integers as binary column, because there is no fixed binary in ORC. + /// + /// When ORC/Parquet file says the type is "byte array" or "fixed len byte array", + /// but the clickhouse query says to interpret the column as e.g. 
Int128, it + /// may mean one of two things: + /// * The byte array is the 16 bytes of Int128, little-endian. + /// * The byte array is an ASCII string containing the Int128 formatted in base 10. + /// There's no reliable way to distinguish these cases. We just guess: if the + /// byte array is variable-length, and the length is different from sizeof(type), + /// we parse as text, otherwise as binary. case TypeIndex::Int128: return readColumnWithBigNumberFromBinaryData(arrow_column, column_name, type_hint); case TypeIndex::UInt128: diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index e837d4d5e20..035a98e974d 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace CurrentMetrics { @@ -54,7 +55,7 @@ namespace ErrorCodes } \ } while (false) -/// Decode min/max value from column chunk statistics. +/// Decode min/max value from column chunk statistics. Returns Null if missing or unsupported. /// /// There are two questionable decisions in this implementation: /// * We parse the value from the encoded byte string instead of casting the parquet::Statistics @@ -62,7 +63,7 @@ namespace ErrorCodes /// * We dispatch based on the parquet logical+converted+physical type instead of the ClickHouse type. /// The idea is that this is similar to what we'll have to do when reimplementing Parquet parsing in /// ClickHouse instead of using Arrow (for speed). So, this is an exercise in parsing Parquet manually. 
-static std::optional decodePlainParquetValueSlow(const std::string & data, parquet::Type::type physical_type, const parquet::ColumnDescriptor & descr) +static Field decodePlainParquetValueSlow(const std::string & data, parquet::Type::type physical_type, const parquet::ColumnDescriptor & descr, TypeIndex type_hint) { using namespace parquet; @@ -118,8 +119,6 @@ static std::optional decodePlainParquetValueSlow(const std::string & data if (data.size() != size || size < 1 || size > 32) throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected decimal size: {} (actual {})", size, data.size()); - /// For simplicity, widen all decimals to 256-bit. It should compare correctly with values - /// of different bitness. Int256 val = 0; memcpy(&val, data.data(), size); if (big_endian) @@ -128,7 +127,19 @@ static std::optional decodePlainParquetValueSlow(const std::string & data if (size < 32 && (val >> (size * 8 - 1)) != 0) val |= ~((Int256(1) << (size * 8)) - 1); - return Field(DecimalField(Decimal256(val), static_cast(scale))); + auto narrow = [&](auto x) -> Field + { + memcpy(&x, &val, sizeof(x)); + return Field(DecimalField(x, static_cast(scale))); + }; + if (size <= 4) + return narrow(Decimal32(0)); + else if (size <= 8) + return narrow(Decimal64(0)); + else if (size <= 16) + return narrow(Decimal128(0)); + else + return narrow(Decimal256(0)); } while (false); @@ -185,8 +196,6 @@ static std::optional decodePlainParquetValueSlow(const std::string & data return Field(val); } - /// Strings. - if (physical_type == Type::type::BYTE_ARRAY || physical_type == Type::type::FIXED_LEN_BYTE_ARRAY) { /// Arrow's parquet decoder handles missing min/max values slightly incorrectly. @@ -213,14 +222,31 @@ static std::optional decodePlainParquetValueSlow(const std::string & data /// TODO: Remove this workaround either when we implement our own Parquet decoder that /// doesn't have this bug, or if it's fixed in Arrow. 
if (data.empty()) - return std::nullopt; + return Field(); + /// Long integers, encoded either as text or as little-endian bytes. + /// The parquet file doesn't know that it's numbers, so the min/max are produced by comparing + /// strings lexicographically. So these min and max are mostly useless to us. + /// There's one case where they're not useless: min == max; currently we don't make use of this. + switch (type_hint) + { + case TypeIndex::UInt128: + case TypeIndex::UInt256: + case TypeIndex::Int128: + case TypeIndex::Int256: + case TypeIndex::IPv6: + return Field(); + default: break; + } + + /// Strings. return Field(data); } - /// This one's deprecated in Parquet. + /// This type is deprecated in Parquet. + /// TODO: But turns out it's still used in practice, we should support it. if (physical_type == Type::type::INT96) - throw Exception(ErrorCodes::CANNOT_PARSE_NUMBER, "Parquet INT96 type is deprecated and not supported"); + return Field(); /// Integers. @@ -283,15 +309,13 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa continue; auto stats = it->second; - auto default_value = [&]() -> Field - { - DataTypePtr type = header.getByPosition(idx).type; - if (type->lowCardinality()) - type = assert_cast(*type).getDictionaryType(); - if (type->isNullable()) - type = assert_cast(*type).getNestedType(); - return type->getDefault(); - }; + DataTypePtr type = header.getByPosition(idx).type; + if (type->lowCardinality()) + type = assert_cast(*type).getDictionaryType(); + if (type->isNullable()) + type = assert_cast(*type).getNestedType(); + Field default_value = type->getDefault(); + TypeIndex type_index = type->getTypeId(); /// Only primitive fields are supported, not arrays, maps, tuples, or Nested. 
/// Arrays, maps, and Nested can't be meaningfully supported because Parquet only has min/max @@ -299,14 +323,47 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa /// Same limitation for tuples, but maybe it would make sense to have some kind of tuple /// expansion in KeyCondition to accept ranges per element instead of whole tuple. - std::optional min; - std::optional max; + Field min; + Field max; if (stats->HasMinMax()) { try { - min = decodePlainParquetValueSlow(stats->EncodeMin(), stats->physical_type(), *stats->descr()); - max = decodePlainParquetValueSlow(stats->EncodeMax(), stats->physical_type(), *stats->descr()); + min = decodePlainParquetValueSlow(stats->EncodeMin(), stats->physical_type(), *stats->descr(), type_index); + max = decodePlainParquetValueSlow(stats->EncodeMax(), stats->physical_type(), *stats->descr(), type_index); + + /// If the data type in parquet file substantially differs from the requested data type, + /// it's sometimes correct to just typecast the min/max values. + /// Other times it's incorrect, e.g.: + /// INSERT INTO FUNCTION file('t.parquet', Parquet, 'x String') VALUES ('1'), ('100'), ('2'); + /// SELECT * FROM file('t.parquet', Parquet, 'x Int64') WHERE x >= 3; + /// If we just typecast min/max from string to integer, this query will incorrectly return empty result. + /// Allow conversion in some simple cases, otherwise ignore the min/max values. + auto min_type = min.getType(); + auto max_type = max.getType(); + min = convertFieldToType(min, *type); + max = convertFieldToType(max, *type); + auto ok_cast = [&](Field::Types::Which from, Field::Types::Which to) -> bool + { + if (from == to) + return true; + /// Decimal -> wider decimal. + if (Field::isDecimal(from) || Field::isDecimal(to)) + return Field::isDecimal(from) && Field::isDecimal(to) && to >= from; + /// Integer -> IP. 
+ if (to == Field::Types::IPv4) + return from == Field::Types::UInt64; + /// Disable index for everything else, especially string <-> number. + return false; + }; + if (!(ok_cast(min_type, min.getType()) && ok_cast(max_type, max.getType())) && + !(min == max) && + !(min_type == Field::Types::Int64 && min.getType() == Field::Types::UInt64 && min.safeGet() >= 0) && + !(max_type == Field::Types::UInt64 && max.getType() == Field::Types::Int64 && max.safeGet() <= UInt64(INT64_MAX))) + { + min = Field(); + max = Field(); + } } catch (Exception & e) { @@ -328,7 +385,7 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa { /// Single-point range containing either the default value of one of the infinities. if (null_as_default) - hyperrectangle[idx].right = hyperrectangle[idx].left = default_value(); + hyperrectangle[idx].right = hyperrectangle[idx].left = default_value; else hyperrectangle[idx].right = hyperrectangle[idx].left; continue; @@ -339,32 +396,31 @@ static std::vector getHyperrectangleForRowGroup(const parquet::FileMetaDa if (null_as_default) { /// Make sure the range contains the default value. - Field def = default_value(); - if (min.has_value() && applyVisitor(FieldVisitorAccurateLess(), def, *min)) - min = def; - if (max.has_value() && applyVisitor(FieldVisitorAccurateLess(), *max, def)) - max = def; + if (!min.isNull() && applyVisitor(FieldVisitorAccurateLess(), default_value, min)) + min = default_value; + if (!max.isNull() && applyVisitor(FieldVisitorAccurateLess(), max, default_value)) + max = default_value; } else { /// Make sure the range reaches infinity on at least one side. - if (min.has_value() && max.has_value()) - min.reset(); + if (!min.isNull() && !max.isNull()) + min = Field(); } } else { /// If the column doesn't have nulls, exclude both infinities. 
- if (!min.has_value()) + if (min.isNull()) hyperrectangle[idx].left_included = false; - if (!max.has_value()) + if (max.isNull()) hyperrectangle[idx].right_included = false; } - if (min.has_value()) - hyperrectangle[idx].left = std::move(min.value()); - if (max.has_value()) - hyperrectangle[idx].right = std::move(max.value()); + if (!min.isNull()) + hyperrectangle[idx].left = std::move(min); + if (!max.isNull()) + hyperrectangle[idx].right = std::move(max); } return hyperrectangle; diff --git a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference index 7764974255b..877bb5f390f 100644 --- a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference +++ b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.reference @@ -1 +1,2 @@ 424242424242424242424242424242424242424242424242424242 +22707864971053448441042714569797161695738549521977760418632926980540162388532 diff --git a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh index 8865b2e7aab..0f590027f19 100755 --- a/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh +++ b/tests/queries/0_stateless/02786_parquet_big_integer_compatibility.sh @@ -5,5 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh +# This is parsed as text. $CLICKHOUSE_LOCAL -q "select toString(424242424242424242424242424242424242424242424242424242::UInt256) as x format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --structure='x UInt256' -q "select * from table" +# But this is parsed as binary because text length happens to be 32 bytes. Not ideal. 
+$CLICKHOUSE_LOCAL -q "select toString(42424242424242424242424242424242::UInt256) as x format Parquet" | $CLICKHOUSE_LOCAL --input-format=Parquet --structure='x UInt256' -q "select * from table" diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference index 4adf418bcc7..8003b9cb626 100644 --- a/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.reference @@ -71,3 +71,5 @@ d256 Nullable(Decimal(76, 40)) 500 244750 500 244750 500 244750 +42 +100 diff --git a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql index 950485d53f0..52caee50b32 100644 --- a/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql +++ b/tests/queries/0_stateless/02841_parquet_filter_pushdown.sql @@ -131,3 +131,9 @@ select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, select count(), sum(number) from file('02841.parquet') where indexHint(string_or_null == ''); -- quirk with infinities select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, string_or_null String') where indexHint(string_or_null == ''); select count(), sum(number) from file('02841.parquet', Parquet, 'number UInt64, nEgAtIvE_oR_nUlL Int64') where indexHint(nEgAtIvE_oR_nUlL > -50) settings input_format_parquet_case_insensitive_column_matching = 1; + +-- Bad type conversions. 
+insert into function file('02841.parquet') select 42 as x; +select * from file('02841.parquet', Parquet, 'x Nullable(String)') where x not in (1); +insert into function file('t.parquet', Parquet, 'x String') values ('1'), ('100'), ('2'); +select * from file('t.parquet', Parquet, 'x Int64') where x >= 3; From 5b98e2f4b5c03813e19eb67db0989f44a553767d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 23 Aug 2024 15:14:09 +0000 Subject: [PATCH 089/121] Backport #68715 to 24.7: Turn off fault injection for insert in `01396_inactive_replica_cleanup_nodes_zookeeper` --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index bff85b3e29f..80e9253af2c 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -23,11 +23,11 @@ $CLICKHOUSE_CLIENT -n --query " DETACH TABLE r2; " -$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" +# insert_keeper_fault_injection_probability=0 -- can slowdown insert a lot (produce a lot of parts) +$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})" # Now wait for cleanup thread - for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || 
'%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; From f7cd42e4f93d756a016cb21ad6567e3d9872fef9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sat, 24 Aug 2024 13:13:09 +0000 Subject: [PATCH 090/121] Backport #68681 to 24.7: Fix ColumnVariant permutation --- src/Columns/ColumnVariant.cpp | 14 ++++++-- .../03228_variant_permutation_issue.reference | 8 +++++ .../03228_variant_permutation_issue.sql | 33 +++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03228_variant_permutation_issue.reference create mode 100644 tests/queries/0_stateless/03228_variant_permutation_issue.sql diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index de7efb41d19..47158758e1f 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -942,7 +942,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const { /// If we have only NULLs, index will take no effect, just return resized column. if (hasOnlyNulls()) - return cloneResized(limit); + return cloneResized(limit == 0 ? indexes.size(): limit); /// Optimization when we have only one non empty variant and no NULLs. /// In this case local_discriminators column is filled with identical values and offsets column @@ -998,8 +998,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray & indexes, size_t new_variants.reserve(num_variants); for (size_t i = 0; i != num_variants; ++i) { - size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); - new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + /// Check if no values from this variant were selected. + if (nested_perms[i].empty()) + { + new_variants.emplace_back(variants[i]->cloneEmpty()); + } + else + { + size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 
0 : nested_perms[i].size(); + new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); + } } /// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation. diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference new file mode 100644 index 00000000000..be9cdedaf07 --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -0,0 +1,8 @@ +2 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +2 {"foo2":"baz"} 2 +3 {"foo2":"bar"} 1 +2 {"foo2":"bar"} 1 +3 {"foo2":"bar"} 1 +2 {"foo2":"baz"} 2 +3 {"foo2":"bar"} 1 diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql new file mode 100644 index 00000000000..81eb2ed69af --- /dev/null +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -0,0 +1,33 @@ +SET allow_experimental_json_type = 1; + +DROP TABLE IF EXISTS test_new_json_type; +CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} +{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; + +INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; + +DROP TABLE test_new_json_type; + +CREATE TABLE test_new_json_type(id Nullable(UInt32), data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id settings allow_nullable_key=1; +INSERT INTO test_new_json_type format JSONEachRow +{"id":1,"data":{"foo1":"bar"},"version":1} 
+{"id":2,"data":{"foo2":"bar"},"version":1} +{"id":3,"data":{"foo2":"bar"},"version":1} +; + +SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; + +INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; + +SELECT * FROM test_new_json_type FINAL PREWHERE data.foo2 IS NOT NULL WHERE data.foo2 IS NOT NULL ORDER BY id ASC NULLS FIRST; + +DROP TABLE test_new_json_type; + From ec22243eb10211851a8fe23a5fed07aeaea45d75 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 26 Aug 2024 13:32:00 +0200 Subject: [PATCH 091/121] Update 03228_variant_permutation_issue.sql --- .../03228_variant_permutation_issue.sql | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql index 81eb2ed69af..8b137891791 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -1,33 +1 @@ -SET allow_experimental_json_type = 1; - -DROP TABLE IF EXISTS test_new_json_type; -CREATE TABLE test_new_json_type(id UInt32, data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id; -INSERT INTO test_new_json_type format JSONEachRow -{"id":1,"data":{"foo1":"bar"},"version":1} -{"id":2,"data":{"foo2":"bar"},"version":1} -{"id":3,"data":{"foo2":"bar"},"version":1} -; - -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; - -INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; - -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; - -DROP TABLE test_new_json_type; - -CREATE TABLE test_new_json_type(id Nullable(UInt32), data JSON, version UInt64) ENGINE=ReplacingMergeTree(version) ORDER BY id settings 
allow_nullable_key=1; -INSERT INTO test_new_json_type format JSONEachRow -{"id":1,"data":{"foo1":"bar"},"version":1} -{"id":2,"data":{"foo2":"bar"},"version":1} -{"id":3,"data":{"foo2":"bar"},"version":1} -; - -SELECT * FROM test_new_json_type FINAL WHERE data.foo2 is not null ORDER BY id; - -INSERT INTO test_new_json_type SELECT id, '{"foo2":"baz"}' AS _data, version+1 AS _version FROM test_new_json_type where id=2; - -SELECT * FROM test_new_json_type FINAL PREWHERE data.foo2 IS NOT NULL WHERE data.foo2 IS NOT NULL ORDER BY id ASC NULLS FIRST; - -DROP TABLE test_new_json_type; From c7c3222dabc29fe8355c72a53936a642348edf8d Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 26 Aug 2024 13:32:09 +0200 Subject: [PATCH 092/121] Update 03228_variant_permutation_issue.reference --- .../03228_variant_permutation_issue.reference | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference index be9cdedaf07..8b137891791 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.reference +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -1,8 +1 @@ -2 {"foo2":"bar"} 1 -3 {"foo2":"bar"} 1 -2 {"foo2":"baz"} 2 -3 {"foo2":"bar"} 1 -2 {"foo2":"bar"} 1 -3 {"foo2":"bar"} 1 -2 {"foo2":"baz"} 2 -3 {"foo2":"bar"} 1 + From f778c099fe2efa30ec0abac8c7ce97f60b238532 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 26 Aug 2024 12:08:50 +0000 Subject: [PATCH 093/121] Backport #68323 to 24.7: Fix small value DateTime64 constant folding in nested subquery for remote --- src/Analyzer/ConstantNode.cpp | 22 +++++++++- ...222_datetime64_small_value_const.reference | 18 ++++++++ .../03222_datetime64_small_value_const.sql | 44 +++++++++++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 
tests/queries/0_stateless/03222_datetime64_small_value_const.reference create mode 100644 tests/queries/0_stateless/03222_datetime64_small_value_const.sql diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 46c1f7fb1ed..8fc63aec79e 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -162,6 +162,7 @@ QueryTreeNodePtr ConstantNode::cloneImpl() const ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const { const auto & constant_value_literal = constant_value->getValue(); + const auto & constant_value_type = constant_value->getType(); auto constant_value_ast = std::make_shared(constant_value_literal); if (!options.add_cast_for_constants) @@ -169,7 +170,26 @@ ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const if (requiresCastCall()) { - auto constant_type_name_ast = std::make_shared(constant_value->getType()->getName()); + /** Value for DateTime64 is Decimal64, which is serialized as a string literal. + * If we serialize it as is, DateTime64 would be parsed from that string literal, which can be incorrect. + * For example, DateTime64 cannot be parsed from the short value, like '1', while it's a valid Decimal64 value. + * It could also lead to ambiguous parsing because we don't know if the string literal represents a date or a Decimal64 literal. + * For this reason, we use a string literal representing a date instead of a Decimal64 literal. 
+ */ + const auto & constant_value_end_type = removeNullable(constant_value_type); /// if Nullable + if (WhichDataType(constant_value_end_type->getTypeId()).isDateTime64()) + { + const auto * date_time_type = typeid_cast(constant_value_end_type.get()); + DecimalField decimal_value; + if (constant_value_literal.tryGet>(decimal_value)) + { + WriteBufferFromOwnString ostr; + writeDateTimeText(decimal_value.getValue(), date_time_type->getScale(), ostr, date_time_type->getTimeZone()); + constant_value_ast = std::make_shared(ostr.str()); + } + } + + auto constant_type_name_ast = std::make_shared(constant_value_type->getName()); return makeASTFunction("_CAST", std::move(constant_value_ast), std::move(constant_type_name_ast)); } diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.reference b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference new file mode 100644 index 00000000000..ae36c08acc5 --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.reference @@ -0,0 +1,18 @@ +0 1970-01-01 00:00:00.000 +0 1970-01-01 00:00:05.000 +0 1970-01-01 00:45:25.456789 +0 1970-01-01 00:53:25.456789123 +0 \N +1 1970-01-01 00:00:00.000 +5 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +4 1970-01-01 00:10:05.000 +4 1970-01-01 00:10:05.000 +1 1970-01-01 00:00:00.000 +2 1970-01-01 00:00:02.456 +3 1970-01-01 00:00:04.811 +5 1970-01-01 00:00:00.000 +0 +0 +5 diff --git a/tests/queries/0_stateless/03222_datetime64_small_value_const.sql b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql new file mode 100644 index 00000000000..a64ebd38ccf --- /dev/null +++ b/tests/queries/0_stateless/03222_datetime64_small_value_const.sql @@ -0,0 +1,44 @@ +-- Tags: shard +set session_timezone = 'UTC'; -- don't randomize the session timezone +SET allow_experimental_analyzer = 1; + +select *, (select toDateTime64(0, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, 
(select toDateTime64(5, 3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:45:25.456789', 6)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64('1970-01-01 00:53:25.456789123', 9)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; +select *, (select toDateTime64(null,3)) from remote('127.0.0.1', system.one) settings prefer_localhost_replica=0; + +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +create table shard_0.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table shard_1.dt64_03222(id UInt64, dt DateTime64(3)) engine = MergeTree order by id; +create table distr_03222_dt64 (id UInt64, dt DateTime64(3)) engine = Distributed(test_cluster_two_shards_different_databases, '', dt64_03222); + +insert into shard_0.dt64_03222 values(1, toDateTime64('1970-01-01 00:00:00.000',3)); +insert into shard_0.dt64_03222 values(2, toDateTime64('1970-01-01 00:00:02.456',3)); +insert into shard_1.dt64_03222 values(3, toDateTime64('1970-01-01 00:00:04.811',3)); +insert into shard_1.dt64_03222 values(4, toDateTime64('1970-01-01 00:10:05',3)); +insert into shard_1.dt64_03222 values(5, toDateTime64(0,3)); + +--Output : 1,5 2,3,4 4 1,2,3,5 0 0 5 +select id, dt from distr_03222_dt64 where dt = (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64(0,3)) order by id; +select id, dt from distr_03222_dt64 where dt > (select toDateTime64('1970-01-01 00:10:00.000',3)) order by id; +select id, dt from distr_03222_dt64 where dt < (select toDateTime64(5,3)) order by id; + +select count(*) from distr_03222_dt64 where dt > (select toDateTime64('2024-07-20 00:00:00',3)); +select count(*) from 
distr_03222_dt64 where dt > (select now()); +select count(*) from distr_03222_dt64 where dt < (select toDateTime64('2004-07-20 00:00:00',3)); + + +drop table if exists shard_0.dt64_03222; +drop table if exists shard_1.dt64_03222; +drop table if exists distr_03222_dt64; + +drop database shard_0; +drop database shard_1; From 999f6521eef30f6d09bf43fa9696563e602c5369 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 26 Aug 2024 19:12:59 +0000 Subject: [PATCH 094/121] Backport #68836 to 24.7: Fix complex types metadata parsing in DeltaLake --- .../test/integration/runner/requirements.txt | 2 + .../DataLakes/DeltaLakeMetadata.cpp | 13 +-- tests/integration/test_storage_delta/test.py | 96 +++++++++++++++++++ 3 files changed, 105 insertions(+), 6 deletions(-) diff --git a/docker/test/integration/runner/requirements.txt b/docker/test/integration/runner/requirements.txt index 8a77d8abf77..7ca15ea86b0 100644 --- a/docker/test/integration/runner/requirements.txt +++ b/docker/test/integration/runner/requirements.txt @@ -111,3 +111,5 @@ wadllib==1.3.6 websocket-client==0.59.0 wheel==0.37.1 zipp==1.0.0 +deltalake==0.16.0 + diff --git a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp index c896a760597..14b18809a0d 100644 --- a/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp +++ b/src/Storages/ObjectStorage/DataLakes/DeltaLakeMetadata.cpp @@ -422,8 +422,9 @@ struct DeltaLakeMetadataImpl { auto field = fields->getObject(static_cast(i)); element_names.push_back(field->getValue("name")); - auto required = field->getValue("required"); - element_types.push_back(getFieldType(field, "type", required)); + + auto is_nullable = field->getValue("nullable"); + element_types.push_back(getFieldType(field, "type", is_nullable)); } return std::make_shared(element_types, element_names); @@ -431,16 +432,16 @@ struct DeltaLakeMetadataImpl if (type_name == "array") { - bool is_nullable = 
type->getValue("containsNull"); - auto element_type = getFieldType(type, "elementType", is_nullable); + bool element_nullable = type->getValue("containsNull"); + auto element_type = getFieldType(type, "elementType", element_nullable); return std::make_shared(element_type); } if (type_name == "map") { - bool is_nullable = type->getValue("containsNull"); auto key_type = getFieldType(type, "keyType", /* is_nullable */false); - auto value_type = getFieldType(type, "valueType", is_nullable); + bool value_nullable = type->getValue("valueContainsNull"); + auto value_type = getFieldType(type, "valueType", value_nullable); return std::make_shared(key_type, value_type); } diff --git a/tests/integration/test_storage_delta/test.py b/tests/integration/test_storage_delta/test.py index d3dd7cfe52a..ff2a4614e49 100644 --- a/tests/integration/test_storage_delta/test.py +++ b/tests/integration/test_storage_delta/test.py @@ -27,6 +27,9 @@ from datetime import datetime from pyspark.sql.functions import monotonically_increasing_id, row_number from pyspark.sql.window import Window from minio.deleteobjects import DeleteObject +import pyarrow as pa +import pyarrow.parquet as pq +from deltalake.writer import write_deltalake from helpers.s3_tools import ( prepare_s3_bucket, @@ -709,3 +712,96 @@ SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.mini ) == 1 ) + + +def test_complex_types(started_cluster): + node = started_cluster.instances["node1"] + minio_client = started_cluster.minio_client + bucket = started_cluster.minio_bucket + + schema = pa.schema( + [ + ("id", pa.int32()), + ("name", pa.string()), + ( + "address", + pa.struct( + [ + ("street", pa.string()), + ("city", pa.string()), + ("state", pa.string()), + ] + ), + ), + ("interests", pa.list_(pa.string())), + ( + "metadata", + pa.map_( + pa.string(), pa.string() + ), # Map with string keys and string values + ), + ] + ) + + # Create sample data + data = [ + pa.array([1, 2, 3], type=pa.int32()), + 
pa.array(["John Doe", "Jane Smith", "Jake Johnson"], type=pa.string()), + pa.array( + [ + {"street": "123 Elm St", "city": "Springfield", "state": "IL"}, + {"street": "456 Maple St", "city": "Shelbyville", "state": "IL"}, + {"street": "789 Oak St", "city": "Ogdenville", "state": "IL"}, + ], + type=schema.field("address").type, + ), + pa.array( + [ + pa.array(["dancing", "coding", "hiking"]), + pa.array(["dancing", "coding", "hiking"]), + pa.array(["dancing", "coding", "hiking"]), + ], + type=schema.field("interests").type, + ), + pa.array( + [ + {"key1": "value1", "key2": "value2"}, + {"key1": "value3", "key2": "value4"}, + {"key1": "value5", "key2": "value6"}, + ], + type=schema.field("metadata").type, + ), + ] + + endpoint_url = f"http://{started_cluster.minio_ip}:{started_cluster.minio_port}" + aws_access_key_id = "minio" + aws_secret_access_key = "minio123" + table_name = randomize_table_name("test_complex_types") + + storage_options = { + "AWS_ENDPOINT_URL": endpoint_url, + "AWS_ACCESS_KEY_ID": aws_access_key_id, + "AWS_SECRET_ACCESS_KEY": aws_secret_access_key, + "AWS_ALLOW_HTTP": "true", + "AWS_S3_ALLOW_UNSAFE_RENAME": "true", + } + path = f"s3://root/{table_name}" + table = pa.Table.from_arrays(data, schema=schema) + + write_deltalake(path, table, storage_options=storage_options) + + assert "1\n2\n3\n" in node.query( + f"SELECT id FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')" + ) + assert ( + "('123 Elm St','Springfield','IL')\n('456 Maple St','Shelbyville','IL')\n('789 Oak St','Ogdenville','IL')" + in node.query( + f"SELECT address FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')" + ) + ) + assert ( + "{'key1':'value1','key2':'value2'}\n{'key1':'value3','key2':'value4'}\n{'key1':'value5','key2':'value6'}" + in node.query( + f"SELECT metadata FROM 
deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/root/{table_name}' , 'minio', 'minio123')" + ) + ) From 692f69d919e651911d9470342140fb9354e4f58c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 27 Aug 2024 11:09:34 +0000 Subject: [PATCH 095/121] Backport #68897 to 24.7: Revert "Fix prewhere without columns and without adaptive index granularity (almost w/o anything)" --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 11 ---- .../MergeTree/MergeTreeDataPartWriterWide.cpp | 12 +---- .../02967_prewhere_no_columns.reference | 2 - .../0_stateless/02967_prewhere_no_columns.sql | 51 ------------------- ...n_adaptive_granularity_no_errors.reference | 2 - ...198_non_adaptive_granularity_no_errors.sql | 12 ----- .../00166_explain_estimate.reference | 4 +- 7 files changed, 3 insertions(+), 91 deletions(-) delete mode 100644 tests/queries/0_stateless/02967_prewhere_no_columns.reference delete mode 100644 tests/queries/0_stateless/02967_prewhere_no_columns.sql delete mode 100644 tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference delete mode 100644 tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c2f87018872..71f37d01a5f 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1311,17 +1311,6 @@ void IMergeTreeDataPart::loadRowsCount() auto buf = metadata_manager->read("count.txt"); readIntText(rows_count, *buf); assertEOF(*buf); - - if (!index_granularity.empty() && rows_count < index_granularity.getTotalRows() && index_granularity_info.fixed_index_granularity) - { - /// Adjust last granule size to match the number of rows in the part in case of fixed index_granularity. 
- index_granularity.popMark(); - index_granularity.appendMark(rows_count % index_granularity_info.fixed_index_granularity); - if (rows_count != index_granularity.getTotalRows()) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Index granularity total rows in part {} does not match rows_count: {}, instead of {}", - name, index_granularity.getTotalRows(), rows_count); - } }; if (index_granularity.empty()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 3fbabe1dd52..43c0afa77e6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -561,10 +561,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) { - /// With fixed granularity we can have last mark with less rows than granularity - const bool is_last_mark = (mark_num + 1 == index_granularity.getMarksCount()); - if (!index_granularity_info.fixed_index_granularity || !is_last_mark) - throw Exception( + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{}" " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", @@ -828,14 +825,7 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ /// Without offset rows_written_in_last_mark = 0; } - - if (compute_granularity) - { - index_granularity.popMark(); - index_granularity.appendMark(new_rows_in_last_mark); - } } - } } diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.reference b/tests/queries/0_stateless/02967_prewhere_no_columns.reference deleted file mode 100644 index df105254618..00000000000 --- a/tests/queries/0_stateless/02967_prewhere_no_columns.reference +++ /dev/null @@ -1,2 +0,0 @@ -105 -105 diff --git a/tests/queries/0_stateless/02967_prewhere_no_columns.sql 
b/tests/queries/0_stateless/02967_prewhere_no_columns.sql deleted file mode 100644 index efcc952caa2..00000000000 --- a/tests/queries/0_stateless/02967_prewhere_no_columns.sql +++ /dev/null @@ -1,51 +0,0 @@ -CREATE TABLE t_02967 -( - `key` Date, - `value` UInt16 -) -ENGINE = MergeTree -ORDER BY key -SETTINGS - index_granularity_bytes = 0 --8192 --, min_index_granularity_bytes = 2 - , index_granularity = 100 - , min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0 --- --- , min_bytes_for_wide_part = 2 -AS SELECT - number, - repeat(toString(number), 5) -FROM numbers(105.); - - - --- Check with newly inserted data part. It's in-memory structured are filled at insert time. -SELECT - count(ignore(*)) -FROM t_02967 -PREWHERE CAST(ignore() + 1 as UInt8) -GROUP BY - ignore(65535, *), - ignore(255, 256, *) -SETTINGS - --send_logs_level='test', - max_threads=1; - - - --- Reload part form disk to check that in-meory structures where properly serilaized-deserialized -DETACH TABLE t_02967; -ATTACH TABLE t_02967; - - -SELECT - count(ignore(*)) -FROM t_02967 -PREWHERE CAST(ignore() + 1 as UInt8) -GROUP BY - ignore(65535, *), - ignore(255, 256, *) -SETTINGS - --send_logs_level='test', - max_threads=1; - -DROP TABLE t_02967; diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference deleted file mode 100644 index fcd78da1283..00000000000 --- a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.reference +++ /dev/null @@ -1,2 +0,0 @@ -1000000 -1000000 diff --git a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql b/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql deleted file mode 100644 index 25798ef6d33..00000000000 --- a/tests/queries/0_stateless/03198_non_adaptive_granularity_no_errors.sql +++ /dev/null @@ -1,12 +0,0 @@ -DROP TABLE IF EXISTS data_02051__fuzz_24; - -CREATE TABLE data_02051__fuzz_24 (`key` 
Int16, `value` String) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity_bytes = 0, min_rows_for_wide_part = 0, min_bytes_for_wide_part=0 AS SELECT number, repeat(toString(number), 5) FROM numbers(1000000.); - -SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); - -detach table data_02051__fuzz_24; -attach table data_02051__fuzz_24; - -SELECT count(ignore(*)) FROM data_02051__fuzz_24 PREWHERE materialize(1) GROUP BY ignore(*); - -DROP TABLE data_02051__fuzz_24; diff --git a/tests/queries/1_stateful/00166_explain_estimate.reference b/tests/queries/1_stateful/00166_explain_estimate.reference index 85ecd0b9a71..71ddd681581 100644 --- a/tests/queries/1_stateful/00166_explain_estimate.reference +++ b/tests/queries/1_stateful/00166_explain_estimate.reference @@ -1,5 +1,5 @@ test hits 1 57344 7 -test hits 1 8832938 1079 -test hits 1 829354 102 +test hits 1 8839168 1079 +test hits 1 835584 102 test hits 1 8003584 977 test hits 2 581632 71 From 6d69cd1f84d91767ead43e521cb2322bb78e8372 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:53:42 +0200 Subject: [PATCH 096/121] Update 03228_variant_permutation_issue.sql --- .../0_stateless/03228_variant_permutation_issue.sql | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql index 8b137891791..d6d58ffafd0 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -1 +1,9 @@ +SET allow_experimental_variant_type = 1; +select 'test'; +drop table if exists test; +create table test (id UInt32, data Variant(String), version UInt64) engine=ReplacingMergeTree(version) order by id; +insert into test values (1, NULL, 1), (2, 'bar', 1), (3, 'bar', 1); +insert into test select id, 'baz' as _data, version+1 as 
_version from test where id=2; +select * from test final WHERE data is not null format Null; +drop table test; From 529c9e2831b88a3be07182db223fb926f2828dd1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:54:23 +0200 Subject: [PATCH 097/121] Update 03228_variant_permutation_issue.sql --- tests/queries/0_stateless/03228_variant_permutation_issue.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.sql b/tests/queries/0_stateless/03228_variant_permutation_issue.sql index d6d58ffafd0..d5c84a8b7db 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.sql +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.sql @@ -1,6 +1,5 @@ SET allow_experimental_variant_type = 1; -select 'test'; drop table if exists test; create table test (id UInt32, data Variant(String), version UInt64) engine=ReplacingMergeTree(version) order by id; insert into test values (1, NULL, 1), (2, 'bar', 1), (3, 'bar', 1); From 8d66d74d4efbe774e3fbdfe49aa236f41d309acb Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 27 Aug 2024 14:06:43 +0000 Subject: [PATCH 098/121] Backport #68752 to 24.7: Fix 2477 timeout --- .../02477_projection_materialize_and_zero_copy.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql index 1845919890c..0597ac10cd7 100644 --- a/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql +++ b/tests/queries/0_stateless/02477_projection_materialize_and_zero_copy.sql @@ -1,9 +1,11 @@ +-- Tags: long, no-parallel + DROP TABLE IF EXISTS t; create table t (c1 Int64, c2 String, c3 DateTime, c4 Int8, c5 String, c6 String, c7 String, c8 String, c9 String, c10 String, c11 String, c12 String, c13 Int8, c14 Int64, c15 String, c16 String, c17 String, 
c18 Int64, c19 Int64, c20 Int64) engine ReplicatedMergeTree('/clickhouse/test/{database}/test_02477', '1') order by c18 -SETTINGS allow_remote_fs_zero_copy_replication=1, index_granularity=8092, index_granularity_bytes='10Mi'; +SETTINGS allow_remote_fs_zero_copy_replication=1, index_granularity=8192, index_granularity_bytes='10Mi'; -insert into t (c1, c18) select number, -number from numbers(2000000); +insert into t (c1, c18) select number, -number from numbers(500000); alter table t add projection p_norm (select * order by c1); From 9d2140bea4cdddc2cc4c3e474a4c16f425425010 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 27 Aug 2024 16:11:08 +0000 Subject: [PATCH 099/121] Backport #67583 to 24.7: Fix the upper bound of function fromModifiedJulianDay() --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- docs/zh/sql-reference/functions/date-time-functions.md | 2 +- src/Functions/GregorianDate.cpp | 4 ++-- .../queries/0_stateless/01544_fromModifiedJulianDay.reference | 2 ++ tests/queries/0_stateless/01544_fromModifiedJulianDay.sql | 2 ++ 5 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 4f5e5a5d716..3d95ae2cb74 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -4287,7 +4287,7 @@ Result: ## fromModifiedJulianDay -Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number to a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD`. This function supports day number from `-678941` to `2973119` (which represent 0000-01-01 and 9999-12-31 respectively). It raises an exception if the day number is outside of the supported range. 
+Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) number to a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) date in text form `YYYY-MM-DD`. This function supports day number from `-678941` to `2973483` (which represent 0000-01-01 and 9999-12-31 respectively). It raises an exception if the day number is outside of the supported range. **Syntax** diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 18b9f3495c0..9fa2d79c655 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -1157,7 +1157,7 @@ SELECT toModifiedJulianDayOrNull('2020-01-01'); ## fromModifiedJulianDay {#frommodifiedjulianday} -将 [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) 数字转换为 `YYYY-MM-DD` 文本格式的 [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) 日期。该函数支持从 `-678941` 到 `2973119` 的天数(分别代表 0000-01-01 和 9999-12-31)。如果天数超出支持范围,则会引发异常。 +将 [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Variants) 数字转换为 `YYYY-MM-DD` 文本格式的 [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Proleptic_Gregorian_calendar) 日期。该函数支持从 `-678941` 到 `2973483` 的天数(分别代表 0000-01-01 和 9999-12-31)。如果天数超出支持范围,则会引发异常。 **语法** diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index 91861e8bbd2..82c81d2bb4f 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -284,12 +284,12 @@ void OrdinalDate::init(int64_t modified_julian_day) bool OrdinalDate::tryInit(int64_t modified_julian_day) { - /// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively). + /// This function supports day number from -678941 to 2973483 (which represent 0000-01-01 and 9999-12-31 respectively). 
if (modified_julian_day < -678941) return false; - if (modified_julian_day > 2973119) + if (modified_julian_day > 2973483) return false; const auto a = modified_julian_day + 678575; diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference index 443b90b80a5..4c6d75346e1 100644 --- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference +++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference @@ -4,6 +4,8 @@ Invocation with constant 2020-11-01 \N \N +0000-01-01 +9999-12-31 or null 2020-11-01 \N diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql index d405aa16f3f..6f0f08c363f 100644 --- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql +++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql @@ -7,6 +7,8 @@ SELECT fromModifiedJulianDay(59154); SELECT fromModifiedJulianDay(NULL); SELECT fromModifiedJulianDay(CAST(NULL, 'Nullable(Int64)')); SELECT fromModifiedJulianDay(-678942); -- { serverError CANNOT_FORMAT_DATETIME } +SELECT fromModifiedJulianDay(-678941); +SELECT fromModifiedJulianDay(2973483); SELECT fromModifiedJulianDay(2973484); -- { serverError CANNOT_FORMAT_DATETIME } SELECT 'or null'; From af7e943d56ee96cea921605632468dc44074bfee Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 28 Aug 2024 13:47:36 +0200 Subject: [PATCH 100/121] Update ConstantNode.cpp --- src/Analyzer/ConstantNode.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index 8fc63aec79e..2afd4a26e2f 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -11,6 +11,7 @@ #include #include +#include #include #include From b1a012202fde1757f9d4c4bc33ff3fc169d4168b Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Aug 2024 11:53:18 +0000 Subject: [PATCH 
101/121] Fix test --- .../0_stateless/03228_variant_permutation_issue.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03228_variant_permutation_issue.reference b/tests/queries/0_stateless/03228_variant_permutation_issue.reference index 8b137891791..e69de29bb2d 100644 --- a/tests/queries/0_stateless/03228_variant_permutation_issue.reference +++ b/tests/queries/0_stateless/03228_variant_permutation_issue.reference @@ -1 +0,0 @@ - From afcd8936ef2fd0358ca971c138ad2547c44f6ba7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Aug 2024 12:09:37 +0000 Subject: [PATCH 102/121] Backport #68298 to 24.7: Fix using schema_inference_make_columns_nullable=0 --- docs/en/interfaces/schema-inference.md | 7 ++--- .../operations/settings/settings-formats.md | 4 +-- src/Core/Settings.h | 2 +- src/Formats/FormatFactory.cpp | 2 +- src/Formats/FormatSettings.h | 2 +- src/Formats/SchemaInferenceUtils.cpp | 4 +++ src/Processors/Formats/ISchemaReader.cpp | 7 +---- .../Formats/Impl/ArrowBlockInputFormat.cpp | 7 +++-- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 12 ++++++--- .../Formats/Impl/ArrowColumnToCHColumn.h | 3 ++- .../Impl/NativeORCBlockInputFormat.cpp | 2 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 7 +++-- .../Formats/Impl/ParquetBlockInputFormat.cpp | 7 +++-- .../02497_schema_inference_nulls.sql | 6 ++--- ...02784_schema_inference_null_as_default.sql | 4 +-- .../03036_parquet_arrow_nullable.reference | 26 +++++++++++++++++++ .../03036_parquet_arrow_nullable.sh | 7 +++++ 17 files changed, 76 insertions(+), 33 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 05fae994cbe..5b3cd179e21 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -1385,7 +1385,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul #### schema_inference_make_columns_nullable Controls making inferred types `Nullable` in 
schema inference for formats without information about nullability. -If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. Enabled by default. @@ -1408,15 +1408,13 @@ DESC format(JSONEachRow, $$ └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` ```sql -SET schema_inference_make_columns_nullable = 0; -SET input_format_null_as_default = 0; +SET schema_inference_make_columns_nullable = 'auto'; DESC format(JSONEachRow, $$ {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} $$) ``` ```response - ┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ id │ Int64 │ │ │ │ │ │ │ age │ Int64 │ │ │ │ │ │ @@ -1428,7 +1426,6 @@ DESC format(JSONEachRow, $$ ```sql SET schema_inference_make_columns_nullable = 0; -SET input_format_null_as_default = 1; DESC format(JSONEachRow, $$ {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index f8b40cd81ac..57812ef0e03 100644 --- a/docs/en/operations/settings/settings-formats.md +++ 
b/docs/en/operations/settings/settings-formats.md @@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} -Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. +Controls making inferred types `Nullable` in schema inference. +If the setting is enabled, all inferred type will be `Nullable`, if disabled, the inferred type will never be `Nullable`, if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or file metadata contains information about column nullability. Default value: `true`. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 34435cef3e6..c0e340afe37 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1108,7 +1108,7 @@ class IColumn; M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 
'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ - M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \ + M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 1e03b1bfc20..a3465e9f8ca 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -256,7 +256,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.schema_inference_hints = settings.schema_inference_hints; - format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; + format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2); format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.map_column_names = 
settings.input_format_mysql_dump_map_column_names; format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 225d03e54fd..b668d750515 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -75,7 +75,7 @@ struct FormatSettings Raw }; - bool schema_inference_make_columns_nullable = true; + UInt64 schema_inference_make_columns_nullable = 1; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 3c374ada9e6..c04682e8765 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1262,7 +1262,11 @@ namespace if (checkCharCaseInsensitive('n', buf)) { if (checkStringCaseInsensitive("ull", buf)) + { + if (settings.schema_inference_make_columns_nullable == 0) + return std::make_shared(); return makeNullable(std::make_shared()); + } else if (checkStringCaseInsensitive("an", buf)) return std::make_shared(); } diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 45523700a5d..e002e64b7e5 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -54,13 +54,8 @@ void checkFinalInferredType( type = default_type; } - if (settings.schema_inference_make_columns_nullable) + if (settings.schema_inference_make_columns_nullable == 1) type = makeNullableRecursively(type); - /// In case when data for some column could contain nulls and regular values, - /// resulting inferred type is Nullable. - /// If input_format_null_as_default is enabled, we should remove Nullable type. 
- else if (settings.null_as_default) - type = removeNullable(type); } void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type) diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 72a93002669..cf079e52db0 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema() schema = file_reader->schema(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + stream ? "ArrowStream" : "Arrow", + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 5e7f763dfbc..496468277c9 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -712,6 +712,7 @@ struct ReadColumnFromArrowColumnSettings FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; bool allow_arrow_null_type; bool skip_columns_with_unsupported_types; + bool allow_inferring_nullable_columns; }; static ColumnWithTypeAndName readColumnFromArrowColumn( @@ -1094,7 +1095,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( bool is_map_nested_column, const ReadColumnFromArrowColumnSettings & settings) { - bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && 
type_hint->isNullable()); + bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns; if (read_as_nullable_column && arrow_column->type()->id() != arrow::Type::LIST && arrow_column->type()->id() != arrow::Type::LARGE_LIST && @@ -1158,14 +1159,16 @@ static std::shared_ptr createArrowColumn(const std::shared_ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types) + bool skip_columns_with_unsupported_types, + bool allow_inferring_nullable_columns) { ReadColumnFromArrowColumnSettings settings { .format_name = format_name, .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, .allow_arrow_null_type = false, - .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types + .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types, + .allow_inferring_nullable_columns = allow_inferring_nullable_columns, }; ColumnsWithTypeAndName sample_columns; @@ -1239,7 +1242,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam .format_name = format_name, .date_time_overflow_behavior = date_time_overflow_behavior, .allow_arrow_null_type = true, - .skip_columns_with_unsupported_types = false + .skip_columns_with_unsupported_types = false, + .allow_inferring_nullable_columns = true }; Columns columns; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 27e9afdf763..8521cd2f410 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -34,7 +34,8 @@ public: static Block arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types = false); + bool skip_columns_with_unsupported_types = 
false, + bool allow_inferring_nullable_columns = true); struct DictionaryInfo { diff --git a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp index 649721f28bf..147aea0b73f 100644 --- a/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/NativeORCBlockInputFormat.cpp @@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema() header.insert(ColumnWithTypeAndName{type, name}); } - if (format_settings.schema_inference_make_columns_nullable) + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a3c218fa26e..2266c0b488c 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema() { initializeIfNeeded(); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + "ORC", + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 035a98e974d..77aacec941e 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -866,8 +866,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema() 
THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema)); auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader( - *schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference); - if (format_settings.schema_inference_make_columns_nullable) + *schema, + "Parquet", + format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference, + format_settings.schema_inference_make_columns_nullable != 0); + if (format_settings.schema_inference_make_columns_nullable == 1) return getNamesAndRecursivelyNullableTypes(header); return header.getNamesAndTypesList(); } diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql index b78b5709dbb..d62fc76d9b9 100644 --- a/tests/queries/0_stateless/02497_schema_inference_nulls.sql +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -18,7 +18,7 @@ desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}'); desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}'); desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(JSONEachRow, '{"x" : [1, 2]}'); desc format(JSONEachRow, '{"x" : [null, 1]}'); desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}'); @@ -40,7 +40,7 @@ desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]'); desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]'); desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(JSONCompactEachRow, '[[1, 2]]'); desc format(JSONCompactEachRow, '[[null, 1]]'); desc format(JSONCompactEachRow, '[[1, 2]], [[3]]'); @@ -59,7 +59,7 @@ desc format(CSV, '"[[], [null], [1, 2, 3]]"'); desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"'); desc 
format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"'); -set schema_inference_make_columns_nullable=0; +set schema_inference_make_columns_nullable='auto'; desc format(CSV, '"[1,2]"'); desc format(CSV, '"[NULL, 1]"'); desc format(CSV, '"[1, 2]"\n"[3]"'); diff --git a/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql b/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql index 9c9f99d8283..571e3ab4f25 100644 --- a/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql +++ b/tests/queries/0_stateless/02784_schema_inference_null_as_default.sql @@ -1,7 +1,7 @@ desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=1; -desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; -select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=0; +desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0; +select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable='auto', input_format_null_as_default=0; desc format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; select * from format(JSONEachRow, '{"x" : null}, {"x" : 42}') settings schema_inference_make_columns_nullable=0, input_format_null_as_default=1; diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference index 985f8192f26..d15f0d8365d 100644 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference +++ 
b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -1,40 +1,66 @@ Parquet a UInt64 a_nullable Nullable(UInt64) +a UInt64 +a_nullable UInt64 Arrow a UInt64 a_nullable Nullable(UInt64) +a UInt64 +a_nullable UInt64 Parquet b Array(UInt64) b_nullable Array(Nullable(UInt64)) +b Array(UInt64) +b_nullable Array(UInt64) Arrow b Array(Nullable(UInt64)) b_nullable Array(Nullable(UInt64)) +b Array(UInt64) +b_nullable Array(UInt64) Parquet c Tuple(\n a UInt64,\n b String) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a UInt64,\n b String) Arrow c Tuple(\n a UInt64,\n b String) c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a UInt64,\n b String) Parquet d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String))) Arrow d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String))) Parquet e Map(UInt64, String) e_nullable Map(UInt64, Nullable(String)) +e Map(UInt64, String) +e_nullable Map(UInt64, String) Arrow e Map(UInt64, Nullable(String)) e_nullable Map(UInt64, Nullable(String)) +e Map(UInt64, String) +e_nullable Map(UInt64, String) Parquet f Map(UInt64, Map(UInt64, String)) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) +f_nullables Map(UInt64, Map(UInt64, String)) Arrow f Map(UInt64, Map(UInt64, Nullable(String))) f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +f Map(UInt64, Map(UInt64, String)) +f_nullables Map(UInt64, Map(UInt64, String)) Parquet g 
String g_nullable Nullable(String) +g String +g_nullable String Arrow g LowCardinality(String) g_nullable LowCardinality(String) +g LowCardinality(String) +g_nullable LowCardinality(String) diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh index bdd641e2b94..379756f78f3 100755 --- a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh @@ -14,6 +14,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -21,6 +22,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -28,6 +30,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -35,6 +38,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 
format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -42,6 +46,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -49,6 +54,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done @@ -56,6 +62,7 @@ for format in $formats do echo $format $CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 'auto'" $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" done From ca116657b4db99e25867152d1577799be0c1e474 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Aug 2024 14:07:39 +0000 Subject: [PATCH 103/121] Backport #68672 to 24.7: Return back virtual columns to distributed tables --- src/Storages/StorageDistributed.cpp | 4 ++++ src/Storages/StorageMerge.cpp | 14 +++++------ 
.../02890_describe_table_options.reference | 8 +++++++ .../03228_virtual_column_merge_dist.reference | 8 +++++++ .../03228_virtual_column_merge_dist.sql | 24 +++++++++++++++++++ 5 files changed, 51 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/03228_virtual_column_merge_dist.reference create mode 100644 tests/queries/0_stateless/03228_virtual_column_merge_dist.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 65323b4bb52..3d115d3f551 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -298,6 +298,10 @@ VirtualColumnsDescription StorageDistributed::createVirtuals() desc.addEphemeral("_shard_num", std::make_shared(), "Deprecated. Use function shardNum instead"); + /// Add virtual columns from table with Merge engine. + desc.addEphemeral("_database", std::make_shared(std::make_shared()), "The name of database which the row comes from"); + desc.addEphemeral("_table", std::make_shared(std::make_shared()), "The name of table which the row comes from"); + return desc; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 9962da3d6de..118219ab441 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -642,10 +642,6 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } } - else - { - - } auto child = createPlanForTable( nested_storage_snaphsot, @@ -657,6 +653,7 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + child.plan.addInterpreterContext(modified_context); if (child.plan.isInitialized()) @@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo modified_query_info.table_expression = replacement_table_expression; 
modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All) + .withExtendedObjects() + .withSubcolumns(storage_snapshot_->storage.supportsSubcolumns()); std::unordered_map column_name_to_node; + /// Consider only non-virtual columns of storage while checking for _table and _database columns. + /// I.e. always override virtual columns with these names from underlying table (if any). if (!storage_snapshot_->tryGetColumn(get_column_options, "_table")) { auto table_name_node = std::make_shared(current_storage_id.table_name); @@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo column_name_to_node.emplace("_database", function_node); } + get_column_options.withVirtuals(); auto storage_columns = storage_snapshot_->metadata->getColumns(); bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty(); diff --git a/tests/queries/0_stateless/02890_describe_table_options.reference b/tests/queries/0_stateless/02890_describe_table_options.reference index 9181cb27cb0..b77ef4a0fdf 100644 --- a/tests/queries/0_stateless/02890_describe_table_options.reference +++ b/tests/queries/0_stateless/02890_describe_table_options.reference @@ -54,6 +54,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 1 _shard_num UInt32 Deprecated. 
Use function shardNum instead 1 +_database LowCardinality(String) The name of database which the row comes from 1 +_table LowCardinality(String) The name of table which the row comes from 1 SET describe_compact_output = 0, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 index column 0 0 @@ -87,6 +89,8 @@ _row_exists UInt8 Persisted mask created by lightweight delete that show wheth _block_number UInt64 Persisted original number of block that was assigned at insert Delta, LZ4 0 1 _block_offset UInt64 Persisted original number of row in block that was assigned at insert Delta, LZ4 0 1 _shard_num UInt32 Deprecated. Use function shardNum instead 0 1 +_database LowCardinality(String) The name of database which the row comes from 0 1 +_table LowCardinality(String) The name of table which the row comes from 0 1 arr.size0 UInt64 1 0 t.a String ZSTD(1) 1 0 t.b UInt64 ZSTD(1) 1 0 @@ -144,6 +148,8 @@ _row_exists UInt8 1 _block_number UInt64 1 _block_offset UInt64 1 _shard_num UInt32 1 +_database LowCardinality(String) 1 +_table LowCardinality(String) 1 SET describe_compact_output = 1, describe_include_virtual_columns = 1, describe_include_subcolumns = 1; DESCRIBE TABLE t_describe_options; id UInt64 0 0 @@ -177,6 +183,8 @@ _row_exists UInt8 0 1 _block_number UInt64 0 1 _block_offset UInt64 0 1 _shard_num UInt32 0 1 +_database LowCardinality(String) 0 1 +_table LowCardinality(String) 0 1 arr.size0 UInt64 1 0 t.a String 1 0 t.b UInt64 1 0 diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference new file mode 100644 index 00000000000..28f00bafdfe --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.reference @@ -0,0 +1,8 @@ +1 t_local_1 +2 t_local_2 +1 t_local_1 +2 t_local_2 +1 1 +2 1 +1 1 +2 1 diff --git a/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql 
b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql new file mode 100644 index 00000000000..caf00a2e407 --- /dev/null +++ b/tests/queries/0_stateless/03228_virtual_column_merge_dist.sql @@ -0,0 +1,24 @@ +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; + +CREATE TABLE t_local_1 (a UInt32) ENGINE = MergeTree ORDER BY a; +CREATE TABLE t_local_2 (a UInt32) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_local_1 VALUES (1); +INSERT INTO t_local_2 VALUES (2); + +CREATE TABLE t_merge AS t_local_1 ENGINE = Merge(currentDatabase(), '^(t_local_1|t_local_2)$'); +CREATE TABLE t_distr AS t_local_1 engine=Distributed('test_shard_localhost', currentDatabase(), t_merge, rand()); + +SELECT a, _table FROM t_merge ORDER BY a; +SELECT a, _table FROM t_distr ORDER BY a; + +SELECT a, _database = currentDatabase() FROM t_merge ORDER BY a; +SELECT a, _database = currentDatabase() FROM t_distr ORDER BY a; + +DROP TABLE IF EXISTS t_local_1; +DROP TABLE IF EXISTS t_local_2; +DROP TABLE IF EXISTS t_merge; +DROP TABLE IF EXISTS t_distr; From e2b1ae5bd492f9fbdc941e8a8a9b57afe938726f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 28 Aug 2024 18:09:09 +0000 Subject: [PATCH 104/121] Backport #68930 to 24.7: Fix 01114_database_atomic flakiness --- .../01114_database_atomic.reference | 20 +-- .../0_stateless/01114_database_atomic.sh | 115 ++++++++++-------- 2 files changed, 76 insertions(+), 59 deletions(-) diff --git a/tests/queries/0_stateless/01114_database_atomic.reference b/tests/queries/0_stateless/01114_database_atomic.reference index f42cd099d4e..572cedb64c5 100644 --- a/tests/queries/0_stateless/01114_database_atomic.reference +++ b/tests/queries/0_stateless/01114_database_atomic.reference @@ -1,17 +1,17 @@ 1 -CREATE DATABASE test_01114_1\nENGINE = Atomic -CREATE DATABASE test_01114_2\nENGINE = Atomic -CREATE DATABASE test_01114_3\nENGINE = Ordinary -test_01114_1 Atomic store 
00001114-1000-4000-8000-000000000001 1 -test_01114_2 Atomic store 00001114-1000-4000-8000-000000000002 1 -test_01114_3 Ordinary test_01114_3 test_01114_3 1 +CREATE DATABASE default_1\nENGINE = Atomic +CREATE DATABASE default_2\nENGINE = Atomic +CREATE DATABASE default_3\nENGINE = Ordinary +default_1 Atomic store 00001114-1000-4000-8000-000000000001 1 +default_2 Atomic store 00001114-1000-4000-8000-000000000002 1 +default_3 Ordinary default_3 default_3 1 110 100 -CREATE TABLE test_01114_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -mt 00001114-0000-4000-8000-000000000002 CREATE TABLE test_01114_2.mt (`n` UInt64) ENGINE = MergeTree PARTITION BY n % 5 ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE default_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +mt 00001114-0000-4000-8000-000000000002 CREATE TABLE default_2.mt (`n` UInt64) ENGINE = MergeTree PARTITION BY n % 5 ORDER BY tuple() SETTINGS index_granularity = 8192 110 -CREATE TABLE test_01114_1.mt UUID \'00001114-0000-4000-8000-000000000001\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01114_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default_1.mt UUID \'00001114-0000-4000-8000-000000000001\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192 5 dropped 110 5995 diff --git 
a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index fed76727a27..fb4672ef906 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -9,81 +9,98 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh +DATABASE_1="${CLICKHOUSE_DATABASE}_1" +DATABASE_2="${CLICKHOUSE_DATABASE}_2" +DATABASE_3="${CLICKHOUSE_DATABASE}_3" -$CLICKHOUSE_CLIENT -nm -q " -DROP DATABASE IF EXISTS test_01114_1; -DROP DATABASE IF EXISTS test_01114_2; -DROP DATABASE IF EXISTS test_01114_3; -" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE ${DATABASE_1} ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" -$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE" +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${DATABASE_1} ENGINE=Atomic" +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${DATABASE_2}" +$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE ${DATABASE_3} ENGINE=Ordinary" -$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Atomic" -$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_2" -$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE test_01114_3 ENGINE=Ordinary" +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE ${DATABASE_1}" +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE ${DATABASE_2}" +$CLICKHOUSE_CLIENT -q "SHOW CREATE DATABASE ${DATABASE_3}" -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE test_01114_1" -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE test_01114_2" -$CLICKHOUSE_CLIENT -q "SHOW CREATE DATABASE 
test_01114_3" - -uuid_db_1=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='test_01114_1'"` -uuid_db_2=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='test_01114_2'"` +uuid_db_1=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='${DATABASE_1}'"` +uuid_db_2=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='${DATABASE_2}'"` $CLICKHOUSE_CLIENT -q "SELECT name, engine, splitByChar('/', data_path)[-2], splitByChar('/', metadata_path)[-2] as uuid_path, ((splitByChar('/', metadata_path)[-3] as metadata) = substr(uuid_path, 1, 3)) OR metadata='metadata' - FROM system.databases WHERE name LIKE 'test_01114_%'" | sed "s/$uuid_db_1/00001114-1000-4000-8000-000000000001/g" | sed "s/$uuid_db_2/00001114-1000-4000-8000-000000000002/g" + FROM system.databases WHERE name LIKE '${CLICKHOUSE_DATABASE}_%'" | sed "s/$uuid_db_1/00001114-1000-4000-8000-000000000001/g" | sed "s/$uuid_db_2/00001114-1000-4000-8000-000000000002/g" $CLICKHOUSE_CLIENT -nm -q " -CREATE TABLE test_01114_1.mt_tmp (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); -INSERT INTO test_01114_1.mt_tmp SELECT * FROM numbers(100); -CREATE TABLE test_01114_3.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5); -INSERT INTO test_01114_3.mt SELECT * FROM numbers(110); +CREATE TABLE ${DATABASE_1}.mt_tmp (n UInt64) ENGINE=MergeTree() ORDER BY tuple(); +INSERT INTO ${DATABASE_1}.mt_tmp SELECT * FROM numbers(100); +CREATE TABLE ${DATABASE_3}.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5); +INSERT INTO ${DATABASE_3}.mt SELECT * FROM numbers(110); -RENAME TABLE test_01114_1.mt_tmp TO test_01114_3.mt_tmp; /* move from Atomic to Ordinary */ -RENAME TABLE test_01114_3.mt TO test_01114_1.mt; /* move from Ordinary to Atomic */ -SELECT count() FROM test_01114_1.mt; -SELECT count() FROM test_01114_3.mt_tmp; +RENAME TABLE ${DATABASE_1}.mt_tmp TO ${DATABASE_3}.mt_tmp; /* move from Atomic to Ordinary */ +RENAME TABLE 
${DATABASE_3}.mt TO ${DATABASE_1}.mt; /* move from Ordinary to Atomic */ +SELECT count() FROM ${DATABASE_1}.mt; +SELECT count() FROM ${DATABASE_3}.mt_tmp; -DROP DATABASE test_01114_3; +DROP DATABASE ${DATABASE_3}; " explicit_uuid=$($CLICKHOUSE_CLIENT -q "SELECT generateUUIDv4()") -$CLICKHOUSE_CLIENT -q "CREATE TABLE test_01114_2.mt UUID '$explicit_uuid' (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5)" -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" +$CLICKHOUSE_CLIENT -q "CREATE TABLE ${DATABASE_2}.mt UUID '$explicit_uuid' (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5)" +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE ${DATABASE_2}.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" +$CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='${DATABASE_2}'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" +RANDOM_COMMENT="$RANDOM" +$CLICKHOUSE_CLIENT --max-threads 5 --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM ${DATABASE_1}.mt) -- ${RANDOM_COMMENT}" & # 33s (1.5s * 22 rows per partition [Using 5 threads in parallel]), result: 110, 5995 +$CLICKHOUSE_CLIENT --max-threads 5 --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO ${DATABASE_2}.mt SELECT number + sleepEachRow(1.5) FROM numbers(30) -- ${RANDOM_COMMENT}" & # 45s (1.5s * 30 rows) -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s 
(1.5s * 22 rows per partition), result: 110, 5995 -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) -sleep 1 # SELECT and INSERT should start before the following RENAMEs +it=0 +while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_COMMENT}%'") -ne 2 ]]; do + it=$((it+1)) + if [ $it -ge 50 ]; + then + echo "Failed to wait for first batch of queries" + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_COMMENT}%'" + fi + sleep 0.1 +done $CLICKHOUSE_CLIENT -nm -q " -RENAME TABLE test_01114_1.mt TO test_01114_1.mt_tmp; -RENAME TABLE test_01114_1.mt_tmp TO test_01114_2.mt_tmp; -EXCHANGE TABLES test_01114_2.mt AND test_01114_2.mt_tmp; -RENAME TABLE test_01114_2.mt_tmp TO test_01114_1.mt; -EXCHANGE TABLES test_01114_1.mt AND test_01114_2.mt; +RENAME TABLE ${DATABASE_1}.mt TO ${DATABASE_1}.mt_tmp; +RENAME TABLE ${DATABASE_1}.mt_tmp TO ${DATABASE_2}.mt_tmp; +EXCHANGE TABLES ${DATABASE_2}.mt AND ${DATABASE_2}.mt_tmp; +RENAME TABLE ${DATABASE_2}.mt_tmp TO ${DATABASE_1}.mt; +EXCHANGE TABLES ${DATABASE_1}.mt AND ${DATABASE_2}.mt; " # Check that nothing changed -$CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01114_1.mt" -uuid_mt1=$($CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.tables WHERE database='test_01114_1' AND name='mt'") -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_1.mt" | sed "s/$uuid_mt1/00001114-0000-4000-8000-000000000001/g" -$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" +$CLICKHOUSE_CLIENT -q "SELECT count() FROM 
${DATABASE_1}.mt" +uuid_mt1=$($CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.tables WHERE database='${DATABASE_1}' AND name='mt'") +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE ${DATABASE_1}.mt" | sed "s/$uuid_mt1/00001114-0000-4000-8000-000000000001/g" +$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE ${DATABASE_2}.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" $CLICKHOUSE_CLIENT -nm -q " -DROP TABLE test_01114_1.mt SETTINGS database_atomic_wait_for_drop_and_detach_synchronously=0; -CREATE TABLE test_01114_1.mt (s String) ENGINE=Log(); -INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5); -SELECT count() FROM test_01114_1.mt +DROP TABLE ${DATABASE_1}.mt SETTINGS database_atomic_wait_for_drop_and_detach_synchronously=0; +CREATE TABLE ${DATABASE_1}.mt (s String) ENGINE=Log(); +INSERT INTO ${DATABASE_1}.mt SELECT 's' || toString(number) FROM numbers(5); +SELECT count() FROM ${DATABASE_1}.mt " # result: 5 -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) -sleep 1 -$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" +RANDOM_TUPLE="${RANDOM}_tuple" +$CLICKHOUSE_CLIENT --max-threads 5 --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM ${DATABASE_1}.mt -- ${RANDOM_TUPLE}" > /dev/null & # 15s (3s * 5 rows) +it=0 +while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_TUPLE}%'") -ne 1 ]]; do + it=$((it+1)) + if [ $it -ge 50 ]; + then + echo "Failed to wait for second batch of queries" + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = 
currentDatabase() AND query LIKE '%-- ${RANDOM_TUPLE}%'" + fi + sleep 0.1 +done +$CLICKHOUSE_CLIENT -q "DROP DATABASE ${DATABASE_1}" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" wait # for INSERT and SELECT -$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01114_2.mt" # result: 30, 435 -$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_2" --database_atomic_wait_for_drop_and_detach_synchronously=0 +$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM ${DATABASE_2}.mt" # result: 30, 435 +$CLICKHOUSE_CLIENT -q "DROP DATABASE ${DATABASE_2}" --database_atomic_wait_for_drop_and_detach_synchronously=0 From fe6f3130ae82ea6a1ede801b68176691a3fa10e2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 30 Aug 2024 17:08:00 +0000 Subject: [PATCH 105/121] Backport #69080 to 24.7: fix logical error for empty async inserts --- src/Interpreters/AsynchronousInsertQueue.cpp | 20 +++++++++++++++---- .../02481_async_insert_dedup.python | 11 ++++++++-- .../0_stateless/02481_async_insert_dedup.sh | 2 +- .../02481_async_insert_dedup_token.sh | 2 +- 4 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 62777524c2a..c9137f39426 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -1004,8 +1004,14 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( size_t num_rows = executor.execute(*buffer); total_rows += num_rows; - chunk_info->offsets.push_back(total_rows); - chunk_info->tokens.push_back(entry->async_dedup_token); + /// for some reason, client can pass zero rows and bytes to server. + /// We don't update offsets in this case, because we assume every insert has some rows during dedup + /// but we have nothing to deduplicate for this insert. 
+ if (num_rows > 0) + { + chunk_info->offsets.push_back(total_rows); + chunk_info->tokens.push_back(entry->async_dedup_token); + } add_to_async_insert_log(entry, query_for_logging, current_exception, num_rows, num_bytes, data->timeout_ms); @@ -1056,8 +1062,14 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( result_columns[i]->insertRangeFrom(*columns[i], 0, columns[i]->size()); total_rows += block->rows(); - chunk_info->offsets.push_back(total_rows); - chunk_info->tokens.push_back(entry->async_dedup_token); + /// for some reason, client can pass zero rows and bytes to server. + /// We don't update offsets in this case, because we assume every insert has some rows during dedup, + /// but we have nothing to deduplicate for this insert. + if (block->rows()) + { + chunk_info->offsets.push_back(total_rows); + chunk_info->tokens.push_back(entry->async_dedup_token); + } const auto & query_for_logging = get_query_by_format(entry->format); add_to_async_insert_log(entry, query_for_logging, "", block->rows(), block->bytes(), data->timeout_ms); diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 24512836290..c8b5abc11b0 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -48,9 +48,11 @@ def generate_data(q, total_number, use_token): partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"] last_number = 0 while True: - dup_simulate = random.randint(0, 3) + # 0 to simulate duplication + # 1 to simulate empty + simulate_flag = random.randint(0, 4) # insert old data randomly. 25% of them are dup. 
- if dup_simulate == 0: + if simulate_flag == 0: last_idx = len(old_data) - 1 if last_idx < 0: continue @@ -58,6 +60,11 @@ def generate_data(q, total_number, use_token): if idx < 0: idx = 0 q.put(old_data[idx]) + if simulate_flag == 1: + empty_insert_stmt = ( + "insert into t_async_insert_dedup values format JSONEachRow" + ) + q.put((empty_insert_stmt, "")) else: # insert new data. chunk_size = random.randint(1, max_chunk_size) diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.sh b/tests/queries/0_stateless/02481_async_insert_dedup.sh index 0fe06e6ab58..2a646a6bccd 100755 --- a/tests/queries/0_stateless/02481_async_insert_dedup.sh +++ b/tests/queries/0_stateless/02481_async_insert_dedup.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh index bb2d07066a5..81e315ce7dc 100755 --- a/tests/queries/0_stateless/02481_async_insert_dedup_token.sh +++ b/tests/queries/0_stateless/02481_async_insert_dedup_token.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From f57d5850b2a5fd1c90e598b8d30920d17dd93aa4 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 2 Sep 2024 14:11:28 +0000 Subject: [PATCH 106/121] Backport #68950 to 24.7: Fix possible wrong result during anyHeavy state merge --- .../AggregateFunctionAnyHeavy.cpp | 3 ++ .../test_functions.py | 45 ++++++++++++++++--- .../03230_anyHeavy_merge.reference | 1 + .../0_stateless/03230_anyHeavy_merge.sql | 4 ++ 4 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/03230_anyHeavy_merge.reference 
create mode 100644 tests/queries/0_stateless/03230_anyHeavy_merge.sql diff --git a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp index ffddd46f2e3..dbc5f9be72f 100644 --- a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp +++ b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp @@ -68,7 +68,10 @@ public: if (data().isEqualTo(to.data())) counter += to.counter; else if (!data().has() || counter < to.counter) + { data().set(to.data(), arena); + counter = to.counter - counter; + } else counter -= to.counter; } diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index fc03a77030e..d41d6a46ec3 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -67,6 +67,11 @@ def test_aggregate_states(start_cluster): f"select hex(initializeAggregation('{function_name}State', 'foo'))" ).strip() + def get_final_value_unhex(node, function_name, value): + return node.query( + f"select finalizeAggregation(unhex('{value}')::AggregateFunction({function_name}, String))" + ).strip() + for aggregate_function in aggregate_functions: logging.info("Checking %s", aggregate_function) @@ -95,13 +100,39 @@ def test_aggregate_states(start_cluster): upstream_state = get_aggregate_state_hex(upstream, aggregate_function) if upstream_state != backward_state: - logging.info( - "Failed %s, %s (backward) != %s (upstream)", - aggregate_function, - backward_state, - upstream_state, - ) - failed += 1 + allowed_changes_if_result_is_the_same = ["anyHeavy"] + + if aggregate_function in allowed_changes_if_result_is_the_same: + backward_final_from_upstream = get_final_value_unhex( + backward, aggregate_function, upstream_state + ) + upstream_final_from_backward = get_final_value_unhex( + upstream, aggregate_function, backward_state + ) + + if 
backward_final_from_upstream == upstream_final_from_backward: + logging.info( + "OK %s (but different intermediate states)", aggregate_function + ) + passed += 1 + else: + logging.error( + "Failed %s, Intermediate: %s (backward) != %s (upstream). Final from intermediate: %s (backward from upstream state) != %s (upstream from backward state)", + aggregate_function, + backward_state, + upstream_state, + backward_final_from_upstream, + upstream_final_from_backward, + ) + failed += 1 + else: + logging.error( + "Failed %s, %s (backward) != %s (upstream)", + aggregate_function, + backward_state, + upstream_state, + ) + failed += 1 else: logging.info("OK %s", aggregate_function) passed += 1 diff --git a/tests/queries/0_stateless/03230_anyHeavy_merge.reference b/tests/queries/0_stateless/03230_anyHeavy_merge.reference new file mode 100644 index 00000000000..78981922613 --- /dev/null +++ b/tests/queries/0_stateless/03230_anyHeavy_merge.reference @@ -0,0 +1 @@ +a diff --git a/tests/queries/0_stateless/03230_anyHeavy_merge.sql b/tests/queries/0_stateless/03230_anyHeavy_merge.sql new file mode 100644 index 00000000000..5d4c0e55d0f --- /dev/null +++ b/tests/queries/0_stateless/03230_anyHeavy_merge.sql @@ -0,0 +1,4 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (letter String) ENGINE=MergeTree order by () partition by letter; +INSERT INTO t VALUES ('a'), ('a'), ('a'), ('a'), ('b'), ('a'), ('a'), ('a'), ('a'), ('a'), ('a'), ('a'), ('a'), ('a'), ('a'), ('a'), ('c'); +SELECT anyHeavy(if(letter != 'b', letter, NULL)) FROM t; From 1749768cb9060e1978aa99d4903e225feb8e793d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 3 Sep 2024 11:02:32 +0000 Subject: [PATCH 107/121] Update autogenerated version to 24.7.5.37 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 3317dc649e5..35156c84b13 100644 --- 
a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54490) +SET(VERSION_REVISION 54491) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 5) -SET(VERSION_GITHASH 70fe2f6fa527ed19c350e9adacccd073204e031f) -SET(VERSION_DESCRIBE v24.7.5.1-stable) -SET(VERSION_STRING 24.7.5.1) +SET(VERSION_PATCH 6) +SET(VERSION_GITHASH f2533ca97be67770ab54581a85016d6428c11c7c) +SET(VERSION_DESCRIBE v24.7.6.1-stable) +SET(VERSION_STRING 24.7.6.1) # end of autochange From 04ffe6bac075c88b6ba762c561c81cb5f862f40c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 3 Sep 2024 13:11:18 +0000 Subject: [PATCH 108/121] Backport #68744 to 24.7: Fix merging of aggregated data for grouping sets. --- src/Interpreters/Aggregator.h | 12 + src/Interpreters/InterpreterSelectQuery.cpp | 53 +++-- src/Planner/Planner.cpp | 3 +- src/Processors/QueryPlan/AggregatingStep.cpp | 103 +++++---- src/Processors/QueryPlan/AggregatingStep.h | 19 +- .../QueryPlan/MergingAggregatedStep.cpp | 28 ++- .../QueryPlan/MergingAggregatedStep.h | 2 + .../Transforms/MergingAggregatedTransform.cpp | 216 +++++++++++++++++- .../Transforms/MergingAggregatedTransform.h | 30 ++- .../02165_replicated_grouping_sets.reference | 212 +++++++++++++++++ .../02165_replicated_grouping_sets.sql | 20 ++ 11 files changed, 592 insertions(+), 106 deletions(-) diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index f4f1e9a1df3..2cb04fc7c51 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -59,6 +59,18 @@ class CompiledAggregateFunctionsHolder; class NativeWriter; struct OutputBlockColumns; +struct GroupingSetsParams +{ + GroupingSetsParams() = default; + + GroupingSetsParams(Names used_keys_, Names missing_keys_) : used_keys(std::move(used_keys_)), 
missing_keys(std::move(missing_keys_)) { } + + Names used_keys; + Names missing_keys; +}; + +using GroupingSetsParamsList = std::vector; + /** How are "total" values calculated with WITH TOTALS? * (For more details, see TotalsHavingTransform.) * diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 7bee497f6da..3f21e183b58 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -351,6 +351,27 @@ bool shouldIgnoreQuotaAndLimits(const StorageID & table_id) return false; } +GroupingSetsParamsList getAggregatorGroupingSetsParams(const NamesAndTypesLists & aggregation_keys_list, const Names & all_keys) +{ + GroupingSetsParamsList result; + + for (const auto & aggregation_keys : aggregation_keys_list) + { + NameSet keys; + for (const auto & key : aggregation_keys) + keys.insert(key.name); + + Names missing_keys; + for (const auto & key : all_keys) + if (!keys.contains(key)) + missing_keys.push_back(key); + + result.emplace_back(aggregation_keys.getNames(), std::move(missing_keys)); + } + + return result; +} + } InterpreterSelectQuery::InterpreterSelectQuery( @@ -2008,13 +2029,12 @@ static void executeMergeAggregatedImpl( bool has_grouping_sets, const Settings & settings, const NamesAndTypesList & aggregation_keys, + const NamesAndTypesLists & aggregation_keys_list, const AggregateDescriptions & aggregates, bool should_produce_results_in_order_of_bucket_number, SortDescription group_by_sort_description) { auto keys = aggregation_keys.getNames(); - if (has_grouping_sets) - keys.insert(keys.begin(), "__grouping_set"); /** There are two modes of distributed aggregation. 
* @@ -2032,10 +2052,12 @@ static void executeMergeAggregatedImpl( */ Aggregator::Params params(keys, aggregates, overflow_row, settings.max_threads, settings.max_block_size, settings.min_hit_rate_to_use_consecutive_keys_optimization); + auto grouping_sets_params = getAggregatorGroupingSetsParams(aggregation_keys_list, keys); auto merging_aggregated = std::make_unique( query_plan.getCurrentDataStream(), params, + grouping_sets_params, final, /// Grouping sets don't work with distributed_aggregation_memory_efficient enabled (#43989) settings.distributed_aggregation_memory_efficient && is_remote_storage && !has_grouping_sets, @@ -2654,30 +2676,6 @@ static Aggregator::Params getAggregatorParams( }; } -static GroupingSetsParamsList getAggregatorGroupingSetsParams(const SelectQueryExpressionAnalyzer & query_analyzer, const Names & all_keys) -{ - GroupingSetsParamsList result; - if (query_analyzer.useGroupingSetKey()) - { - auto const & aggregation_keys_list = query_analyzer.aggregationKeysList(); - - for (const auto & aggregation_keys : aggregation_keys_list) - { - NameSet keys; - for (const auto & key : aggregation_keys) - keys.insert(key.name); - - Names missing_keys; - for (const auto & key : all_keys) - if (!keys.contains(key)) - missing_keys.push_back(key); - - result.emplace_back(aggregation_keys.getNames(), std::move(missing_keys)); - } - } - return result; -} - void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const ActionsAndProjectInputsFlagPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info) { executeExpression(query_plan, expression, "Before GROUP BY"); @@ -2697,7 +2695,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac settings.group_by_two_level_threshold, settings.group_by_two_level_threshold_bytes); - auto grouping_sets_params = getAggregatorGroupingSetsParams(*query_analyzer, keys); + auto grouping_sets_params = 
getAggregatorGroupingSetsParams(query_analyzer->aggregationKeysList(), keys); SortDescription group_by_sort_description; SortDescription sort_description_for_merging; @@ -2765,6 +2763,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool has_grouping_sets, context->getSettingsRef(), query_analyzer->aggregationKeys(), + query_analyzer->aggregationKeysList(), query_analyzer->aggregates(), should_produce_results_in_order_of_bucket_number, std::move(group_by_sort_description)); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 9b3c75a5b12..c364a38e5d6 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -503,8 +503,6 @@ void addMergingAggregatedStep(QueryPlan & query_plan, */ auto keys = aggregation_analysis_result.aggregation_keys; - if (!aggregation_analysis_result.grouping_sets_parameters_list.empty()) - keys.insert(keys.begin(), "__grouping_set"); Aggregator::Params params(keys, aggregation_analysis_result.aggregate_descriptions, @@ -529,6 +527,7 @@ void addMergingAggregatedStep(QueryPlan & query_plan, auto merging_aggregated = std::make_unique( query_plan.getCurrentDataStream(), params, + aggregation_analysis_result.grouping_sets_parameters_list, query_analysis_result.aggregate_final, /// Grouping sets don't work with distributed_aggregation_memory_efficient enabled (#43989) settings.distributed_aggregation_memory_efficient && (is_remote_storage || parallel_replicas_from_merge_tree) && !query_analysis_result.aggregation_with_rollup_or_cube_or_grouping_sets, diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 0d7e05af1de..7450e7be035 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -153,6 +153,61 @@ void AggregatingStep::applyOrder(SortDescription sort_description_for_merging_, explicit_sorting_required_for_aggregation_in_order = false; } +ActionsDAG 
AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( + const Block & in_header, + const Block & out_header, + const GroupingSetsParamsList & grouping_sets_params, + UInt64 group, + bool group_by_use_nulls) +{ + /// Here we create a DAG which fills missing keys and adds `__grouping_set` column + ActionsDAG dag(in_header.getColumnsWithTypeAndName()); + ActionsDAG::NodeRawConstPtrs outputs; + outputs.reserve(out_header.columns() + 1); + + auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, group), 0); + const auto * grouping_node = &dag.addColumn( + {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); + + grouping_node = &dag.materializeNode(*grouping_node); + outputs.push_back(grouping_node); + + const auto & missing_columns = grouping_sets_params[group].missing_keys; + const auto & used_keys = grouping_sets_params[group].used_keys; + + auto to_nullable_function = FunctionFactory::instance().get("toNullable", nullptr); + for (size_t i = 0; i < out_header.columns(); ++i) + { + const auto & col = out_header.getByPosition(i); + const auto missing_it = std::find_if( + missing_columns.begin(), missing_columns.end(), [&](const auto & missing_col) { return missing_col == col.name; }); + const auto used_it = std::find_if( + used_keys.begin(), used_keys.end(), [&](const auto & used_col) { return used_col == col.name; }); + if (missing_it != missing_columns.end()) + { + auto column_with_default = col.column->cloneEmpty(); + col.type->insertDefaultInto(*column_with_default); + column_with_default->finalize(); + + auto column = ColumnConst::create(std::move(column_with_default), 0); + const auto * node = &dag.addColumn({ColumnPtr(std::move(column)), col.type, col.name}); + node = &dag.materializeNode(*node); + outputs.push_back(node); + } + else + { + const auto * column_node = dag.getOutputs()[in_header.getPositionByName(col.name)]; + if (used_it != used_keys.end() && group_by_use_nulls && 
column_node->result_type->canBeInsideNullable()) + outputs.push_back(&dag.addFunction(to_nullable_function, { column_node }, col.name)); + else + outputs.push_back(column_node); + } + } + + dag.getOutputs().swap(outputs); + return dag; +} + void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { QueryPipelineProcessorsCollector collector(pipeline, this); @@ -302,52 +357,8 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B { const auto & header = ports[set_counter]->getHeader(); - /// Here we create a DAG which fills missing keys and adds `__grouping_set` column - auto dag = std::make_shared(header.getColumnsWithTypeAndName()); - ActionsDAG::NodeRawConstPtrs outputs; - outputs.reserve(output_header.columns() + 1); - - auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, set_counter), 0); - const auto * grouping_node = &dag->addColumn( - {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); - - grouping_node = &dag->materializeNode(*grouping_node); - outputs.push_back(grouping_node); - - const auto & missing_columns = grouping_sets_params[set_counter].missing_keys; - const auto & used_keys = grouping_sets_params[set_counter].used_keys; - - auto to_nullable_function = FunctionFactory::instance().get("toNullable", nullptr); - for (size_t i = 0; i < output_header.columns(); ++i) - { - auto & col = output_header.getByPosition(i); - const auto missing_it = std::find_if( - missing_columns.begin(), missing_columns.end(), [&](const auto & missing_col) { return missing_col == col.name; }); - const auto used_it = std::find_if( - used_keys.begin(), used_keys.end(), [&](const auto & used_col) { return used_col == col.name; }); - if (missing_it != missing_columns.end()) - { - auto column_with_default = col.column->cloneEmpty(); - col.type->insertDefaultInto(*column_with_default); - column_with_default->finalize(); - - auto column = 
ColumnConst::create(std::move(column_with_default), 0); - const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); - node = &dag->materializeNode(*node); - outputs.push_back(node); - } - else - { - const auto * column_node = dag->getOutputs()[header.getPositionByName(col.name)]; - if (used_it != used_keys.end() && group_by_use_nulls && column_node->result_type->canBeInsideNullable()) - outputs.push_back(&dag->addFunction(to_nullable_function, { column_node }, col.name)); - else - outputs.push_back(column_node); - } - } - - dag->getOutputs().swap(outputs); - auto expression = std::make_shared(dag, settings.getActionsSettings()); + auto dag = makeCreatingMissingKeysForGroupingSetDAG(header, output_header, grouping_sets_params, set_counter, group_by_use_nulls); + auto expression = std::make_shared(std::move(dag), settings.getActionsSettings()); auto transform = std::make_shared(header, expression); connect(*ports[set_counter], transform->getInputPort()); diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index ae43295024a..4e4078047f1 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -7,18 +7,6 @@ namespace DB { -struct GroupingSetsParams -{ - GroupingSetsParams() = default; - - GroupingSetsParams(Names used_keys_, Names missing_keys_) : used_keys(std::move(used_keys_)), missing_keys(std::move(missing_keys_)) { } - - Names used_keys; - Names missing_keys; -}; - -using GroupingSetsParamsList = std::vector; - Block appendGroupingSetColumn(Block header); Block generateOutputHeader(const Block & input_header, const Names & keys, bool use_nulls); @@ -77,6 +65,13 @@ public: /// Argument input_stream would be the second input (from projection). 
std::unique_ptr convertToAggregatingProjection(const DataStream & input_stream) const; + static ActionsDAG makeCreatingMissingKeysForGroupingSetDAG( + const Block & in_header, + const Block & out_header, + const GroupingSetsParamsList & grouping_sets_params, + UInt64 group, + bool group_by_use_nulls); + private: void updateOutputStream() override; diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp index a5062ac8216..f3eb352faac 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp +++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp @@ -10,6 +10,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + static bool memoryBoundMergingWillBeUsed( const DataStream & input_stream, bool memory_bound_merging_of_aggregation_results_enabled, @@ -37,6 +42,7 @@ static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_ MergingAggregatedStep::MergingAggregatedStep( const DataStream & input_stream_, Aggregator::Params params_, + GroupingSetsParamsList grouping_sets_params_, bool final_, bool memory_efficient_aggregation_, size_t max_threads_, @@ -48,9 +54,10 @@ MergingAggregatedStep::MergingAggregatedStep( bool memory_bound_merging_of_aggregation_results_enabled_) : ITransformingStep( input_stream_, - params_.getHeader(input_stream_.header, final_), + MergingAggregatedTransform::appendGroupingIfNeeded(input_stream_.header, params_.getHeader(input_stream_.header, final_)), getTraits(should_produce_results_in_order_of_bucket_number_)) , params(std::move(params_)) + , grouping_sets_params(std::move(grouping_sets_params_)) , final(final_) , memory_efficient_aggregation(memory_efficient_aggregation_) , max_threads(max_threads_) @@ -89,10 +96,13 @@ void MergingAggregatedStep::applyOrder(SortDescription sort_description, DataStr void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - auto 
transform_params = std::make_shared(pipeline.getHeader(), std::move(params), final); - if (memoryBoundMergingWillBeUsed()) { + if (input_streams.front().header.has("__grouping_set") || !grouping_sets_params.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Memory bound merging of aggregated results is not supported for grouping sets."); + + auto transform_params = std::make_shared(pipeline.getHeader(), std::move(params), final); auto transform = std::make_shared( pipeline.getHeader(), pipeline.getNumStreams(), @@ -127,15 +137,19 @@ void MergingAggregatedStep::transformPipeline(QueryPipelineBuilder & pipeline, c pipeline.resize(1); /// Now merge the aggregated blocks - pipeline.addSimpleTransform([&](const Block & header) - { return std::make_shared(header, transform_params, max_threads); }); + auto transform = std::make_shared(pipeline.getHeader(), params, final, grouping_sets_params, max_threads); + pipeline.addTransform(std::move(transform)); } else { + if (input_streams.front().header.has("__grouping_set") || !grouping_sets_params.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Memory efficient merging of aggregated results is not supported for grouping sets."); auto num_merge_threads = memory_efficient_merge_threads ? 
memory_efficient_merge_threads : max_threads; + auto transform_params = std::make_shared(pipeline.getHeader(), std::move(params), final); pipeline.addMergingAggregatedMemoryEfficientTransform(transform_params, num_merge_threads); } @@ -154,7 +168,9 @@ void MergingAggregatedStep::describeActions(JSONBuilder::JSONMap & map) const void MergingAggregatedStep::updateOutputStream() { - output_stream = createOutputStream(input_streams.front(), params.getHeader(input_streams.front().header, final), getDataStreamTraits()); + const auto & in_header = input_streams.front().header; + output_stream = createOutputStream(input_streams.front(), + MergingAggregatedTransform::appendGroupingIfNeeded(in_header, params.getHeader(in_header, final)), getDataStreamTraits()); if (is_order_overwritten) /// overwrite order again applyOrder(group_by_sort_description, overwritten_sort_scope); } diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h index 654f794d5f5..5c3842a6c33 100644 --- a/src/Processors/QueryPlan/MergingAggregatedStep.h +++ b/src/Processors/QueryPlan/MergingAggregatedStep.h @@ -16,6 +16,7 @@ public: MergingAggregatedStep( const DataStream & input_stream_, Aggregator::Params params_, + GroupingSetsParamsList grouping_sets_params_, bool final_, bool memory_efficient_aggregation_, size_t max_threads_, @@ -43,6 +44,7 @@ private: Aggregator::Params params; + GroupingSetsParamsList grouping_sets_params; bool final; bool memory_efficient_aggregation; size_t max_threads; diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 446e60a0b81..9b76acb8081 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -1,7 +1,10 @@ #include #include #include +#include #include +#include +#include namespace DB { @@ -10,11 +13,192 @@ namespace ErrorCodes extern const int 
LOGICAL_ERROR; } -MergingAggregatedTransform::MergingAggregatedTransform( - Block header_, AggregatingTransformParamsPtr params_, size_t max_threads_) - : IAccumulatingTransform(std::move(header_), params_->getHeader()) - , params(std::move(params_)), max_threads(max_threads_) +Block MergingAggregatedTransform::appendGroupingIfNeeded(const Block & in_header, Block out_header) { + /// __grouping_set is neither GROUP BY key nor an aggregate function. + /// It behaves like a GROUP BY key, but we cannot append it to keys + /// because it changes hashing method and buckets for two level aggregation. + /// Now, this column is processed "manually" by merging each group separately. + if (in_header.has("__grouping_set")) + out_header.insert(0, in_header.getByName("__grouping_set")); + + return out_header; +} + +/// We should keep the order for GROUPING SET keys. +/// Initiator creates a separate Aggregator for every group, so should we do here. +/// Otherwise, two-level aggregation will split the data into different buckets, +/// and the result may have duplicating rows. 
+static ActionsDAG makeReorderingActions(const Block & in_header, const GroupingSetsParams & params) +{ + ActionsDAG reordering(in_header.getColumnsWithTypeAndName()); + auto & outputs = reordering.getOutputs(); + ActionsDAG::NodeRawConstPtrs new_outputs; + new_outputs.reserve(in_header.columns() + params.used_keys.size() - params.used_keys.size()); + + std::unordered_map index; + for (size_t pos = 0; pos < outputs.size(); ++pos) + index.emplace(outputs[pos]->result_name, pos); + + for (const auto & used_name : params.used_keys) + { + auto & idx = index[used_name]; + new_outputs.push_back(outputs[idx]); + } + + for (const auto & used_name : params.used_keys) + index[used_name] = outputs.size(); + for (const auto & missing_name : params.missing_keys) + index[missing_name] = outputs.size(); + + for (const auto * output : outputs) + { + if (index[output->result_name] != outputs.size()) + new_outputs.push_back(output); + } + + outputs.swap(new_outputs); + return reordering; +} + +MergingAggregatedTransform::~MergingAggregatedTransform() = default; + +MergingAggregatedTransform::MergingAggregatedTransform( + Block header_, + Aggregator::Params params, + bool final, + GroupingSetsParamsList grouping_sets_params, + size_t max_threads_) + : IAccumulatingTransform(header_, appendGroupingIfNeeded(header_, params.getHeader(header_, final))) + , max_threads(max_threads_) +{ + if (!grouping_sets_params.empty()) + { + if (!header_.has("__grouping_set")) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot find __grouping_set column in header of MergingAggregatedTransform with grouping sets." 
+ "Header {}", header_.dumpStructure()); + + auto in_header = header_; + in_header.erase(header_.getPositionByName("__grouping_set")); + auto out_header = params.getHeader(header_, final); + + grouping_sets.reserve(grouping_sets_params.size()); + for (const auto & grouping_set_params : grouping_sets_params) + { + size_t group = grouping_sets.size(); + + auto reordering = makeReorderingActions(in_header, grouping_set_params); + + Aggregator::Params set_params(grouping_set_params.used_keys, + params.aggregates, + params.overflow_row, + params.max_threads, + params.max_block_size, + params.min_hit_rate_to_use_consecutive_keys_optimization); + + auto transform_params = std::make_shared(reordering.updateHeader(in_header), std::move(set_params), final); + + auto creating = AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( + transform_params->getHeader(), + out_header, + grouping_sets_params, group, false); + + auto & groupiung_set = grouping_sets.emplace_back(); + groupiung_set.reordering_key_columns_actions = std::make_shared(std::move(reordering)); + groupiung_set.creating_missing_keys_actions = std::make_shared(std::move(creating)); + groupiung_set.params = std::move(transform_params); + } + } + else + { + auto & groupiung_set = grouping_sets.emplace_back(); + groupiung_set.params = std::make_shared(header_, std::move(params), final); + } +} + +void MergingAggregatedTransform::addBlock(Block block) +{ + if (grouping_sets.size() == 1) + { + auto bucket = block.info.bucket_num; + if (grouping_sets[0].reordering_key_columns_actions) + grouping_sets[0].reordering_key_columns_actions->execute(block); + grouping_sets[0].bucket_to_blocks[bucket].emplace_back(std::move(block)); + return; + } + + auto grouping_position = block.getPositionByName("__grouping_set"); + auto grouping_column = block.getByPosition(grouping_position).column; + block.erase(grouping_position); + + /// Split a block by __grouping_set values. 
+ + const auto * grouping_column_typed = typeid_cast(grouping_column.get()); + if (!grouping_column_typed) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected UInt64 column for __grouping_set, got {}", grouping_column->getName()); + + IColumn::Selector selector; + + const auto & grouping_data = grouping_column_typed->getData(); + size_t num_rows = grouping_data.size(); + UInt64 last_group = grouping_data[0]; + UInt64 max_group = last_group; + for (size_t row = 1; row < num_rows; ++row) + { + auto group = grouping_data[row]; + + /// Optimization for equal ranges. + if (last_group == group) + continue; + + /// Optimization for single group. + if (selector.empty()) + selector.reserve(num_rows); + + /// Fill the last equal range. + selector.resize_fill(row, last_group); + last_group = group; + max_group = std::max(last_group, max_group); + } + + if (max_group >= grouping_sets.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid group number {}. Number of groups {}.", last_group, grouping_sets.size()); + + /// Optimization for single group. + if (selector.empty()) + { + auto bucket = block.info.bucket_num; + grouping_sets[last_group].reordering_key_columns_actions->execute(block); + grouping_sets[last_group].bucket_to_blocks[bucket].emplace_back(std::move(block)); + return; + } + + /// Fill the last equal range. 
+ selector.resize_fill(num_rows, last_group); + + const size_t num_groups = max_group + 1; + Blocks splitted_blocks(num_groups); + + for (size_t group_id = 0; group_id < num_groups; ++group_id) + splitted_blocks[group_id] = block.cloneEmpty(); + + size_t columns_in_block = block.columns(); + for (size_t col_idx_in_block = 0; col_idx_in_block < columns_in_block; ++col_idx_in_block) + { + MutableColumns splitted_columns = block.getByPosition(col_idx_in_block).column->scatter(num_groups, selector); + for (size_t group_id = 0; group_id < num_groups; ++group_id) + splitted_blocks[group_id].getByPosition(col_idx_in_block).column = std::move(splitted_columns[group_id]); + } + + for (size_t group = 0; group < num_groups; ++group) + { + auto & splitted_block = splitted_blocks[group]; + splitted_block.info = block.info; + grouping_sets[group].reordering_key_columns_actions->execute(splitted_block); + grouping_sets[group].bucket_to_blocks[block.info.bucket_num].emplace_back(std::move(splitted_block)); + } } void MergingAggregatedTransform::consume(Chunk chunk) @@ -46,7 +230,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) block.info.is_overflows = agg_info->is_overflows; block.info.bucket_num = agg_info->bucket_num; - bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); + addBlock(std::move(block)); } else if (chunk.getChunkInfos().get()) { @@ -54,7 +238,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) block.info.is_overflows = false; block.info.bucket_num = -1; - bucket_to_blocks[block.info.bucket_num].emplace_back(std::move(block)); + addBlock(std::move(block)); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in MergingAggregatedTransform."); @@ -70,9 +254,23 @@ Chunk MergingAggregatedTransform::generate() /// Exception safety. Make iterator valid in case any method below throws. next_block = blocks.begin(); - /// TODO: this operation can be made async. Add async for IAccumulatingTransform. 
- params->aggregator.mergeBlocks(std::move(bucket_to_blocks), data_variants, max_threads, is_cancelled); - blocks = params->aggregator.convertToBlocks(data_variants, params->final, max_threads); + for (auto & grouping_set : grouping_sets) + { + auto & params = grouping_set.params; + auto & bucket_to_blocks = grouping_set.bucket_to_blocks; + AggregatedDataVariants data_variants; + + /// TODO: this operation can be made async. Add async for IAccumulatingTransform. + params->aggregator.mergeBlocks(std::move(bucket_to_blocks), data_variants, max_threads, is_cancelled); + auto merged_blocks = params->aggregator.convertToBlocks(data_variants, params->final, max_threads); + + if (grouping_set.creating_missing_keys_actions) + for (auto & block : merged_blocks) + grouping_set.creating_missing_keys_actions->execute(block); + + blocks.splice(blocks.end(), std::move(merged_blocks)); + } + next_block = blocks.begin(); } diff --git a/src/Processors/Transforms/MergingAggregatedTransform.h b/src/Processors/Transforms/MergingAggregatedTransform.h index ade76b2f304..3a043ad74b8 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.h +++ b/src/Processors/Transforms/MergingAggregatedTransform.h @@ -6,26 +6,46 @@ namespace DB { +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr; + /** A pre-aggregate stream of blocks in which each block is already aggregated. * Aggregate functions in blocks should not be finalized so that their states can be merged. 
*/ class MergingAggregatedTransform : public IAccumulatingTransform { public: - MergingAggregatedTransform(Block header_, AggregatingTransformParamsPtr params_, size_t max_threads_); + MergingAggregatedTransform( + Block header_, + Aggregator::Params params_, + bool final_, + GroupingSetsParamsList grouping_sets_params, + size_t max_threads_); + + ~MergingAggregatedTransform() override; + String getName() const override { return "MergingAggregatedTransform"; } + static Block appendGroupingIfNeeded(const Block & in_header, Block out_header); + protected: void consume(Chunk chunk) override; Chunk generate() override; private: - AggregatingTransformParamsPtr params; LoggerPtr log = getLogger("MergingAggregatedTransform"); size_t max_threads; - AggregatedDataVariants data_variants; - Aggregator::BucketToBlocks bucket_to_blocks; + struct GroupingSet + { + Aggregator::BucketToBlocks bucket_to_blocks; + ExpressionActionsPtr reordering_key_columns_actions; + ExpressionActionsPtr creating_missing_keys_actions; + AggregatingTransformParamsPtr params; + }; + + using GroupingSets = std::vector; + GroupingSets grouping_sets; UInt64 total_input_rows = 0; UInt64 total_input_blocks = 0; @@ -35,6 +55,8 @@ private: bool consume_started = false; bool generate_started = false; + + void addBlock(Block block); }; } diff --git a/tests/queries/0_stateless/02165_replicated_grouping_sets.reference b/tests/queries/0_stateless/02165_replicated_grouping_sets.reference index 659cd98368d..31cbf2ad670 100644 --- a/tests/queries/0_stateless/02165_replicated_grouping_sets.reference +++ b/tests/queries/0_stateless/02165_replicated_grouping_sets.reference @@ -11,3 +11,215 @@ 0 6 4 1 10 4 2 14 4 +-- { echo On } + +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +1 ['.'] +2 
['.','.'] +2 ['.','.','.'] +2 ['.','.','.','.'] +2 ['.','.','.','.','.'] +2 ['.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.','.'] +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +1 ['.'] +1 ['.'] +2 ['.','.'] +2 ['.','.'] +2 ['.','.','.'] +2 ['.','.','.'] +2 ['.','.','.','.'] +2 ['.','.','.','.'] +2 ['.','.','.','.','.'] +2 ['.','.','.','.','.'] +2 ['.','.','.','.','.','.'] +2 ['.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.','.'] +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +1 1 +2 2 +2 3 +2 4 +2 5 +2 6 +2 7 +2 8 +2 9 +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +1 1 +1 1 +2 2 +2 2 +2 3 +2 3 +2 4 +2 4 +2 5 +2 5 +2 6 +2 6 +2 7 +2 7 +2 8 +2 8 +2 9 +2 9 +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +1 1 +1 1 +2 2 +2 2 +2 3 +2 3 +2 4 +2 4 +2 5 +2 5 +2 6 +2 6 +2 7 +2 7 +2 8 +2 8 +2 9 +2 9 +SELECT count(), 
toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k), (k, number + 2)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +1 1 +1 1 +1 1 +2 2 +2 2 +2 2 +2 3 +2 3 +2 3 +2 4 +2 4 +2 4 +2 5 +2 5 +2 5 +2 6 +2 6 +2 6 +2 7 +2 7 +2 7 +2 8 +2 8 +2 8 +2 9 +2 9 +2 9 +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +2 ['.'] +2 ['.','.'] +2 ['.','.','.'] +2 ['.','.','.','.'] +2 ['.','.','.','.','.'] +2 ['.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.','.'] +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +2 ['.'] +2 ['.'] +2 ['.','.'] +2 ['.','.'] +2 ['.','.','.'] +2 ['.','.','.'] +2 ['.','.','.','.'] +2 ['.','.','.','.'] +2 ['.','.','.','.','.'] +2 ['.','.','.','.','.'] +2 ['.','.','.','.','.','.'] +2 ['.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.','.'] +2 ['.','.','.','.','.','.','.','.','.'] +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +2 1 +2 2 +2 3 +2 4 +2 5 +2 6 +2 7 +2 8 +2 9 +SELECT count(), toString(number) AS k FROM 
remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +2 1 +2 1 +2 2 +2 2 +2 3 +2 3 +2 4 +2 4 +2 5 +2 5 +2 6 +2 6 +2 7 +2 7 +2 8 +2 8 +2 9 +2 9 +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +2 1 +2 1 +2 2 +2 2 +2 3 +2 3 +2 4 +2 4 +2 5 +2 5 +2 6 +2 6 +2 7 +2 7 +2 8 +2 8 +2 9 +2 9 +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k), (k, number + 2)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +2 1 +2 1 +2 1 +2 2 +2 2 +2 2 +2 3 +2 3 +2 3 +2 4 +2 4 +2 4 +2 5 +2 5 +2 5 +2 6 +2 6 +2 6 +2 7 +2 7 +2 7 +2 8 +2 8 +2 8 +2 9 +2 9 +2 9 diff --git a/tests/queries/0_stateless/02165_replicated_grouping_sets.sql b/tests/queries/0_stateless/02165_replicated_grouping_sets.sql index d92d92c3e72..47d4446f348 100644 --- a/tests/queries/0_stateless/02165_replicated_grouping_sets.sql +++ b/tests/queries/0_stateless/02165_replicated_grouping_sets.sql @@ -43,3 +43,23 @@ GROUP BY ORDER BY sum_value ASC, count_value ASC; + +set prefer_localhost_replica = 1; + +-- { echo On } + +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, 
k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; + +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), toString(number) AS k FROM remote('127.0.0.{1,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k), (k, number + 2)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; + +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), arrayMap(x -> '.', range(number % 10)) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; + +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k)) ORDER BY k settings group_by_two_level_threshold=9, 
max_bytes_before_external_group_by=10000000000; +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (k, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; +SELECT count(), toString(number) AS k FROM remote('127.0.0.{3,2}', numbers(10)) where number > ( queryID() = initialQueryID()) GROUP BY GROUPING SETS ((k), (number + 1, k), (k, number + 2)) ORDER BY k settings group_by_two_level_threshold=9, max_bytes_before_external_group_by=10000000000; From 95d1675337c57abd6d8e455cf50fef96f2898cd0 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 3 Sep 2024 18:13:32 +0200 Subject: [PATCH 109/121] Try to fix build --- src/Processors/QueryPlan/AggregatingStep.cpp | 18 +++++++++--------- src/Processors/QueryPlan/AggregatingStep.h | 2 +- .../Transforms/MergingAggregatedTransform.cpp | 8 ++++---- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 7450e7be035..a9d9902b051 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -153,7 +153,7 @@ void AggregatingStep::applyOrder(SortDescription sort_description_for_merging_, explicit_sorting_required_for_aggregation_in_order = false; } -ActionsDAG AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( +ActionsDAGPtr AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( const Block & in_header, const Block & out_header, const GroupingSetsParamsList & grouping_sets_params, @@ -161,15 +161,15 @@ ActionsDAG 
AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( bool group_by_use_nulls) { /// Here we create a DAG which fills missing keys and adds `__grouping_set` column - ActionsDAG dag(in_header.getColumnsWithTypeAndName()); + auto dag = std::make_shared(in_header.getColumnsWithTypeAndName()); ActionsDAG::NodeRawConstPtrs outputs; outputs.reserve(out_header.columns() + 1); auto grouping_col = ColumnConst::create(ColumnUInt64::create(1, group), 0); - const auto * grouping_node = &dag.addColumn( + const auto * grouping_node = &dag->addColumn( {ColumnPtr(std::move(grouping_col)), std::make_shared(), "__grouping_set"}); - grouping_node = &dag.materializeNode(*grouping_node); + grouping_node = &dag->materializeNode(*grouping_node); outputs.push_back(grouping_node); const auto & missing_columns = grouping_sets_params[group].missing_keys; @@ -190,21 +190,21 @@ ActionsDAG AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( column_with_default->finalize(); auto column = ColumnConst::create(std::move(column_with_default), 0); - const auto * node = &dag.addColumn({ColumnPtr(std::move(column)), col.type, col.name}); - node = &dag.materializeNode(*node); + const auto * node = &dag->addColumn({ColumnPtr(std::move(column)), col.type, col.name}); + node = &dag->materializeNode(*node); outputs.push_back(node); } else { - const auto * column_node = dag.getOutputs()[in_header.getPositionByName(col.name)]; + const auto * column_node = dag->getOutputs()[in_header.getPositionByName(col.name)]; if (used_it != used_keys.end() && group_by_use_nulls && column_node->result_type->canBeInsideNullable()) - outputs.push_back(&dag.addFunction(to_nullable_function, { column_node }, col.name)); + outputs.push_back(&dag->addFunction(to_nullable_function, { column_node }, col.name)); else outputs.push_back(column_node); } } - dag.getOutputs().swap(outputs); + dag->getOutputs().swap(outputs); return dag; } diff --git a/src/Processors/QueryPlan/AggregatingStep.h 
b/src/Processors/QueryPlan/AggregatingStep.h index 4e4078047f1..60b25a1c06e 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -65,7 +65,7 @@ public: /// Argument input_stream would be the second input (from projection). std::unique_ptr convertToAggregatingProjection(const DataStream & input_stream) const; - static ActionsDAG makeCreatingMissingKeysForGroupingSetDAG( + static ActionsDAGPtr makeCreatingMissingKeysForGroupingSetDAG( const Block & in_header, const Block & out_header, const GroupingSetsParamsList & grouping_sets_params, diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 9b76acb8081..c4f0952dde7 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -29,10 +29,10 @@ Block MergingAggregatedTransform::appendGroupingIfNeeded(const Block & in_header /// Initiator creates a separate Aggregator for every group, so should we do here. /// Otherwise, two-level aggregation will split the data into different buckets, /// and the result may have duplicating rows. 
-static ActionsDAG makeReorderingActions(const Block & in_header, const GroupingSetsParams & params) +static ActionsDAGPtr makeReorderingActions(const Block & in_header, const GroupingSetsParams & params) { - ActionsDAG reordering(in_header.getColumnsWithTypeAndName()); - auto & outputs = reordering.getOutputs(); + auto reordering = std::make_shared(in_header.getColumnsWithTypeAndName()); + auto & outputs = reordering->getOutputs(); ActionsDAG::NodeRawConstPtrs new_outputs; new_outputs.reserve(in_header.columns() + params.used_keys.size() - params.used_keys.size()); @@ -97,7 +97,7 @@ MergingAggregatedTransform::MergingAggregatedTransform( params.max_block_size, params.min_hit_rate_to_use_consecutive_keys_optimization); - auto transform_params = std::make_shared(reordering.updateHeader(in_header), std::move(set_params), final); + auto transform_params = std::make_shared(reordering->updateHeader(in_header), std::move(set_params), final); auto creating = AggregatingStep::makeCreatingMissingKeysForGroupingSetDAG( transform_params->getHeader(), From 76dd2bacf3040f4fdb9d7087351ebf38f9c3bb34 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 3 Sep 2024 17:11:23 +0000 Subject: [PATCH 110/121] Backport #69193 to 24.7: Disable memory test with sanitizer --- .../0_stateless/01541_max_memory_usage_for_user_long.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh index 9f0699929f8..4309220811b 100755 --- a/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long, no-replicated-database, no-parallel, no-fasttest +# Tags: long, no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-msan, no-ubsan +# no sanitizers -- memory consumption is unpredicatable with sanitizers 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From a7f2c8c855c6621dbba44241399d7e023e9923dd Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 4 Sep 2024 11:07:43 +0000 Subject: [PATCH 111/121] Backport #69146 to 24.7: Fix: parallel replicas duplicate announcement request --- src/Planner/Planner.cpp | 3 +- .../03231_pr_duplicate_announcement.reference | 2 + .../03231_pr_duplicate_announcement.sql | 23 ++++++++++ ...3231_pr_duplicate_announcement_2.reference | 1 + .../03231_pr_duplicate_announcement_2.sql | 45 +++++++++++++++++++ 5 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03231_pr_duplicate_announcement.reference create mode 100644 tests/queries/0_stateless/03231_pr_duplicate_announcement.sql create mode 100644 tests/queries/0_stateless/03231_pr_duplicate_announcement_2.reference create mode 100644 tests/queries/0_stateless/03231_pr_duplicate_announcement_2.sql diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index c364a38e5d6..fc63a14f58b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1299,7 +1299,8 @@ void Planner::buildPlanForUnionNode() for (const auto & query_node : union_queries_nodes) { - Planner query_planner(query_node, select_query_options); + Planner query_planner(query_node, select_query_options, planner_context->getGlobalPlannerContext()); + query_planner.buildQueryPlanIfNeeded(); for (const auto & row_policy : query_planner.getUsedRowPolicies()) used_row_policies.insert(row_policy); diff --git a/tests/queries/0_stateless/03231_pr_duplicate_announcement.reference b/tests/queries/0_stateless/03231_pr_duplicate_announcement.reference new file mode 100644 index 00000000000..b32da0d591a --- /dev/null +++ b/tests/queries/0_stateless/03231_pr_duplicate_announcement.reference @@ -0,0 +1,2 @@ +0 Value_0 +1 Value_1 diff --git a/tests/queries/0_stateless/03231_pr_duplicate_announcement.sql 
b/tests/queries/0_stateless/03231_pr_duplicate_announcement.sql new file mode 100644 index 00000000000..cd1fc277fe1 --- /dev/null +++ b/tests/queries/0_stateless/03231_pr_duplicate_announcement.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS test_table SYNC; +CREATE TABLE test_table +( + id UInt64, + value String +) ENGINE=ReplicatedMergeTree('/clickhouse/test/{database}/test_table', 'r1') ORDER BY tuple(); + +INSERT INTO test_table VALUES (0, 'Value_0'), (1, 'Value_1'), (2, 'Value_2'); + +DROP TABLE IF EXISTS test_table_for_in SYNC; +CREATE TABLE test_table_for_in +( + id UInt64 +) ENGINE=ReplicatedMergeTree('/clickhouse/test/{database}/test_table_for_in', 'r1') ORDER BY tuple(); + +INSERT INTO test_table_for_in VALUES (0), (1); + +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT id, value FROM test_table WHERE id IN (SELECT id FROM test_table_for_in UNION DISTINCT SELECT id FROM test_table_for_in); + +DROP TABLE test_table SYNC; +DROP TABLE test_table_for_in SYNC; diff --git a/tests/queries/0_stateless/03231_pr_duplicate_announcement_2.reference b/tests/queries/0_stateless/03231_pr_duplicate_announcement_2.reference new file mode 100644 index 00000000000..38f7ecf84a4 --- /dev/null +++ b/tests/queries/0_stateless/03231_pr_duplicate_announcement_2.reference @@ -0,0 +1 @@ +CAT 2 diff --git a/tests/queries/0_stateless/03231_pr_duplicate_announcement_2.sql b/tests/queries/0_stateless/03231_pr_duplicate_announcement_2.sql new file mode 100644 index 00000000000..eec1443ad2a --- /dev/null +++ b/tests/queries/0_stateless/03231_pr_duplicate_announcement_2.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS ANIMAL SYNC; + +CREATE TABLE ANIMAL ( ANIMAL Nullable(String) ) ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/animal', 'r1') ORDER BY tuple(); + +INSERT INTO ANIMAL (ANIMAL) VALUES ('CAT'), ('FISH'), ('DOG'), ('HORSE'), ('BIRD'); + +SET 
joined_subquery_requires_alias = 0; +SET allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT * +FROM +( + SELECT + x.b AS x, + countDistinct(x.c) AS ANIMAL + FROM + ( + SELECT + a.ANIMAL AS a, + 'CAT' AS b, + c.ANIMAL AS c, + d.ANIMAL AS d + FROM ANIMAL AS a + INNER JOIN ANIMAL AS b ON a.ANIMAL = b.ANIMAL + LEFT JOIN ANIMAL AS c ON b.ANIMAL = c.ANIMAL + RIGHT JOIN + ( + SELECT * + FROM ANIMAL + UNION ALL + SELECT * + FROM ANIMAL + UNION ALL + SELECT * + FROM ANIMAL + ) AS d ON a.ANIMAL = d.ANIMAL + WHERE (d.ANIMAL != 'CAT') AND (c.ANIMAL != 'DOG') AND (b.ANIMAL != 'FISH') + ) AS x + WHERE x.b >= 'CAT' + GROUP BY x.b + HAVING ANIMAL >= 0 +) AS ANIMAL +WHERE ANIMAL.ANIMAL >= 0; + +DROP TABLE ANIMAL SYNC; From 05bc309a0e21adc69a5b192761a85679dc681010 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 6 Sep 2024 16:09:57 +0000 Subject: [PATCH 112/121] Update autogenerated version to 24.7.6.8 and contributors --- cmake/autogenerated_versions.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 35156c84b13..9b4791ffc7f 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54491) +SET(VERSION_REVISION 54492) SET(VERSION_MAJOR 24) SET(VERSION_MINOR 7) -SET(VERSION_PATCH 6) -SET(VERSION_GITHASH f2533ca97be67770ab54581a85016d6428c11c7c) -SET(VERSION_DESCRIBE v24.7.6.1-stable) -SET(VERSION_STRING 24.7.6.1) +SET(VERSION_PATCH 7) +SET(VERSION_GITHASH 7779883593a9f4e2e0a6a4bd43f712bdb457065d) +SET(VERSION_DESCRIBE v24.7.7.1-stable) +SET(VERSION_STRING 24.7.7.1) # end of autochange From 5392af616b1db4f85fe1244e61713ecd2ca514ec Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 6 Sep 2024 20:11:46 +0000 Subject: [PATCH 113/121] Backport #69264 to 24.7: Fix: Not-ready Set with parallel replicas --- src/Planner/Planner.cpp | 1 + .../QueryPlan/Optimizations/Optimizations.h | 2 +- .../QueryPlanOptimizationSettings.h | 2 ++ .../QueryPlan/Optimizations/optimizeTree.cpp | 2 +- src/Processors/QueryPlan/QueryPlan.cpp | 3 ++- tests/ci/ci_config.py | 4 +++- .../03232_pr_not_ready_set.reference | 0 .../0_stateless/03232_pr_not_ready_set.sql | 18 ++++++++++++++++++ 8 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03232_pr_not_ready_set.reference create mode 100644 tests/queries/0_stateless/03232_pr_not_ready_set.sql diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index fc63a14f58b..7baefccd38b 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -199,6 +199,7 @@ FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr & auto & result_query_plan = planner.getQueryPlan(); auto optimization_settings = QueryPlanOptimizationSettings::fromContext(query_context); + optimization_settings.build_sets = false; // no need to build sets to collect filters result_query_plan.optimize(optimization_settings); FiltersForTableExpressionMap res; diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index c48bdf1552a..43f07ced696 100644 --- 
a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -16,7 +16,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them. /// After that it add CreateSetsStep for the subqueries that has not be used in the filters. -void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes); +void addStepsToBuildSets(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes); /// Optimization (first pass) is a function applied to QueryPlan::Node. /// It can read and update subtree of specified node. diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 539ff2eafbb..a2b22495800 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -75,6 +75,8 @@ struct QueryPlanOptimizationSettings String force_projection_name; bool optimize_use_implicit_projections = false; + bool build_sets = true; + static QueryPlanOptimizationSettings fromSettings(const Settings & from); static QueryPlanOptimizationSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp index 25895788e2e..f8504d84d12 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp @@ -216,7 +216,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s optimization_settings.force_projection_name); } -void 
optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) +void addStepsToBuildSets(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes) { Stack stack; stack.push_back({.node = &root}); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index b78f7a29cde..9a39df26241 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -504,7 +504,8 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); - QueryPlanOptimizations::optimizeTreeThirdPass(*this, *root, nodes); + if (optimization_settings.build_sets) + QueryPlanOptimizations::addStepsToBuildSets(*this, *root, nodes); updateDataStreams(*root); } diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index a84d39cf191..dfd7730f113 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -397,7 +397,9 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_TSAN], + num_batches=6, + timeout=9000, # the job timed out with default value (7200) ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], diff --git a/tests/queries/0_stateless/03232_pr_not_ready_set.reference b/tests/queries/0_stateless/03232_pr_not_ready_set.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03232_pr_not_ready_set.sql b/tests/queries/0_stateless/03232_pr_not_ready_set.sql new file mode 100644 index 00000000000..1a724085903 --- /dev/null +++ 
b/tests/queries/0_stateless/03232_pr_not_ready_set.sql @@ -0,0 +1,18 @@ +SELECT + is_initial_query, + count() AS c, + replaceRegexpAll(query, '_data_(\\d+)_(\\d+)', '_data_') AS query +FROM system.query_log +WHERE (event_date >= yesterday()) AND (type = 'QueryFinish') AND (ignore(54, 0, ignore('QueryFinish', 11, toLowCardinality(toLowCardinality(11)), 11, 11, 11), 'QueryFinish', materialize(11), toUInt128(11)) IN ( + SELECT query_id + FROM system.query_log + WHERE (current_database = currentDatabase()) AND (event_date >= yesterday()) AND (type = 'QueryFinish') AND (query LIKE '-- Parallel inner query alone%') +)) +GROUP BY + is_initial_query, + query +ORDER BY + is_initial_query ASC, + c ASC, + query ASC +SETTINGS allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', parallel_replicas_for_non_replicated_merge_tree=1, parallel_replicas_min_number_of_rows_per_replica=10; From 8c89ad00f37c29bfacfc69897c89bffd0afc5704 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2024 14:10:36 +0000 Subject: [PATCH 114/121] Backport #68584 to 24.7: Avoid detached covered-by-broken part duplicates --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 + .../test_covered_by_broken_exists/__init__.py | 0 .../test_covered_by_broken_exists/test.py | 103 ++++++++++++++++++ 3 files changed, 104 insertions(+) create mode 100644 tests/integration/test_covered_by_broken_exists/__init__.py create mode 100644 tests/integration/test_covered_by_broken_exists/test.py diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 71f37d01a5f..312f2b5151b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2031,6 +2031,7 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix IDataPartStorage::ClonePartParams params { 
.copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, + .keep_metadata_version = prefix == "covered-by-broken", .make_source_readonly = true, .external_transaction = disk_transaction }; diff --git a/tests/integration/test_covered_by_broken_exists/__init__.py b/tests/integration/test_covered_by_broken_exists/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_covered_by_broken_exists/test.py b/tests/integration/test_covered_by_broken_exists/test.py new file mode 100644 index 00000000000..caa091fdd2d --- /dev/null +++ b/tests/integration/test_covered_by_broken_exists/test.py @@ -0,0 +1,103 @@ +import pytest +import logging +import time +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.test_tools import assert_eq_with_retry + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", stay_alive=True, with_zookeeper=True) +node2 = cluster.add_instance("node2", with_zookeeper=True) + +instance = node1 +q = node1.query + +path_to_data = "/var/lib/clickhouse/" + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def wait_merged_part(table, part_name, retries=100): + q("OPTIMIZE TABLE {} FINAL".format(table)) + for i in range(retries): + result = q( + "SELECT name FROM system.parts where table='{}' AND name='{}'".format( + table, part_name + ) + ) + if result: + return True + time.sleep(0.5) + else: + return False + + +def test_make_clone_covered_by_broken_detached_dir_exists(started_cluster): + q("DROP TABLE IF EXISTS test_make_clone_cvbdde SYNC") + + q( + "CREATE TABLE test_make_clone_cvbdde(n int, m String) ENGINE=ReplicatedMergeTree('/test_make_clone_cvbdde', '1') ORDER BY n SETTINGS old_parts_lifetime=3600, min_age_to_force_merge_seconds=1, 
min_age_to_force_merge_on_partition_only=0" + ) + path = path_to_data + "data/default/test_make_clone_cvbdde/" + + q("INSERT INTO test_make_clone_cvbdde VALUES (0, 'hbl')") + + q("INSERT INTO test_make_clone_cvbdde VALUES (1, 'hbl')") + if not (wait_merged_part("test_make_clone_cvbdde", "all_0_1_1")): + assert False, "Part all_0_1_1 doesn't appeared in system.parts" + + q("INSERT INTO test_make_clone_cvbdde VALUES (2, 'hbl')") + if not (wait_merged_part("test_make_clone_cvbdde", "all_0_2_2")): + assert False, "Part all_0_2_2 doesn't appeared in system.parts" + + q("INSERT INTO test_make_clone_cvbdde VALUES (3, 'hbl')") + if not (wait_merged_part("test_make_clone_cvbdde", "all_0_3_3")): + assert False, "Part all_0_3_3 doesn't appeared in system.parts" + + res = str(instance.exec_in_container(["ls", path]).strip().split("\n")) + + # broke the merged parts + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format(path + "all_0_1_1/data.bin"), + ] + ) + + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format(path + "all_0_2_2/data.bin"), + ] + ) + + instance.exec_in_container( + [ + "bash", + "-c", + "echo 'broken' > {}".format(path + "all_0_3_3/data.bin"), + ] + ) + + instance.restart_clickhouse(kill=True) + + assert [ + "broken-on-start_all_0_1_1", + "broken-on-start_all_0_2_2", + "broken-on-start_all_0_3_3", + "covered-by-broken_all_0_0_0", + "covered-by-broken_all_1_1_0", + "covered-by-broken_all_2_2_0", + "covered-by-broken_all_3_3_0", + ] == sorted( + instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") + ) From df66ffc8a8b675d04099eae0e46569dafe43368f Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2024 16:07:37 +0000 Subject: [PATCH 115/121] Backport #69390 to 24.7: Fix undefined behavior if all connection tries fail --- src/Client/ConnectionPoolWithFailover.cpp | 2 +- src/Client/ConnectionPoolWithFailover.h | 4 ++-- src/Client/HedgedConnectionsFactory.cpp | 2 +- 
src/Common/PoolWithFailoverBase.h | 14 ++++++++++++++ src/Core/Defines.h | 2 +- .../Distributed/DistributedAsyncInsertBatch.cpp | 10 ++-------- .../DistributedAsyncInsertDirectoryQueue.cpp | 5 +---- src/Storages/Distributed/DistributedSink.cpp | 5 +---- 8 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index fb895d17763..a5c14dc9957 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -168,7 +168,7 @@ std::vector ConnectionPoolWithFailover::g { return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, /*async_callback=*/ {}); }; return getManyImpl(settings, pool_mode, try_get_entry, - /*skip_unavailable_endpoints=*/ std::nullopt, + /*skip_unavailable_endpoints=*/ false, /// skip_unavailable_endpoints is used to get the min number of entries, and we need at least one /*priority_func=*/ {}, settings.distributed_insert_skip_read_only_replicas); } diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index a2dc188eb7d..6db52140854 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -42,7 +42,7 @@ public: size_t max_error_cap = DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT); using Entry = IConnectionPool::Entry; - using PoolWithFailoverBase::isTryResultInvalid; + using PoolWithFailoverBase::getValidTryResult; /** Allocates connection to work. 
*/ Entry get(const ConnectionTimeouts & timeouts) override; @@ -98,7 +98,7 @@ public: std::vector getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {}, bool use_slowdown_count = false); - size_t getMaxErrorCup() const { return Base::max_error_cap; } + size_t getMaxErrorCap() const { return Base::max_error_cap; } void updateSharedError(std::vector & shuffled_pools) { diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index be7397b0fad..df63a124539 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -327,7 +327,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect ShuffledPool & shuffled_pool = shuffled_pools[index]; LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message); - shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1); + shuffled_pool.error_count = std::min(pool->getMaxErrorCap(), shuffled_pool.error_count + 1); shuffled_pool.slowdown_count = 0; if (shuffled_pool.error_count >= max_tries) diff --git a/src/Common/PoolWithFailoverBase.h b/src/Common/PoolWithFailoverBase.h index c44ab7df53a..989ffd888f8 100644 --- a/src/Common/PoolWithFailoverBase.h +++ b/src/Common/PoolWithFailoverBase.h @@ -122,6 +122,20 @@ public: return result.entry.isNull() || !result.is_usable || (skip_read_only_replicas && result.is_readonly); } + TryResult getValidTryResult(const std::vector & results, bool skip_read_only_replicas) const + { + if (results.empty()) + throw DB::Exception(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED, "Cannot get any valid connection because all connection tries failed"); + + auto result = results.front(); + if (isTryResultInvalid(result, skip_read_only_replicas)) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, + "Got an invalid connection result: entry.isNull {}, is_usable {}, is_up_to_date {}, delay {}, is_readonly 
{}, skip_read_only_replicas {}", + result.entry.isNull(), result.is_usable, result.is_up_to_date, result.delay, result.is_readonly, skip_read_only_replicas); + + return result; + } + size_t getPoolSize() const { return nested_pools.size(); } protected: diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 6df335a9c8f..3341669fed2 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -40,7 +40,7 @@ static constexpr auto SHOW_CHARS_ON_SYNTAX_ERROR = ptrdiff_t(160); /// each period reduces the error counter by 2 times /// too short a period can cause errors to disappear immediately after creation. static constexpr auto DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD = 60; -/// replica error max cap, this is to prevent replica from accumulating too many errors and taking to long to recover. +/// replica error max cap, this is to prevent replica from accumulating too many errors and taking too long to recover. static constexpr auto DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT = 1000; /// The boundary on which the blocks for asynchronous file operations should be aligned. 
diff --git a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp index 2cf69b9f6b7..f1166b5544b 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertBatch.cpp @@ -243,10 +243,7 @@ void DistributedAsyncInsertBatch::sendBatch(const SettingsChanges & settings_cha auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings); auto results = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName()); - auto result = results.front(); - if (parent.pool->isTryResultInvalid(result, insert_settings.distributed_insert_skip_read_only_replicas)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); - + auto result = parent.pool->getValidTryResult(results, insert_settings.distributed_insert_skip_read_only_replicas); connection = std::move(result.entry); compression_expected = connection->getCompression() == Protocol::Compression::Enable; @@ -305,10 +302,7 @@ void DistributedAsyncInsertBatch::sendSeparateFiles(const SettingsChanges & sett auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings); auto results = parent.pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, parent.storage.remote_storage.getQualifiedName()); - auto result = results.front(); - if (parent.pool->isTryResultInvalid(result, insert_settings.distributed_insert_skip_read_only_replicas)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); - + auto result = parent.pool->getValidTryResult(results, insert_settings.distributed_insert_skip_read_only_replicas); auto connection = std::move(result.entry); bool compression_expected = connection->getCompression() == Protocol::Compression::Enable; diff --git a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp 
b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp index 876eff0021f..f86951de60a 100644 --- a/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp +++ b/src/Storages/Distributed/DistributedAsyncInsertDirectoryQueue.cpp @@ -413,10 +413,7 @@ void DistributedAsyncInsertDirectoryQueue::processFile(std::string & file_path, auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(insert_settings); auto results = pool->getManyCheckedForInsert(timeouts, insert_settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); - auto result = results.front(); - if (pool->isTryResultInvalid(result, insert_settings.distributed_insert_skip_read_only_replicas)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); - + auto result = pool->getValidTryResult(results, insert_settings.distributed_insert_skip_read_only_replicas); auto connection = std::move(result.entry); LOG_DEBUG(log, "Sending `{}` to {} ({} rows, {} bytes)", diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e0baefd5838..3c282cd6e6f 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -377,10 +377,7 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// NOTE: INSERT will also take into account max_replica_delay_for_distributed_queries /// (anyway fallback_to_stale_replicas_for_distributed_queries=true by default) auto results = shard_info.pool->getManyCheckedForInsert(timeouts, settings, PoolMode::GET_ONE, storage.remote_storage.getQualifiedName()); - auto result = results.front(); - if (shard_info.pool->isTryResultInvalid(result, settings.distributed_insert_skip_read_only_replicas)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got an invalid connection result"); - + auto result = shard_info.pool->getValidTryResult(results, settings.distributed_insert_skip_read_only_replicas); 
job.connection_entry = std::move(result.entry); } else From 0c30d140ee37fed60dd475782401add54b71cb8b Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2024 16:09:20 +0000 Subject: [PATCH 116/121] Backport #67748 to 24.7: Fix expiration in RoleCache --- src/Access/RoleCache.cpp | 2 +- tests/integration/test_role/test.py | 240 ++++++++++++++++++++++------ 2 files changed, 193 insertions(+), 49 deletions(-) diff --git a/src/Access/RoleCache.cpp b/src/Access/RoleCache.cpp index 2d94df2eea5..cc1f1520b67 100644 --- a/src/Access/RoleCache.cpp +++ b/src/Access/RoleCache.cpp @@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO SubscriptionsOnRoles new_subscriptions_on_roles; new_subscriptions_on_roles.reserve(subscriptions_on_roles.size()); - auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); }; + auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); }; for (const auto & current_role : enabled_roles.params.current_roles) collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false); diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index b3b18dc8271..225cab975ff 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -1,5 +1,6 @@ import time import pytest +import random from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -418,72 +419,215 @@ def test_function_current_roles(): ) -def test_role_expiration(): - instance.query("CREATE USER ure") +@pytest.mark.parametrize("with_extra_role", [False, True]) +def test_role_expiration(with_extra_role): instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") + instance.query("CREATE USER 
ure DEFAULT ROLE rre") - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") + instance.query("CREATE TABLE table1 (id Int) Engine=Log") + instance.query("CREATE TABLE table2 (id Int) Engine=Log") + instance.query("INSERT INTO table1 VALUES (1)") + instance.query("INSERT INTO table2 VALUES (2)") + instance.query("GRANT SELECT ON table1 TO rre") + + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" + "SELECT * FROM table2", user="ure" ) - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test # so we wait >2 seconds until the role is expired time.sleep(5) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + if with_extra_role: + # Expiration of role "rre" from the role cache can be caused by another role being used. 
+ instance.query("CREATE ROLE extra_role") + instance.query("CREATE USER extra_user DEFAULT ROLE extra_role") + instance.query("GRANT SELECT ON table1 TO extra_role") + assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n" - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + instance.query("GRANT SELECT ON table2 TO rre") + assert instance.query("SELECT * FROM table1", user="ure") == "1\n" + assert instance.query("SELECT * FROM table2", user="ure") == "2\n" - instance.query("DROP USER ure") instance.query("DROP ROLE rre") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + instance.query("DROP USER ure") + instance.query("DROP TABLE table1") + instance.query("DROP TABLE table2") + + if with_extra_role: + instance.query("DROP ROLE extra_role") + instance.query("DROP USER extra_user") -def test_two_roles_expiration(): - instance.query("CREATE USER ure") - instance.query("CREATE ROLE rre") - instance.query("GRANT rre TO ure") +def test_roles_cache(): + # This test takes 20 seconds. + test_time = 20 - instance.query("CREATE ROLE rre_second") - - instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log") - instance.query("INSERT INTO tre VALUES (0)") - - assert "Not enough privileges" in instance.query_and_get_error( - "SELECT * FROM tre", user="ure" - ) - - instance.query("GRANT SELECT ON tre TO rre") - - assert instance.query("SELECT * FROM tre", user="ure") == "0\n" - - # access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test - # so we wait >2 seconds until the roles are expired - time.sleep(5) + # Three users A, B, C. + users = ["A", "B", "C"] + instance.query("CREATE USER " + ", ".join(users)) + # Table "tbl" has 10 columns. Each of the users has access to a different set of columns. 
+ num_columns = 10 + columns = [f"x{i}" for i in range(1, num_columns + 1)] + columns_with_types = [column + " Int64" for column in columns] + columns_with_types_comma_separated = ", ".join(columns_with_types) + values = list(range(1, num_columns + 1)) + values_comma_separated = ", ".join([str(value) for value in values]) instance.query( - "GRANT SELECT ON tre1 TO rre_second" - ) # we expect that both rre and rre_second are gone from cache upon this operation + f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()" + ) + instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})") + columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)]) - instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log") - instance.query("INSERT INTO tre1 VALUES (0)") - instance.query("GRANT SELECT ON tre1 TO rre") + # In this test we create and modify roles multiple times along with updating the following variables. + # Then we check that each of the users has access to the expected set of columns. + roles = [] + users_to_roles = dict([(user, []) for user in users]) + roles_to_columns = {} - assert instance.query("SELECT * from tre1", user="ure") == "0\n" + # Checks that each of the users can access the expected set of columns and can't access other columns. 
+ def check(): + for user in random.sample(users, len(users)): + expected_roles = users_to_roles[user] + expected_columns = list( + set(sum([roles_to_columns[role] for role in expected_roles], [])) + ) + expected_result = sorted( + [columns_to_values[column] for column in expected_columns] + ) + query = " UNION ALL ".join( + [ + f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))" + for column in columns + ] + ) + result = instance.query(query, user=user).splitlines() + result = sorted([int(value) for value in result]) + ok = result == expected_result + if not ok: + print(f"Show grants for {user}:") + print( + instance.query( + "SHOW GRANTS FOR " + ", ".join([user] + expected_roles) + ) + ) + print(f"Expected result: {expected_result}") + print(f"Got unexpected result: {result}") + assert ok - instance.query("DROP USER ure") - instance.query("DROP ROLE rre") - instance.query("DROP ROLE rre_second") - instance.query("DROP TABLE tre") - instance.query("DROP TABLE tre1") + # Grants one of our roles a permission to access one of the columns. + def grant_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = [ + column for column in columns if column not in columns_used_in_roles + ] + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + role = random.choice(roles) + instance.query(f"GRANT SELECT({column}) ON tbl TO {role}") + roles_to_columns[role].append(column) + return True + + # Revokes a permission to access one of the granted column from all our roles. 
+ def revoke_column(): + columns_used_in_roles = sum(roles_to_columns.values(), []) + columns_to_choose = list(set(columns_used_in_roles)) + if not columns_to_choose or not roles: + return False + column = random.choice(columns_to_choose) + roles_str = ", ".join(roles) + instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}") + for role in roles_to_columns: + if column in roles_to_columns[role]: + roles_to_columns[role].remove(column) + return True + + # Creates a role and grants it to one of the users. + def create_role(): + for role in ["R1", "R2", "R3"]: + if role not in roles: + instance.query(f"CREATE ROLE {role}") + roles.append(role) + if role not in roles_to_columns: + roles_to_columns[role] = [] + if "R1" not in users_to_roles["A"]: + instance.query("GRANT R1 TO A") + users_to_roles["A"].append("R1") + elif "R2" not in users_to_roles["B"]: + instance.query("GRANT R2 TO B") + users_to_roles["B"].append("R2") + elif "R3" not in users_to_roles["B"]: + instance.query("GRANT R3 TO R2") + users_to_roles["B"].append("R3") + elif "R3" not in users_to_roles["C"]: + instance.query("GRANT R3 TO C") + users_to_roles["C"].append("R3") + else: + return False + return True + + # Drops one of our roles. + def drop_role(): + if not roles: + return False + role = random.choice(roles) + instance.query(f"DROP ROLE {role}") + roles.remove(role) + for u in users_to_roles: + if role in users_to_roles[u]: + users_to_roles[u].remove(role) + del roles_to_columns[role] + if (role == "R2") and ("R3" in users_to_roles["B"]): + users_to_roles["B"].remove("R3") + return True + + # Modifies some grants or roles randomly. 
+ def modify(): + while True: + rnd = random.random() + if rnd < 0.4: + if grant_column(): + break + elif rnd < 0.5: + if revoke_column(): + break + elif rnd < 0.9: + if create_role(): + break + else: + if drop_role(): + break + + def maybe_modify(): + if random.random() < 0.9: + modify() + modify() + + # Sleeping is necessary in this test because the role cache in ClickHouse has expiration timeout. + def maybe_sleep(): + if random.random() < 0.1: + # "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration. + # We need a sleep longer than that in this test sometimes. + seconds = random.random() * 5 + print(f"Sleeping {seconds} seconds") + time.sleep(seconds) + + # Main part of the test. + start_time = time.time() + end_time = start_time + test_time + + while time.time() < end_time: + check() + maybe_sleep() + maybe_modify() + maybe_sleep() + + check() + + instance.query("DROP USER " + ", ".join(users)) + instance.query("DROP ROLE " + ", ".join(roles)) + instance.query("DROP TABLE tbl") From 13dfd3934f4a836f4c17f2a33b1d8066910c39ae Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Sep 2024 22:07:11 +0000 Subject: [PATCH 117/121] Backport #69454 to 24.7: Quick fix for s3queue problem --- .../StorageObjectStorageQueue.cpp | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 14b828e7268..eb9a6e6b718 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -63,9 +63,7 @@ namespace void checkAndAdjustSettings( ObjectStorageQueueSettings & queue_settings, - ASTStorage * engine_args, - bool is_attach, - const LoggerPtr & log) + bool is_attach) { if (!is_attach && !queue_settings.mode.changed) { @@ -84,16 +82,6 @@ namespace "Setting `cleanup_interval_min_ms` ({}) must be less or equal 
to `cleanup_interval_max_ms` ({})", queue_settings.cleanup_interval_min_ms, queue_settings.cleanup_interval_max_ms); } - - if (!is_attach && !queue_settings.processing_threads_num.changed) - { - queue_settings.processing_threads_num = std::max(getNumberOfPhysicalCPUCores(), 16); - engine_args->settings->as()->changes.insertSetting( - "processing_threads_num", - queue_settings.processing_threads_num.value); - - LOG_TRACE(log, "Set `processing_threads_num` to {}", queue_settings.processing_threads_num); - } } std::shared_ptr getQueueLog(const ObjectStoragePtr & storage, const ContextPtr & context, const ObjectStorageQueueSettings & table_settings) @@ -129,7 +117,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * engine_args, + ASTStorage * /* engine_args */, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) @@ -153,7 +141,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "ObjectStorageQueue url must either end with '/' or contain globs"); } - checkAndAdjustSettings(*queue_settings, engine_args, mode > LoadingStrictnessLevel::CREATE, log); + checkAndAdjustSettings(*queue_settings, mode > LoadingStrictnessLevel::CREATE); object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); From b6aa5b7e9f4bfe0f0dbb442f0a482c3347145516 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 11 Sep 2024 09:10:18 +0000 Subject: [PATCH 118/121] Backport #69013 to 24.7: Fix test_role & test_keeper_s3_snapshot integration tests --- .../configs/keeper_config1.xml | 1 + .../configs/keeper_config2.xml | 1 + .../configs/keeper_config3.xml | 1 + .../test_keeper_s3_snapshot/test.py | 42 +++++++++++++++++++ tests/integration/test_role/test.py | 3 +- 5 files changed, 47 insertions(+), 1 deletion(-) diff --git 
a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml index 8459ea3e068..6af17946eec 100644 --- a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config1.xml @@ -5,6 +5,7 @@ minio minio123 + false 9181 1 /var/lib/clickhouse/coordination/log diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml index dfe73628f66..25f2b0de812 100644 --- a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config2.xml @@ -5,6 +5,7 @@ minio minio123 + false 9181 2 /var/lib/clickhouse/coordination/log diff --git a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml index 948d9527718..e274b5184f1 100644 --- a/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml +++ b/tests/integration/test_keeper_s3_snapshot/configs/keeper_config3.xml @@ -5,6 +5,7 @@ minio minio123 + false 9181 3 /var/lib/clickhouse/coordination/log diff --git a/tests/integration/test_keeper_s3_snapshot/test.py b/tests/integration/test_keeper_s3_snapshot/test.py index 84ffc964621..b6c25305aef 100644 --- a/tests/integration/test_keeper_s3_snapshot/test.py +++ b/tests/integration/test_keeper_s3_snapshot/test.py @@ -2,6 +2,9 @@ import pytest from helpers.cluster import ClickHouseCluster from time import sleep from retry import retry +from multiprocessing.dummy import Pool +import helpers.keeper_utils as keeper_utils +from minio.deleteobjects import DeleteObject from kazoo.client import KazooClient @@ -75,7 +78,18 @@ def wait_node(node): raise Exception("Can't wait node", node.name, "to become ready") +def delete_keeper_snapshots_logs(nodex): + nodex.exec_in_container( + [ + 
"bash", + "-c", + "rm -rf /var/lib/clickhouse/coordination/log /var/lib/clickhouse/coordination/snapshots", + ] + ) + + def test_s3_upload(started_cluster): + node1_zk = get_fake_zk(node1.name) # we defined in configs snapshot_distance as 50 @@ -89,6 +103,11 @@ def test_s3_upload(started_cluster): for obj in list(cluster.minio_client.list_objects("snapshots")) ] + def delete_s3_snapshots(): + snapshots = cluster.minio_client.list_objects("snapshots") + for s in snapshots: + cluster.minio_client.remove_object("snapshots", s.object_name) + # Keeper sends snapshots asynchornously, hence we need to retry. @retry(AssertionError, tries=10, delay=2) def _check_snapshots(): @@ -125,3 +144,26 @@ def test_s3_upload(started_cluster): ) destroy_zk_client(node2_zk) + node2.stop_clickhouse() + delete_keeper_snapshots_logs(node2) + node3.stop_clickhouse() + delete_keeper_snapshots_logs(node3) + delete_keeper_snapshots_logs(node1) + p = Pool(3) + waiters = [] + + def start_clickhouse(node): + node.start_clickhouse() + + waiters.append(p.apply_async(start_clickhouse, args=(node1,))) + waiters.append(p.apply_async(start_clickhouse, args=(node2,))) + waiters.append(p.apply_async(start_clickhouse, args=(node3,))) + + delete_s3_snapshots() # for next iteration + + for waiter in waiters: + waiter.wait() + + keeper_utils.wait_until_connected(cluster, node1) + keeper_utils.wait_until_connected(cluster, node2) + keeper_utils.wait_until_connected(cluster, node3) diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py index 225cab975ff..b746af56083 100644 --- a/tests/integration/test_role/test.py +++ b/tests/integration/test_role/test.py @@ -629,5 +629,6 @@ def test_roles_cache(): check() instance.query("DROP USER " + ", ".join(users)) - instance.query("DROP ROLE " + ", ".join(roles)) + if roles: + instance.query("DROP ROLE " + ", ".join(roles)) instance.query("DROP TABLE tbl") From b2b74b1284446fd9e6c22473d3908e7991a0c0c1 Mon Sep 17 00:00:00 2001 From: 
robot-clickhouse Date: Wed, 11 Sep 2024 10:09:02 +0000 Subject: [PATCH 119/121] Backport #69451 to 24.7: Fix crash in `sqidDecode` --- src/Functions/sqid.cpp | 2 +- tests/queries/0_stateless/02933_sqid.reference | 1 + tests/queries/0_stateless/02933_sqid.sql | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 0e133590b84..074a34bd083 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -124,7 +124,7 @@ public: std::string_view sqid = col_non_const->getDataAt(i).toView(); std::vector integers = sqids.decode(String(sqid)); res_nested_data.insert(integers.begin(), integers.end()); - res_offsets_data.push_back(integers.size()); + res_offsets_data.push_back(res_offsets_data.back() + integers.size()); } } else diff --git a/tests/queries/0_stateless/02933_sqid.reference b/tests/queries/0_stateless/02933_sqid.reference index a559bacb0ac..4597e2347e3 100644 --- a/tests/queries/0_stateless/02933_sqid.reference +++ b/tests/queries/0_stateless/02933_sqid.reference @@ -13,5 +13,6 @@ Td1EnWQo [1,2,3,4] XMbT -- invalid sqid [] +-- bug 69450 -- alias XMbT diff --git a/tests/queries/0_stateless/02933_sqid.sql b/tests/queries/0_stateless/02933_sqid.sql index 81d4b2bc35c..822fe33df51 100644 --- a/tests/queries/0_stateless/02933_sqid.sql +++ b/tests/queries/0_stateless/02933_sqid.sql @@ -25,5 +25,12 @@ SELECT sqidEncode(toNullable(materialize(1)), toLowCardinality(materialize(2))); SELECT '-- invalid sqid'; SELECT sqidDecode('invalid sqid'); +SELECT '-- bug 69450'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (id String) ENGINE = MergeTree ORDER BY id; +INSERT INTO tab SELECT * FROM generateRandom() LIMIT 1000000; +SELECT sqidDecode(id) FROM tab FORMAT Null; +DROP TABLE tab; + SELECT '-- alias'; SELECT sqid(1, 2); From 60df745b268616f3b0c5f4be0c316e3ef2515196 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 11 Sep 2024 15:17:25 +0000 Subject: [PATCH 120/121] Backport #67029 to 24.7: 
Revert "Revert "Fix for 992 and friends"" --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- ...00992_system_parts_race_condition_zookeeper_long.sh | 2 +- tests/queries/0_stateless/replication.lib | 10 +++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3f02486ed15..da379a466af 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3940,7 +3940,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() merge_selecting_task->schedule(); else { - LOG_TRACE(log, "Scheduling next merge selecting task after {}ms", merge_selecting_sleep_ms); + LOG_TRACE(log, "Scheduling next merge selecting task after {}ms, current attempt status: {}", merge_selecting_sleep_ms, result); merge_selecting_task->scheduleAfter(merge_selecting_sleep_ms); } } diff --git a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh index 4887c409844..02a739ece4a 100755 --- a/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh +++ b/tests/queries/0_stateless/00992_system_parts_race_condition_zookeeper_long.sh @@ -41,7 +41,7 @@ function thread3() function thread4() { - while true; do $CLICKHOUSE_CLIENT --receive_timeout=3 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done + while true; do $CLICKHOUSE_CLIENT --receive_timeout=1 -q "OPTIMIZE TABLE alter_table0 FINAL" | grep -Fv "Timeout exceeded while receiving data from server"; done } function thread5() diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index 1a86cd9f8db..05651531fba 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -51,6 +51,14 @@ function check_replication_consistency() 
table_name_prefix=$1 check_query_part=$2 + # Try to kill some mutations because sometimes tests run too much (it's not guaranteed to kill all mutations, see below) + # Try multiple replicas, because queries are not finished yet, and "global" KILL MUTATION may fail due to another query (like DROP TABLE) + readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%'") + for t in "${tables_arr[@]}" + do + ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table='$t'" > /dev/null 2>/dev/null ||: + done + # Wait for all queries to finish (query may still be running if thread is killed by timeout) num_tries=0 while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE current_database=currentDatabase() AND query LIKE '%$table_name_prefix%'") -ne 1 ]]; do @@ -96,7 +104,7 @@ function check_replication_consistency() some_table=$($CLICKHOUSE_CLIENT -q "SELECT name FROM system.tables WHERE database=currentDatabase() AND name like '$table_name_prefix%' ORDER BY rand() LIMIT 1") $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA $some_table PULL" 1>/dev/null 2>/dev/null ||: - # Forcefully cancel mutations to avoid waiting for them to finish + # Forcefully cancel mutations to avoid waiting for them to finish. 
Kills the remaining mutations ${CLICKHOUSE_CLIENT} -q "KILL MUTATION WHERE database=currentDatabase() AND table like '$table_name_prefix%'" > /dev/null # SYNC REPLICA is not enough if some MUTATE_PARTs are not assigned yet From d22da608979ed31da4e6f08e3b2546ead3895d05 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 11 Sep 2024 18:09:54 +0000 Subject: [PATCH 121/121] Backport #69469 to 24.7: Fix high memory consumption on Squashing --- src/Interpreters/Squashing.cpp | 2 +- .../03236_squashing_high_memory.reference | 0 .../03236_squashing_high_memory.sql | 27 +++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03236_squashing_high_memory.reference create mode 100644 tests/queries/0_stateless/03236_squashing_high_memory.sql diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 488177c3b4f..ef6b749136c 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -46,7 +46,7 @@ Chunk Squashing::squash(Chunk && input_chunk) Chunk Squashing::add(Chunk && input_chunk) { - if (!input_chunk) + if (!input_chunk || input_chunk.getNumRows() == 0) return {}; /// Just read block is already enough. 
diff --git a/tests/queries/0_stateless/03236_squashing_high_memory.reference b/tests/queries/0_stateless/03236_squashing_high_memory.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03236_squashing_high_memory.sql b/tests/queries/0_stateless/03236_squashing_high_memory.sql new file mode 100644 index 00000000000..f6e5dbdef03 --- /dev/null +++ b/tests/queries/0_stateless/03236_squashing_high_memory.sql @@ -0,0 +1,27 @@ +-- Tags: no-fasttest, no-asan, no-tsan, no-msan, no-ubsan +-- reason: test requires too many rows to read + +SET max_rows_to_read = '501G'; + +DROP TABLE IF EXISTS id_values; + +DROP TABLE IF EXISTS test_table; + +CREATE TABLE id_values ENGINE MergeTree ORDER BY id1 AS + SELECT arrayJoin(range(500000)) AS id1, arrayJoin(range(1000)) AS id2; + +SET max_memory_usage = '1G'; + +CREATE TABLE test_table ENGINE MergeTree ORDER BY id AS +SELECT id_values.id1 AS id, + string_values.string_val1 AS string_val1, + string_values.string_val2 AS string_val2 +FROM id_values + JOIN (SELECT arrayJoin(range(10)) AS id1, + 'qwe' AS string_val1, + 'asd' AS string_val2) AS string_values + ON id_values.id1 = string_values.id1 + SETTINGS join_algorithm = 'hash'; + +DROP TABLE IF EXISTS id_values; +DROP TABLE IF EXISTS test_table;