From bb995c74ccfaf9ca28edebbd55fba5d32a4739bb Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 13 Jun 2024 16:30:38 +0200 Subject: [PATCH 01/42] Revert "Revert "S3: reduce retires time for queries, increase retries count for backups"" This reverts commit 7683f06188d8dc901bd912c4ace935a4b3f498e2. --- src/Backups/BackupIO_S3.cpp | 6 +++--- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/IO/S3/Client.h | 2 +- .../integration/test_mask_sensitive_info/configs/users.xml | 1 + 5 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 92f086295a0..1ea59c1d38b 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -54,9 +54,9 @@ namespace S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, + static_cast(local_settings.s3_max_redirects), + static_cast(local_settings.backup_restore_s3_retry_attempts), + local_settings.enable_s3_requests_logging, /* for_disk_s3 = */ false, request_settings.get_request_throttler, request_settings.put_request_throttler, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b3e83092a77..7f99243e285 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -517,6 +517,7 @@ class IColumn; M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, "Maximum size of batch for multi request to [Zoo]Keeper during backup or restore", 0) \ + M(UInt64, backup_restore_s3_retry_attempts, 
1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore.", 0) \ M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.", 0) \ \ M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 69bc8c5d207..b447421671e 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -115,6 +115,7 @@ static const std::map& error, long attemptedRetries) const override; diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml b/tests/integration/test_mask_sensitive_info/configs/users.xml index f129a5bb3e3..f767216e907 100644 --- a/tests/integration/test_mask_sensitive_info/configs/users.xml +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -2,6 +2,7 @@ 5 + 5 From 13e00d4f8b621c800b286e383b20765e669d5163 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 13 Jun 2024 17:40:42 +0200 Subject: [PATCH 02/42] put backup_restore_s3_retry_attempts to 24.6 --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index b447421671e..496c7a46798 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -105,6 +105,7 @@ static const std::map Date: Thu, 20 Jun 2024 19:09:15 +0200 Subject: [PATCH 03/42] backup_restore_s3_retry_attempts moved to 24.7 --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 0370368a8f8..5f332f6301e 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -85,6 +85,7 @@ 
namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static const std::map settings_changes_history = { + {"24.7", {{"backup_restore_s3_retry_attempts", 0, 1000, "A new setting."}}}, {"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"}, {"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"}, {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."}, @@ -114,7 +115,6 @@ static const std::map Date: Thu, 20 Jun 2024 22:01:16 +0000 Subject: [PATCH 04/42] Update libunwind to 18.1.7 --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index d6a01c46327..02f17ec85cd 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit d6a01c46327e56fd86beb8aaa31591fcd9a6b7df +Subproject commit 02f17ec85cd6b28540d0a4b42f32c2f8a7c8d79d From 424224b2023fbdc72993f83e19f0dd7c6ff0b84b Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 20 Jun 2024 22:05:16 +0000 Subject: [PATCH 05/42] Set _LIBUNWIND_REMEMBER_STACK_ALLOC explicitly just in case --- contrib/libunwind-cmake/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 37a2f29afcf..8d99d729010 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -32,6 +32,7 @@ set_target_properties(unwind PROPERTIES FOLDER "contrib/libunwind-cmake") target_include_directories(unwind SYSTEM BEFORE PUBLIC $) target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1) +target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_REMEMBER_STACK_ALLOC=1) # NOTE: from this macros 
sizeof(unw_context_t)/sizeof(unw_cursor_t) is depends, so it should be set always target_compile_definitions(unwind PUBLIC -D_LIBUNWIND_IS_NATIVE_ONLY) From 92ff09f303dad8e43f4cf8ad6bb295882733aaf9 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 21 Jun 2024 22:53:38 +0000 Subject: [PATCH 06/42] There's no Unwind_AppleExtras.cpp anymore --- contrib/libunwind-cmake/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/contrib/libunwind-cmake/CMakeLists.txt b/contrib/libunwind-cmake/CMakeLists.txt index 8d99d729010..b566e8cb9b3 100644 --- a/contrib/libunwind-cmake/CMakeLists.txt +++ b/contrib/libunwind-cmake/CMakeLists.txt @@ -4,9 +4,6 @@ set(LIBUNWIND_CXX_SOURCES "${LIBUNWIND_SOURCE_DIR}/src/libunwind.cpp" "${LIBUNWIND_SOURCE_DIR}/src/Unwind-EHABI.cpp" "${LIBUNWIND_SOURCE_DIR}/src/Unwind-seh.cpp") -if (APPLE) - set(LIBUNWIND_CXX_SOURCES ${LIBUNWIND_CXX_SOURCES} "${LIBUNWIND_SOURCE_DIR}/src/Unwind_AppleExtras.cpp") -endif () set(LIBUNWIND_C_SOURCES "${LIBUNWIND_SOURCE_DIR}/src/UnwindLevel1.c" From ad246312d7ff6009547be89c65754a27e1dc9a22 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 21 Jun 2024 23:05:41 +0000 Subject: [PATCH 07/42] Switch to final commit hash --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index 02f17ec85cd..010ea7c5f9d 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit 02f17ec85cd6b28540d0a4b42f32c2f8a7c8d79d +Subproject commit 010ea7c5f9de966b37a6fcda855d52bdfe0a6ac6 From 840edd99fa1bc2325ab5f875c2c9a65ddf67c579 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 27 Jun 2024 21:53:23 +0000 Subject: [PATCH 08/42] Apply https://github.com/ClickHouse/libunwind/pull/27 too --- contrib/libunwind | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libunwind b/contrib/libunwind index 010ea7c5f9d..8f28e64d158 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ 
-Subproject commit 010ea7c5f9de966b37a6fcda855d52bdfe0a6ac6 +Subproject commit 8f28e64d15819d2d096badd598c7d85bebddb1f2 From 20f1256a2fce9ea9604d58bb4065302af06422ec Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 4 Jul 2024 18:29:09 +0000 Subject: [PATCH 09/42] Debugging 02956_rocksdb_bulk_sink --- src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 31812406d34..6f0f5f19970 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -199,9 +199,10 @@ void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) if (chunks_to_write.empty()) return; + size_t num_chunks = chunks_to_write.size(); auto [serialized_key_column, serialized_value_column] = serializeChunks(std::move(chunks_to_write)); auto sst_file_path = getTemporarySSTFilePath(); - LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows to SST file {}", serialized_key_column->size(), sst_file_path); + LOG_DEBUG(getLogger("EmbeddedRocksDBBulkSink"), "Writing {} rows from {} chunks to SST file {}", serialized_key_column->size(), num_chunks, sst_file_path); if (auto status = buildSSTFile(sst_file_path, *serialized_key_column, *serialized_value_column); !status.ok()) throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); From d2d72794a14459e60be1c3a88d39440af1f7a8f9 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 5 Jul 2024 15:06:50 +0000 Subject: [PATCH 10/42] proper destruction order of AsyncLoader::Pool fields --- src/Common/AsyncLoader.cpp | 4 ++-- src/Common/AsyncLoader.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/AsyncLoader.cpp b/src/Common/AsyncLoader.cpp index cfb273b9058..6264eb03106 100644 --- a/src/Common/AsyncLoader.cpp +++ b/src/Common/AsyncLoader.cpp @@ -49,6 +49,7 @@ void
logAboutProgress(LoggerPtr log, size_t processed, size_t total, AtomicStopw AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init) : name(init.name) , priority(init.priority) + , max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()) , thread_pool(std::make_unique( init.metric_threads, init.metric_active_threads, @@ -56,17 +57,16 @@ AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init) /* max_threads = */ std::numeric_limits::max(), // Unlimited number of threads, we do worker management ourselves /* max_free_threads = */ 0, // We do not require free threads /* queue_size = */0)) // Unlimited queue to avoid blocking during worker spawning - , max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()) {} AsyncLoader::Pool::Pool(Pool&& o) noexcept : name(o.name) , priority(o.priority) - , thread_pool(std::move(o.thread_pool)) , ready_queue(std::move(o.ready_queue)) , max_threads(o.max_threads) , workers(o.workers) , suspended_workers(o.suspended_workers.load()) // All these constructors are needed because std::atomic is neither copy-constructible, nor move-constructible. We never move pools after init, so it is safe. + , thread_pool(std::move(o.thread_pool)) {} void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel) diff --git a/src/Common/AsyncLoader.h b/src/Common/AsyncLoader.h index 42707a4ee91..05b809aceae 100644 --- a/src/Common/AsyncLoader.h +++ b/src/Common/AsyncLoader.h @@ -365,11 +365,11 @@ private: { const String name; const Priority priority; - std::unique_ptr thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools. std::map ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. 
Key is `ready_seqno` size_t max_threads; // Max number of workers to be spawn size_t workers = 0; // Number of currently executing workers std::atomic suspended_workers{0}; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution) + std::unique_ptr thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools. explicit Pool(const PoolInitializer & init); Pool(Pool&& o) noexcept; From 7ab4af85e5d5bea114f1c74dd5cbe4b5a6176772 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 8 Jul 2024 12:36:02 +0200 Subject: [PATCH 11/42] Delete flaky case from 02956_rocksdb_bulk_sink --- tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh index f7111d0afe2..4e6e123bba2 100755 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh @@ -25,13 +25,6 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 F ${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 1 ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" -# Testing insert with multiple sinks and fixed block size -${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" -# Must set both max_threads and max_insert_threads to 2 to make sure there is only two sinks -${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_threads = 2, max_insert_threads = 2, max_block_size = 10000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, insert_deduplication_token = '', optimize_trivial_insert_select = 1;" -${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = 
currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 2 because default bulk sink size is ~1M rows / SST file -${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" - # Testing insert with duplicated keys ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number % 1000, number+1 FROM numbers_mt(1000000) SETTINGS max_block_size = 100000, max_insert_threads = 1, optimize_trivial_insert_select = 1;" From 1e924768f95250acbfeed67841e40f7ec2fcf9c5 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 8 Jul 2024 16:14:51 +0200 Subject: [PATCH 12/42] fix 02956_rocksdb_bulk_sink.reference --- tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference index 74c71827e6e..2b887148ffb 100644 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference @@ -3,7 +3,6 @@ 1000 1 1000 -2 1000000 1000 0 999001 From 7cf38826afa53deccee9aeb904d98bf98ae78d20 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Mon, 8 Jul 2024 17:48:49 +0200 Subject: [PATCH 13/42] Fix detection of number of CPUs in containers In the case when the 'parent' cgroup was used (i.e. name of cgroup was empty, which is common for containers) ClickHouse was ignoring the CPU limits set for the container. 
--- base/base/cgroupsv2.cpp | 17 ++++++++++------- base/base/cgroupsv2.h | 6 +++--- base/base/getMemoryAmount.cpp | 6 ++++-- src/Common/CgroupsMemoryUsageObserver.cpp | 6 ++++-- src/Common/getNumberOfPhysicalCPUCores.cpp | 10 +++++----- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index f20b9daf22e..466ebbc3ffb 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -33,8 +33,9 @@ bool cgroupsV2MemoryControllerEnabled() /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file. - std::string cgroup = cgroupV2OfProcess(); - auto cgroup_dir = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + auto cgroup_dir = currentCGroupV2Path(); + if (cgroup_dir.empty()) + return false; std::ifstream controllers_file(cgroup_dir / "cgroup.controllers"); if (!controllers_file.is_open()) return false; @@ -46,7 +47,7 @@ bool cgroupsV2MemoryControllerEnabled() #endif } -std::string cgroupV2OfProcess() +std::filesystem::path currentCGroupV2Path() { #if defined(OS_LINUX) chassert(cgroupsV2Enabled()); @@ -54,17 +55,19 @@ std::string cgroupV2OfProcess() /// A simpler way to get the membership is: std::ifstream cgroup_name_file("/proc/self/cgroup"); if (!cgroup_name_file.is_open()) - return ""; + return {}; /// With cgroups v2, there will be a *single* line with prefix "0::/" /// (see https://docs.kernel.org/admin-guide/cgroup-v2.html) std::string cgroup; std::getline(cgroup_name_file, cgroup); static const std::string v2_prefix = "0::/"; if (!cgroup.starts_with(v2_prefix)) - return ""; + return {}; + + // the 'root' cgroup can have empty path, which is valid cgroup = 
cgroup.substr(v2_prefix.length()); - return cgroup; + return default_cgroups_mount / cgroup; #else - return ""; + return {}; #endif } diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 70219d87cd1..2c58682ce31 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -16,7 +16,7 @@ bool cgroupsV2Enabled(); /// Assumes that cgroupsV2Enabled() is enabled. bool cgroupsV2MemoryControllerEnabled(); -/// Which cgroup does the process belong to? -/// Returns an empty string if the cgroup cannot be determined. +/// Detects which cgroup the process belong and returns the path to it in sysfs (for cgroups v2). +/// Returns an empty path if the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. -std::string cgroupV2OfProcess(); +std::filesystem::path currentCGroupV2Path(); diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index f47cba9833d..9bd5ad75445 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -23,8 +23,10 @@ std::optional getCgroupsV2MemoryLimit() if (!cgroupsV2MemoryControllerEnabled()) return {}; - std::string cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? default_cgroups_mount : (default_cgroups_mount / cgroup); + auto current_cgroup = currentCGroupV2Path(); + + if (current_cgroup.empty()) + return {}; /// Open the bottom-most nested memory limit setting file. If there is no such file at the current /// level, try again at the parent level as memory settings are inherited. diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index d36c7fd08aa..e034319b21f 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -129,8 +129,10 @@ std::optional getCgroupsV2Path() if (!cgroupsV2MemoryControllerEnabled()) return {}; - String cgroup = cgroupV2OfProcess(); - auto current_cgroup = cgroup.empty() ? 
default_cgroups_mount : (default_cgroups_mount / cgroup); + auto current_cgroup = currentCGroupV2Path(); + + if (current_cgroup.empty()) + return {}; /// Return the bottom-most nested current memory file. If there is no such file at the current /// level, try again at the parent level as memory settings are inherited. diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index 7e18a93e6ed..b16c635f23e 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -37,12 +37,12 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) /// cgroupsv2 if (cgroupsV2Enabled()) { - /// First, we identify the cgroup the process belongs - std::string cgroup = cgroupV2OfProcess(); - if (cgroup.empty()) + /// First, we identify the path of the cgroup the process belongs + auto cgroup_path = currentCGroupV2Path(); + if (cgroup_path.empty()) return default_cpu_count; - auto current_cgroup = cgroup.empty() ? 
default_cgroups_mount : (default_cgroups_mount / cgroup); + auto current_cgroup = cgroup_path; // Looking for cpu.max in directories from the current cgroup to the top level // It does not stop on the first time since the child could have a greater value than parent @@ -62,7 +62,7 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) } current_cgroup = current_cgroup.parent_path(); } - current_cgroup = default_cgroups_mount / cgroup; + current_cgroup = cgroup_path; // Looking for cpuset.cpus.effective in directories from the current cgroup to the top level while (current_cgroup != default_cgroups_mount.parent_path()) { From 859c63298ef044d92c585921579f90dd6e56deda Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 9 Jul 2024 10:51:40 +0000 Subject: [PATCH 14/42] upd 02956_rocksdb_bulk_sink --- .../queries/0_stateless/02956_rocksdb_bulk_sink.reference | 1 + tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference index 2b887148ffb..1f140df1d6b 100644 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.reference @@ -3,6 +3,7 @@ 1000 1 1000 +1 1000000 1000 0 999001 diff --git a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh index 4e6e123bba2..95c136584f0 100755 --- a/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh +++ b/tests/queries/0_stateless/02956_rocksdb_bulk_sink.sh @@ -25,6 +25,13 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 F ${CLICKHOUSE_CLIENT} --query "SELECT sum(value) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be 1 ${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" +# Testing insert with multiple sinks and fixed 
block size +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" +# Must set both max_threads and max_insert_threads to 2 to make sure there is only two sinks +${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number, number+1 FROM numbers_mt(1000000) SETTINGS max_threads = 2, max_insert_threads = 2, max_block_size = 10000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, insert_deduplication_token = '', optimize_trivial_insert_select = 1;" +${CLICKHOUSE_CLIENT} --query "SELECT sum(value) IN (1, 2) FROM system.rocksdb WHERE database = currentDatabase() AND table = 'rocksdb_worm' AND name = 'no.file.opens';" # should be not more than 2 because default bulk sink size is ~1M rows / SST file. +${CLICKHOUSE_CLIENT} --query "SELECT count() FROM rocksdb_worm;" + # Testing insert with duplicated keys ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE rocksdb_worm;" ${CLICKHOUSE_CLIENT} --query "INSERT INTO rocksdb_worm SELECT number % 1000, number+1 FROM numbers_mt(1000000) SETTINGS max_block_size = 100000, max_insert_threads = 1, optimize_trivial_insert_select = 1;" From 1846705497018b01b48b7ffaea353537edfb5b67 Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Wed, 10 Jul 2024 17:57:09 +0800 Subject: [PATCH 15/42] fix the bug that LogicalExpressionOptimizerPass lost logical type of constant --- src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 698602ca5bc..bd8b6f9faa1 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace DB { @@ -615,6 +616,7 @@ private: bool is_any_nullable = false; Tuple args; args.reserve(equals_functions.size()); + DataTypes tuple_element_types; /// first we create tuple from RHS of 
equals functions for (const auto & equals : equals_functions) { @@ -627,16 +629,18 @@ private: if (const auto * rhs_literal = equals_arguments[1]->as()) { args.push_back(rhs_literal->getValue()); + tuple_element_types.push_back(rhs_literal->getResultType()); } else { const auto * lhs_literal = equals_arguments[0]->as(); assert(lhs_literal); args.push_back(lhs_literal->getValue()); + tuple_element_types.push_back(lhs_literal->getResultType()); } } - auto rhs_node = std::make_shared(std::move(args)); + auto rhs_node = std::make_shared(std::move(args), std::make_shared(std::move(tuple_element_types))); auto in_function = std::make_shared("in"); From 1ac2933d7d079ea31767da6304fa22712d38d6f4 Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Wed, 10 Jul 2024 22:12:15 +0800 Subject: [PATCH 16/42] add test --- ...03203_optimize_disjunctions_chain_to_in.reference | 2 ++ .../03203_optimize_disjunctions_chain_to_in.sql | 12 ++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference create mode 100644 tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql diff --git a/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference new file mode 100644 index 00000000000..353c8f98b03 --- /dev/null +++ b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.reference @@ -0,0 +1,2 @@ +2020-01-01 +2020-01-02 diff --git a/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql new file mode 100644 index 00000000000..e48138ab990 --- /dev/null +++ b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql @@ -0,0 +1,12 @@ +SET allow_experimental_analyzer=1; +CREATE OR REPLACE TABLE foo (i Date) ENGINE MergeTree ORDER BY i; +INSERT INTO foo VALUES ('2020-01-01'); +INSERT INTO 
foo VALUES ('2020-01-02'); + +SET optimize_min_equality_disjunction_chain_length = 3; +SELECT * +FROM foo +WHERE (foo.i = parseDateTimeBestEffort('2020-01-01')) + OR (foo.i = parseDateTimeBestEffort('2020-01-02')) + OR (foo.i = parseDateTimeBestEffort('2020-01-03')) +ORDER BY foo.i ASC From faa0cb93fb524c14155e3ab409a099654cec8b91 Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Wed, 10 Jul 2024 23:37:27 +0800 Subject: [PATCH 17/42] Empty commit From 1fb0bdf51093d3482950f1cfd9d1ad0bb727a69e Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Thu, 11 Jul 2024 01:00:49 +0800 Subject: [PATCH 18/42] fix test failed --- .../0_stateless/03203_optimize_disjunctions_chain_to_in.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql index e48138ab990..f9ba28bcd60 100644 --- a/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql +++ b/tests/queries/0_stateless/03203_optimize_disjunctions_chain_to_in.sql @@ -1,5 +1,5 @@ SET allow_experimental_analyzer=1; -CREATE OR REPLACE TABLE foo (i Date) ENGINE MergeTree ORDER BY i; +CREATE TABLE foo (i Date) ENGINE MergeTree ORDER BY i; INSERT INTO foo VALUES ('2020-01-01'); INSERT INTO foo VALUES ('2020-01-02'); From 2f391cfa6c18bea786fbe21c692b7c7e33b6d34b Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Thu, 11 Jul 2024 17:50:46 +0800 Subject: [PATCH 19/42] restart ci From 82e13d785a460592e68662f0d94d4c0ce6d79057 Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Thu, 11 Jul 2024 22:23:09 +0800 Subject: [PATCH 20/42] restart ci From 861dcbbffbef382c542e6e9485ec21a8e5fa5ffd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 11 Jul 2024 16:04:00 +0000 Subject: [PATCH 21/42] Do not check parent scope for group_by_use_nulls outside of subquery. 
--- .../Resolve/IdentifierResolveScope.cpp | 3 +- src/Analyzer/Resolve/IdentifierResolveScope.h | 4 +- src/Analyzer/Resolve/QueryAnalyzer.cpp | 30 +++++++------ ...2535_analyzer_group_by_use_nulls.reference | 41 +++++++++++++++++ .../02535_analyzer_group_by_use_nulls.sql | 45 +++++++++++++++++++ 5 files changed, 108 insertions(+), 15 deletions(-) diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.cpp b/src/Analyzer/Resolve/IdentifierResolveScope.cpp index ae363b57047..32b3107ac16 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.cpp +++ b/src/Analyzer/Resolve/IdentifierResolveScope.cpp @@ -11,9 +11,10 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_) +IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_, bool is_query_) : scope_node(std::move(scope_node_)) , parent_scope(parent_scope_) + , is_query(is_query_) { if (parent_scope) { diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.h b/src/Analyzer/Resolve/IdentifierResolveScope.h index ab2e27cc14d..917e032321d 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.h +++ b/src/Analyzer/Resolve/IdentifierResolveScope.h @@ -128,7 +128,7 @@ constexpr auto PROJECTION_NAME_PLACEHOLDER = "__projection_name_placeholder"; struct IdentifierResolveScope { /// Construct identifier resolve scope using scope node, and parent scope - IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_); + IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_, bool is_query_); QueryTreeNodePtr scope_node; @@ -188,6 +188,8 @@ struct IdentifierResolveScope /// Join retutns NULLs instead of default values bool join_use_nulls = false; + bool is_query; + /// JOINs count size_t joins_count = 0; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp 
b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 576c4943ccb..63249ad2aeb 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -117,7 +117,7 @@ QueryAnalyzer::~QueryAnalyzer() = default; void QueryAnalyzer::resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context) { - IdentifierResolveScope scope(node, nullptr /*parent_scope*/); + IdentifierResolveScope scope(node, nullptr /*parent_scope*/, true /*is_query*/); if (!scope.context) scope.context = context; @@ -509,7 +509,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden /// exception with this settings enabled(https://github.com/ClickHouse/ClickHouse/issues/52494). subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false); - auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/); + auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_query*/); options.only_analyze = only_analyze; auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); @@ -2163,7 +2163,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I if (apply_transformer->getApplyTransformerType() == ApplyColumnTransformerType::LAMBDA) { auto lambda_expression_to_resolve = expression_node->clone(); - IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/); + IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/, false /*is_query*/); node_projection_names = resolveLambda(expression_node, lambda_expression_to_resolve, {node}, lambda_scope); auto & lambda_expression_to_resolve_typed = lambda_expression_to_resolve->as(); node = lambda_expression_to_resolve_typed.getExpression(); @@ -3036,7 +3036,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto 
lambda_expression_clone = lambda_expression_untyped->clone(); - IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); + IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/, false /*is_query*/); ProjectionNames lambda_projection_names = resolveLambda(lambda_expression_untyped, lambda_expression_clone, function_arguments, lambda_scope); auto & resolved_lambda = lambda_expression_clone->as(); @@ -3291,7 +3291,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_arguments.push_back(std::make_shared(std::move(column_name_and_type), lambda_to_resolve)); } - IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/); + IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/, false /*is_query*/); lambda_projection_names = resolveLambda(lambda_argument, lambda_to_resolve, lambda_arguments, lambda_scope); if (auto * lambda_list_node_result = lambda_to_resolve_typed.getExpression()->as()) @@ -3512,7 +3512,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( auto node_type = node->getNodeType(); if (!allow_table_expression && (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)) { - IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/, false /*is_query*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; evaluateScalarSubqueryIfNeeded(node, subquery_scope); @@ -3619,7 +3619,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( else union_node->setIsCTE(false); - IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/, true /*is_query*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; /// CTE is being resolved, it's required to forbid to resolve to it again @@ 
-3752,7 +3752,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( [[fallthrough]]; case QueryTreeNodeType::UNION: { - IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/, true /*is_query*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; std::string projection_name = "_subquery_" + std::to_string(subquery_counter); @@ -3826,6 +3826,10 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( node->convertToNullable(); break; } + + /// Check parent scopes until find current query scope. + if (scope_ptr->is_query) + break; } } @@ -4112,7 +4116,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo bool is_column_constant = interpolate_node_typed.getExpression()->getNodeType() == QueryTreeNodeType::CONSTANT; auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression(); - IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/); + IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/, false /*is_query*/); auto fake_column_node = std::make_shared(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node_typed.getExpression()); if (is_column_constant) @@ -4410,7 +4414,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table */ alias_column_to_resolve = column_name_to_column_node[alias_column_to_resolve_name]; - IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/); + IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/, false /*is_query*/); alias_column_resolve_scope.column_name_to_column_node = std::move(column_name_to_column_node); alias_column_resolve_scope.context = scope.context; @@ -5003,7 +5007,7 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & 
join_node, IdentifierResolveS left_subquery->getProjection().getNodes().push_back(projection_node->clone()); left_subquery->getJoinTree() = left_table_expression; - IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/); + IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/, true /*is_query*/); resolveQuery(left_subquery, left_subquery_scope); const auto & resolved_nodes = left_subquery->getProjection().getNodes(); @@ -5612,7 +5616,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier auto & non_recursive_query_mutable_context = non_recursive_query_is_query_node ? non_recursive_query->as().getMutableContext() : non_recursive_query->as().getMutableContext(); - IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/); + IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/, true /*is_query*/); non_recursive_subquery_scope.subquery_depth = scope.subquery_depth + 1; if (non_recursive_query_is_query_node) @@ -5643,7 +5647,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier { auto & query_node = queries_nodes[i]; - IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/); + IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/, true /*is_subquery*/); if (recursive_cte_table_node) subquery_scope.expression_argument_name_to_node[union_node_typed.getCTEName()] = recursive_cte_table_node; diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference index 63610604ddd..858fbe98838 100644 --- a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.reference @@ -264,3 +264,44 @@ SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; 9 \N 9 \N 0 20 \N 
1 25 +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); +SELECT id +FROM test +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT id + FROM test +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; +SELECT id +FROM test +FINAL +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT DISTINCT id + FROM test + FINAL +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; +SELECT id +FROM test +FINAL +GROUP BY + GROUPING SETS ((id)) +ORDER BY + id IN ( + SELECT DISTINCT id + FROM test + FINAL + LIMIT 4 + ) ASC +LIMIT 256 BY id +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls=true; diff --git a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql index a4d4f2f8bc9..4ae5df9629a 100644 --- a/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql +++ b/tests/queries/0_stateless/02535_analyzer_group_by_use_nulls.sql @@ -83,3 +83,48 @@ GROUP BY ) ORDER BY 1, tuple(val) SETTINGS group_by_use_nulls = 1, max_bytes_before_external_sort=10; + +CREATE TABLE test +ENGINE = ReplacingMergeTree +PRIMARY KEY id +AS SELECT number AS id FROM numbers(100); + +SELECT id +FROM test +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT id + FROM test +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; + +SELECT id +FROM test +FINAL +GROUP BY id + WITH CUBE +HAVING id IN ( + SELECT DISTINCT id + FROM test + FINAL +) +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls = true; + +SELECT id +FROM test +FINAL +GROUP BY + GROUPING SETS ((id)) +ORDER BY + id IN ( + SELECT DISTINCT id + FROM test + FINAL + LIMIT 4 + ) ASC +LIMIT 256 BY id +FORMAT `NUll` +SETTINGS allow_experimental_analyzer = 1, group_by_use_nulls=true; From 7781067ba18ed4817770a804f4407c670dea5550 Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 11 
Jul 2024 16:53:38 +0000 Subject: [PATCH 22/42] Fix pushdown for join with external table Fix a possible incorrect result for queries that join and filter a table with an external engine (like PostgreSQL), caused by too aggressive filter pushdown. From now on, conditions from the WHERE section won't be sent to the external database in case of an outer join with an external table. --- .../transformQueryForExternalDatabase.cpp | 2 +- ...nsformQueryForExternalDatabaseAnalyzer.cpp | 29 ++++++++-- ...ransformQueryForExternalDatabaseAnalyzer.h | 2 +- .../test_storage_postgresql/test.py | 54 +++++++++++++++++++ 4 files changed, 82 insertions(+), 5 deletions(-) diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index fc85bde11d9..7aac138296d 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -419,7 +419,7 @@ String transformQueryForExternalDatabase( throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "No column names for query '{}' to external table '{}.{}'", query_info.query_tree->formatASTForErrorMessage(), database, table); - auto clone_query = getASTForExternalDatabaseFromQueryTree(query_info.query_tree); + auto clone_query = getASTForExternalDatabaseFromQueryTree(query_info.query_tree, query_info.table_expression); return transformQueryForExternalDatabaseImpl( clone_query, diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp index 5e0bfdd5f2a..dc1749b3196 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -10,7 +11,7 @@ #include #include #include - +#include #include @@ -20,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; + extern const int LOGICAL_ERROR; } namespace @@ -55,7 +57,7 @@ public: } -ASTPtr
getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree) +ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree, const QueryTreeNodePtr & table_expression) { auto new_tree = query_tree->clone(); @@ -63,6 +65,21 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre visitor.visit(new_tree); const auto * query_node = new_tree->as(); + const auto & join_tree = query_node->getJoinTree(); + bool allow_where = true; + if (const auto * join_node = join_tree->as()) + { + if (join_node->getStrictness() != JoinStrictness::All) + allow_where = false; + + if (join_node->getKind() == JoinKind::Left) + allow_where = join_node->getLeftTableExpression()->isEqual(*table_expression); + else if (join_node->getKind() == JoinKind::Right) + allow_where = join_node->getRightTableExpression()->isEqual(*table_expression); + else + allow_where = (join_node->getKind() == JoinKind::Inner); + } + auto query_node_ast = query_node->toAST({ .add_cast_for_constants = false, .fully_qualified_identifiers = false }); const IAST * ast = query_node_ast.get(); @@ -76,7 +93,13 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre if (union_ast->list_of_selects->children.size() != 1) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "QueryNode AST is not a single ASTSelectQuery, got {}", union_ast->list_of_selects->children.size()); - return union_ast->list_of_selects->children.at(0); + ASTPtr select_query = union_ast->list_of_selects->children.at(0); + auto * select_query_typed = select_query->as(); + if (!select_query_typed) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected ASTSelectQuery, got {}", select_query ? 
select_query->formatForErrorMessage() : "nullptr"); + if (!allow_where) + select_query_typed->setExpression(ASTSelectQuery::Expression::WHERE, nullptr); + return select_query; } } diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.h b/src/Storages/transformQueryForExternalDatabaseAnalyzer.h index f8983619d1f..7d8bf99646b 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.h +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.h @@ -6,6 +6,6 @@ namespace DB { -ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree); +ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tree, const QueryTreeNodePtr & table_expression); } diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index d9f3a9917ab..ffcff36c47e 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -834,6 +834,60 @@ def test_literal_escaping(started_cluster): cursor.execute(f"DROP TABLE escaping") +def test_filter_pushdown(started_cluster): + cursor = started_cluster.postgres_conn.cursor() + cursor.execute("CREATE SCHEMA test_filter_pushdown") + cursor.execute( + "CREATE TABLE test_filter_pushdown.test_table (id integer, value integer)" + ) + cursor.execute( + "INSERT INTO test_filter_pushdown.test_table VALUES (1, 10), (1, 110), (2, 0), (3, 33), (4, 0)" + ) + + node1.query( + """ + CREATE TABLE test_filter_pushdown_pg_table (id UInt32, value UInt32) + ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', 'test_filter_pushdown'); + """ + ) + + node1.query( + """ + CREATE TABLE test_filter_pushdown_local_table (id UInt32, value UInt32) ENGINE Memory AS SELECT * FROM test_filter_pushdown_pg_table + """ + ) + + node1.query( + "CREATE TABLE ch_table (id UInt32, pg_id UInt32) ENGINE MergeTree ORDER BY id" + ) + node1.query("INSERT INTO ch_table VALUES (1, 
1), (2, 2), (3, 1), (4, 2), (5, 999)") + + def compare_results(query, **kwargs): + result1 = node1.query( + query.format(pg_table="test_filter_pushdown_pg_table", **kwargs) + ) + result2 = node1.query( + query.format(pg_table="test_filter_pushdown_local_table", **kwargs) + ) + assert result1 == result2 + + for kind in ["INNER", "LEFT", "RIGHT", "FULL"]: + for value in [0, 10]: + compare_results( + "SELECT * FROM ch_table {kind} JOIN {pg_table} as p ON ch_table.pg_id = p.id WHERE value = {value} ORDER BY ALL", + kind=kind, + value=value, + ) + + compare_results( + "SELECT * FROM {pg_table} as p {kind} JOIN ch_table ON ch_table.pg_id = p.id WHERE value = {value} ORDER BY ALL", + kind=kind, + value=value, + ) + + cursor.execute("DROP SCHEMA test_filter_pushdown CASCADE") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From 20faa70389a2a7e3d4c30d3d647e96129e1eb8e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 03:30:03 +0200 Subject: [PATCH 23/42] Fix bad test `02530_dictionaries_update_field` --- .../02530_dictionaries_update_field.reference | 60 ------------------- .../02530_dictionaries_update_field.sh | 35 ++++++----- 2 files changed, 21 insertions(+), 74 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 4d5a7447a49..ada13443a10 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -1,102 +1,42 @@ flat --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM 
dict_flat ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third flat/custom --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third hashed --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third 
hashed/custom --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third complex_key_hashed --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third complex_key_hashed/custom --- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); -SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First -INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS 
function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second -INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); -INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; -SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated 3 Third diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 9ced78a1196..a8e5804fba4 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -35,7 +35,7 @@ for layout in "${layouts[@]}"; do echo "$layout" fi - $CLICKHOUSE_CLIENT -nm -q " + $CLICKHOUSE_CLIENT --multiquery " TRUNCATE TABLE table_for_update_field_dictionary; CREATE DICTIONARY $dictionary_name @@ -49,24 +49,31 @@ for layout in "${layouts[@]}"; do LAYOUT($layout()) LIFETIME(1); - -- { echoOn } - INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now()); - SELECT key, value FROM $dictionary_name ORDER BY key ASC; + INSERT INTO table_for_update_field_dictionary VALUES (1, 'First', now());" - INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); - SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; + while true + do + $CLICKHOUSE_CLIENT --query "SELECT key, value FROM $dictionary_name ORDER BY key ASC" | grep -A10 -B10 'First' && break; + sleep .1; + done - SELECT key, value FROM $dictionary_name ORDER BY key ASC; + $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());" - INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); - INSERT INTO 
table_for_update_field_dictionary VALUES (3, 'Third', now()); - SELECT sleepEachRow(1) FROM numbers(20) SETTINGS function_sleep_max_microseconds_per_block = 20000000 FORMAT Null; + while true + do + $CLICKHOUSE_CLIENT --query "SELECT key, value FROM $dictionary_name ORDER BY key ASC" | grep -A10 -B10 'Second' && break; + sleep .1; + done - SELECT key, value FROM $dictionary_name ORDER BY key ASC; - -- { echoOff } + $CLICKHOUSE_CLIENT --query "INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()), (3, 'Third', now())" - DROP DICTIONARY $dictionary_name; - " + while true + do + $CLICKHOUSE_CLIENT --query "SELECT key, value FROM $dictionary_name ORDER BY key ASC" | grep -A10 -B10 'SecondUpdated' && break; + sleep .1; + done + + $CLICKHOUSE_CLIENT --query "DROP DICTIONARY $dictionary_name" done done From bc1b19591d1c78ba9b782722a27c1d29ce1d8d7d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 05:13:38 +0200 Subject: [PATCH 24/42] Disable suspend on Ctrl+Z --- src/Client/ClientBase.cpp | 5 +++-- src/Client/LineReader.h | 3 +-- src/Client/ReplxxLineReader.cpp | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 5d472ba99b9..c6a070219a3 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -80,6 +80,7 @@ #include #include "config.h" + namespace fs = std::filesystem; using namespace std::literals; @@ -2565,12 +2566,12 @@ void ClientBase::runInteractive() word_break_characters, highlight_callback); #else + (void)word_break_characters; LineReader lr( history_file, getClientConfiguration().has("multiline"), query_extenders, - query_delimiters, - word_break_characters); + query_delimiters); #endif static const std::initializer_list> backslash_aliases = diff --git a/src/Client/LineReader.h b/src/Client/LineReader.h index 0172bd7ec22..8c101401190 100644 --- a/src/Client/LineReader.h +++ b/src/Client/LineReader.h @@ -46,8 +46,7 @@ 
public: Patterns delimiters, std::istream & input_stream_ = std::cin, std::ostream & output_stream_ = std::cout, - int in_fd_ = STDIN_FILENO - ); + int in_fd_ = STDIN_FILENO); virtual ~LineReader() = default; diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 46600168695..3b3508d1a58 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -362,6 +362,9 @@ ReplxxLineReader::ReplxxLineReader( rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + /// We don't want the default, "suspend" behavior, it confuses people. + rx.bind_key_internal(replxx::Replxx::KEY::control('Z'), "insert_character"); + auto commit_action = [this](char32_t code) { /// If we allow multiline and there is already something in the input, start a newline. From 2af0edd9e9a509e1bffa15e8da8454a4feb7f0ed Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 15 Jul 2024 08:47:08 +0000 Subject: [PATCH 25/42] Some minor fixups --- base/base/cgroupsv2.cpp | 14 +++++++------- base/base/cgroupsv2.h | 7 +++---- base/base/getMemoryAmount.cpp | 3 +-- src/Common/CgroupsMemoryUsageObserver.cpp | 12 ++++++------ src/Common/getNumberOfPhysicalCPUCores.cpp | 2 +- 5 files changed, 18 insertions(+), 20 deletions(-) diff --git a/base/base/cgroupsv2.cpp b/base/base/cgroupsv2.cpp index 466ebbc3ffb..87f62bf377d 100644 --- a/base/base/cgroupsv2.cpp +++ b/base/base/cgroupsv2.cpp @@ -3,8 +3,9 @@ #include #include -#include +#include +namespace fs = std::filesystem; bool cgroupsV2Enabled() { @@ -13,11 +14,11 @@ bool cgroupsV2Enabled() { /// This file exists iff the host has cgroups v2 enabled. 
auto controllers_file = default_cgroups_mount / "cgroup.controllers"; - if (!std::filesystem::exists(controllers_file)) + if (!fs::exists(controllers_file)) return false; return true; } - catch (const std::filesystem::filesystem_error &) /// all "underlying OS API errors", typically: permission denied + catch (const fs::filesystem_error &) /// all "underlying OS API errors", typically: permission denied { return false; /// not logging the exception as most callers fall back to cgroups v1 } @@ -33,7 +34,7 @@ bool cgroupsV2MemoryControllerEnabled() /// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available /// for the current + child cgroups. The set of available controllers can be restricted from level to level using file /// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file. - auto cgroup_dir = currentCGroupV2Path(); + fs::path cgroup_dir = cgroupV2PathOfProcess(); if (cgroup_dir.empty()) return false; std::ifstream controllers_file(cgroup_dir / "cgroup.controllers"); @@ -47,7 +48,7 @@ bool cgroupsV2MemoryControllerEnabled() #endif } -std::filesystem::path currentCGroupV2Path() +fs::path cgroupV2PathOfProcess() { #if defined(OS_LINUX) chassert(cgroupsV2Enabled()); @@ -63,9 +64,8 @@ std::filesystem::path currentCGroupV2Path() static const std::string v2_prefix = "0::/"; if (!cgroup.starts_with(v2_prefix)) return {}; - - // the 'root' cgroup can have empty path, which is valid cgroup = cgroup.substr(v2_prefix.length()); + /// Note: The 'root' cgroup can have an empty cgroup name, this is valid return default_cgroups_mount / cgroup; #else return {}; diff --git a/base/base/cgroupsv2.h b/base/base/cgroupsv2.h index 2c58682ce31..cfb916ff358 100644 --- a/base/base/cgroupsv2.h +++ b/base/base/cgroupsv2.h @@ -1,7 +1,6 @@ #pragma once #include -#include #if defined(OS_LINUX) /// I think it is possible to mount the cgroups hierarchy 
somewhere else (e.g. when in containers). @@ -16,7 +15,7 @@ bool cgroupsV2Enabled(); /// Assumes that cgroupsV2Enabled() is enabled. bool cgroupsV2MemoryControllerEnabled(); -/// Detects which cgroup the process belong and returns the path to it in sysfs (for cgroups v2). -/// Returns an empty path if the cgroup cannot be determined. +/// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup. +/// Returns an empty path the cgroup cannot be determined. /// Assumes that cgroupsV2Enabled() is enabled. -std::filesystem::path currentCGroupV2Path(); +std::filesystem::path cgroupV2PathOfProcess(); diff --git a/base/base/getMemoryAmount.cpp b/base/base/getMemoryAmount.cpp index 9bd5ad75445..afdb6ba068a 100644 --- a/base/base/getMemoryAmount.cpp +++ b/base/base/getMemoryAmount.cpp @@ -23,8 +23,7 @@ std::optional getCgroupsV2MemoryLimit() if (!cgroupsV2MemoryControllerEnabled()) return {}; - auto current_cgroup = currentCGroupV2Path(); - + std::filesystem::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; diff --git a/src/Common/CgroupsMemoryUsageObserver.cpp b/src/Common/CgroupsMemoryUsageObserver.cpp index e034319b21f..02bde0d80b7 100644 --- a/src/Common/CgroupsMemoryUsageObserver.cpp +++ b/src/Common/CgroupsMemoryUsageObserver.cpp @@ -25,6 +25,7 @@ #endif using namespace DB; +namespace fs = std::filesystem; namespace DB { @@ -69,7 +70,7 @@ uint64_t readMetricFromStatFile(ReadBufferFromFile & buf, const std::string & ke struct CgroupsV1Reader : ICgroupsReader { - explicit CgroupsV1Reader(const std::filesystem::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } + explicit CgroupsV1Reader(const fs::path & stat_file_dir) : buf(stat_file_dir / "memory.stat") { } uint64_t readMemoryUsage() override { @@ -85,7 +86,7 @@ private: struct CgroupsV2Reader : ICgroupsReader { - explicit CgroupsV2Reader(const std::filesystem::path & stat_file_dir) + explicit CgroupsV2Reader(const fs::path & 
stat_file_dir) : current_buf(stat_file_dir / "memory.current"), stat_buf(stat_file_dir / "memory.stat") { } @@ -129,8 +130,7 @@ std::optional getCgroupsV2Path() if (!cgroupsV2MemoryControllerEnabled()) return {}; - auto current_cgroup = currentCGroupV2Path(); - + fs::path current_cgroup = cgroupV2PathOfProcess(); if (current_cgroup.empty()) return {}; @@ -140,7 +140,7 @@ std::optional getCgroupsV2Path() { const auto current_path = current_cgroup / "memory.current"; const auto stat_path = current_cgroup / "memory.stat"; - if (std::filesystem::exists(current_path) && std::filesystem::exists(stat_path)) + if (fs::exists(current_path) && fs::exists(stat_path)) return {current_cgroup}; current_cgroup = current_cgroup.parent_path(); } @@ -150,7 +150,7 @@ std::optional getCgroupsV2Path() std::optional getCgroupsV1Path() { auto path = default_cgroups_mount / "memory/memory.stat"; - if (!std::filesystem::exists(path)) + if (!fs::exists(path)) return {}; return {default_cgroups_mount / "memory"}; } diff --git a/src/Common/getNumberOfPhysicalCPUCores.cpp b/src/Common/getNumberOfPhysicalCPUCores.cpp index b16c635f23e..34a1add2f0e 100644 --- a/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -38,7 +38,7 @@ uint32_t getCGroupLimitedCPUCores(unsigned default_cpu_count) if (cgroupsV2Enabled()) { /// First, we identify the path of the cgroup the process belongs - auto cgroup_path = currentCGroupV2Path(); + std::filesystem::path cgroup_path = cgroupV2PathOfProcess(); if (cgroup_path.empty()) return default_cpu_count; From bba45958c5b3e62d4f99902610a46c4779bc8f40 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jul 2024 13:07:38 +0000 Subject: [PATCH 26/42] Review fixes. 
--- .../Resolve/IdentifierResolveScope.cpp | 3 +-- src/Analyzer/Resolve/IdentifierResolveScope.h | 4 +-- src/Analyzer/Resolve/QueryAnalyzer.cpp | 26 +++++++++---------- 3 files changed, 15 insertions(+), 18 deletions(-) diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.cpp b/src/Analyzer/Resolve/IdentifierResolveScope.cpp index 1600efacf4a..eb3e2179440 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.cpp +++ b/src/Analyzer/Resolve/IdentifierResolveScope.cpp @@ -12,10 +12,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_, bool is_query_) +IdentifierResolveScope::IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_) : scope_node(std::move(scope_node_)) , parent_scope(parent_scope_) - , is_query(is_query_) { if (parent_scope) { diff --git a/src/Analyzer/Resolve/IdentifierResolveScope.h b/src/Analyzer/Resolve/IdentifierResolveScope.h index 917e032321d..ab2e27cc14d 100644 --- a/src/Analyzer/Resolve/IdentifierResolveScope.h +++ b/src/Analyzer/Resolve/IdentifierResolveScope.h @@ -128,7 +128,7 @@ constexpr auto PROJECTION_NAME_PLACEHOLDER = "__projection_name_placeholder"; struct IdentifierResolveScope { /// Construct identifier resolve scope using scope node, and parent scope - IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_, bool is_query_); + IdentifierResolveScope(QueryTreeNodePtr scope_node_, IdentifierResolveScope * parent_scope_); QueryTreeNodePtr scope_node; @@ -188,8 +188,6 @@ struct IdentifierResolveScope /// Join retutns NULLs instead of default values bool join_use_nulls = false; - bool is_query; - /// JOINs count size_t joins_count = 0; diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index 6965036bb0c..a0043c8c04f 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ 
b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -119,7 +119,7 @@ QueryAnalyzer::~QueryAnalyzer() = default; void QueryAnalyzer::resolve(QueryTreeNodePtr & node, const QueryTreeNodePtr & table_expression, ContextPtr context) { - IdentifierResolveScope scope(node, nullptr /*parent_scope*/, true /*is_query*/); + IdentifierResolveScope scope(node, nullptr /*parent_scope*/); if (!scope.context) scope.context = context; @@ -2169,7 +2169,7 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I if (apply_transformer->getApplyTransformerType() == ApplyColumnTransformerType::LAMBDA) { auto lambda_expression_to_resolve = expression_node->clone(); - IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/, false /*is_query*/); + IdentifierResolveScope lambda_scope(expression_node, &scope /*parent_scope*/); node_projection_names = resolveLambda(expression_node, lambda_expression_to_resolve, {node}, lambda_scope); auto & lambda_expression_to_resolve_typed = lambda_expression_to_resolve->as(); node = lambda_expression_to_resolve_typed.getExpression(); @@ -3042,7 +3042,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto lambda_expression_clone = lambda_expression_untyped->clone(); - IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/, false /*is_query*/); + IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); ProjectionNames lambda_projection_names = resolveLambda(lambda_expression_untyped, lambda_expression_clone, function_arguments, lambda_scope); auto & resolved_lambda = lambda_expression_clone->as(); @@ -3297,7 +3297,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_arguments.push_back(std::make_shared(std::move(column_name_and_type), lambda_to_resolve)); } - IdentifierResolveScope lambda_scope(lambda_to_resolve, &scope /*parent_scope*/, false /*is_query*/); + IdentifierResolveScope 
lambda_scope(lambda_to_resolve, &scope /*parent_scope*/); lambda_projection_names = resolveLambda(lambda_argument, lambda_to_resolve, lambda_arguments, lambda_scope); if (auto * lambda_list_node_result = lambda_to_resolve_typed.getExpression()->as()) @@ -3518,7 +3518,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( auto node_type = node->getNodeType(); if (!allow_table_expression && (node_type == QueryTreeNodeType::QUERY || node_type == QueryTreeNodeType::UNION)) { - IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/, false /*is_query*/); + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; evaluateScalarSubqueryIfNeeded(node, subquery_scope); @@ -3625,7 +3625,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( else union_node->setIsCTE(false); - IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/, true /*is_query*/); + IdentifierResolveScope subquery_scope(resolved_identifier_node, &scope /*parent_scope*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; /// CTE is being resolved, it's required to forbid to resolve to it again @@ -3758,7 +3758,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( [[fallthrough]]; case QueryTreeNodeType::UNION: { - IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/, true /*is_query*/); + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; std::string projection_name = "_subquery_" + std::to_string(subquery_counter); @@ -3834,7 +3834,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode( } /// Check parent scopes until find current query scope. 
- if (scope_ptr->is_query) + if (scope_ptr->scope_node->getNodeType() == QueryTreeNodeType::QUERY) break; } } @@ -4122,7 +4122,7 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo bool is_column_constant = interpolate_node_typed.getExpression()->getNodeType() == QueryTreeNodeType::CONSTANT; auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression(); - IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/, false /*is_query*/); + IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/); auto fake_column_node = std::make_shared(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node_typed.getExpression()); if (is_column_constant) @@ -4420,7 +4420,7 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table */ alias_column_to_resolve = column_name_to_column_node[alias_column_to_resolve_name]; - IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/, false /*is_query*/); + IdentifierResolveScope alias_column_resolve_scope(alias_column_to_resolve, nullptr /*parent_scope*/); alias_column_resolve_scope.column_name_to_column_node = std::move(column_name_to_column_node); alias_column_resolve_scope.context = scope.context; @@ -5012,7 +5012,7 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS left_subquery->getProjection().getNodes().push_back(projection_node->clone()); left_subquery->getJoinTree() = left_table_expression; - IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/, true /*is_query*/); + IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/); resolveQuery(left_subquery, left_subquery_scope); const auto & resolved_nodes = left_subquery->getProjection().getNodes(); @@ -5621,7 +5621,7 @@ void 
QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier auto & non_recursive_query_mutable_context = non_recursive_query_is_query_node ? non_recursive_query->as().getMutableContext() : non_recursive_query->as().getMutableContext(); - IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/, true /*is_query*/); + IdentifierResolveScope non_recursive_subquery_scope(non_recursive_query, &scope /*parent_scope*/); non_recursive_subquery_scope.subquery_depth = scope.subquery_depth + 1; if (non_recursive_query_is_query_node) @@ -5652,7 +5652,7 @@ void QueryAnalyzer::resolveUnion(const QueryTreeNodePtr & union_node, Identifier { auto & query_node = queries_nodes[i]; - IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/, true /*is_subquery*/); + IdentifierResolveScope subquery_scope(query_node, &scope /*parent_scope*/); if (recursive_cte_table_node) subquery_scope.expression_argument_name_to_node[union_node_typed.getCTEName()] = recursive_cte_table_node; From 7d42a44944b9876f14749c1e6ebeb309a0750008 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 15 Jul 2024 13:08:31 +0000 Subject: [PATCH 27/42] Review fixes. --- src/Analyzer/Resolve/QueryAnalyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Resolve/QueryAnalyzer.cpp b/src/Analyzer/Resolve/QueryAnalyzer.cpp index a0043c8c04f..a0be2a53ef6 100644 --- a/src/Analyzer/Resolve/QueryAnalyzer.cpp +++ b/src/Analyzer/Resolve/QueryAnalyzer.cpp @@ -511,7 +511,7 @@ void QueryAnalyzer::evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & node, Iden /// exception with this settings enabled(https://github.com/ClickHouse/ClickHouse/issues/52494). 
subquery_context->setSetting("use_structure_from_insertion_table_in_table_functions", false); - auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_query*/); + auto options = SelectQueryOptions(QueryProcessingStage::Complete, scope.subquery_depth, true /*is_subquery*/); options.only_analyze = only_analyze; auto interpreter = std::make_unique(node->toAST(), subquery_context, subquery_context->getViewSource(), options); From e3d28f92688d63fd7d417553ad77944a380d3d30 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 15 Jul 2024 15:12:23 +0200 Subject: [PATCH 28/42] Update 02443_detach_attach_partition.sh --- tests/queries/0_stateless/02443_detach_attach_partition.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh index d72d771a150..6a47b7d8d61 100755 --- a/tests/queries/0_stateless/02443_detach_attach_partition.sh +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -73,7 +73,7 @@ kill -TERM $PID_1 && kill -TERM $PID_2 && kill -TERM $PID_3 && kill -TERM $PID_4 wait $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'threads finished'" -wait_for_queries_to_finish +wait_for_queries_to_finish 60 $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" From 7fe35a83b6116eac84adbfd71df61e009b348939 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 15 Jul 2024 14:26:30 +0100 Subject: [PATCH 29/42] impl --- programs/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 6b3a0b16624..ce3a4659e0e 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -4,6 +4,9 @@ if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") endif () +set(MAX_LINKER_MEMORY 3500) +include(../cmake/limit_jobs.cmake) + 
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake) # The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), From cfb2183d118b783d142a721c89c04b1835dfa3ed Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 15 Jul 2024 17:25:43 +0200 Subject: [PATCH 30/42] Update 01396_inactive_replica_cleanup_nodes_zookeeper.sh --- .../01396_inactive_replica_cleanup_nodes_zookeeper.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh index b81bb75891d..bff85b3e29f 100755 --- a/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh +++ b/tests/queries/0_stateless/01396_inactive_replica_cleanup_nodes_zookeeper.sh @@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_inser for _ in {1..60}; do $CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS" - [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 100)) ]] && break; + [[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break; sleep 1 done From de2e789c06e255b7a31a6caf530043fad8b248a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 15 Jul 2024 19:44:30 +0200 Subject: [PATCH 31/42] Change env variable to run tests with SMT --- tests/clickhouse-test | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 90fb9611151..8df1dd2a8eb 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3386,13 +3386,13 @@ def parse_args(): parser.add_argument( "--replace-replicated-with-shared", action="store_true", - default=os.environ.get("USE_META_IN_KEEPER_FOR_MERGE_TREE", False), + default=os.environ.get("REPLACE_RMT_WITH_SMT", False), help="Replace ReplicatedMergeTree engine with SharedMergeTree", ) parser.add_argument( "--replace-non-replicated-with-shared", action="store_true", - default=False, + default=os.environ.get("REPLACE_MT_WITH_SMT", False), help="Replace ordinary MergeTree engine with SharedMergeTree", ) From c17d5926c6f9f71dff07cb828a4e3bb53b3e6b81 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jul 2024 21:29:52 +0200 Subject: [PATCH 32/42] Fix typo in new_delete.cpp --- src/Common/memory.h | 4 ++-- src/Common/new_delete.cpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Common/memory.h b/src/Common/memory.h index caa0418fa56..dbef069b408 100644 --- a/src/Common/memory.h +++ b/src/Common/memory.h @@ -80,7 +80,7 @@ inline ALWAYS_INLINE void * newImpl(std::size_t size, TAlign... 
align) throw std::bad_alloc{}; } -inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept +inline ALWAYS_INLINE void * newNoExcept(std::size_t size) noexcept { #if USE_GWP_ASAN if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) @@ -99,7 +99,7 @@ inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept return malloc(size); } -inline ALWAYS_INLINE void * newNoExept(std::size_t size, std::align_val_t align) noexcept +inline ALWAYS_INLINE void * newNoExcept(std::size_t size, std::align_val_t align) noexcept { #if USE_GWP_ASAN if (unlikely(GWPAsan::GuardedAlloc.shouldSample())) diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index e8151fbe201..80e05fc4ea0 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -87,7 +87,7 @@ void * operator new(std::size_t size, const std::nothrow_t &) noexcept { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); + void * ptr = Memory::newNoExcept(size); trace.onAlloc(ptr, actual_size); return ptr; } @@ -96,7 +96,7 @@ void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace); - void * ptr = Memory::newNoExept(size); + void * ptr = Memory::newNoExcept(size); trace.onAlloc(ptr, actual_size); return ptr; } @@ -105,7 +105,7 @@ void * operator new(std::size_t size, std::align_val_t align, const std::nothrow { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); + void * ptr = Memory::newNoExcept(size, align); trace.onAlloc(ptr, actual_size); return ptr; } @@ -114,7 +114,7 @@ void * operator new[](std::size_t size, std::align_val_t align, const std::nothr { AllocationTrace trace; std::size_t actual_size = Memory::trackMemory(size, trace, align); - void * ptr = Memory::newNoExept(size, align); + void * ptr = Memory::newNoExcept(size, 
align); trace.onAlloc(ptr, actual_size); return ptr; } From 398440c242a9e10ef900815ebb603c67506ffcbe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:21:20 +0200 Subject: [PATCH 33/42] Delete bad test `02805_distributed_queries_timeouts` --- .../0_stateless/02805_distributed_queries_timeouts.reference | 0 .../queries/0_stateless/02805_distributed_queries_timeouts.sql | 3 --- 2 files changed, 3 deletions(-) delete mode 100644 tests/queries/0_stateless/02805_distributed_queries_timeouts.reference delete mode 100644 tests/queries/0_stateless/02805_distributed_queries_timeouts.sql diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference b/tests/queries/0_stateless/02805_distributed_queries_timeouts.reference deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql b/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql deleted file mode 100644 index 0b7337d1255..00000000000 --- a/tests/queries/0_stateless/02805_distributed_queries_timeouts.sql +++ /dev/null @@ -1,3 +0,0 @@ -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=1 format Null; -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=1, use_hedged_requests=0 format Null; -select * from remote('127.2', view(select sleep(3) from system.one)) settings receive_timeout=1, async_socket_for_remote=0, use_hedged_requests=0 format Null; From c05b2bfd39bdb12290f2c698bfdbdec41021a45e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:33:33 +0200 Subject: [PATCH 34/42] More clarity in the test `03001_consider_lwd_when_merge` --- tests/queries/0_stateless/03001_consider_lwd_when_merge.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql index 988d7058f21..2b10c72ae1b 100644 --- a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql @@ -7,12 +7,14 @@ SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_f INSERT INTO lwd_merge SELECT number FROM numbers(10000); INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000); -OPTIMIZE TABLE lwd_merge; +SET optimize_throw_if_noop = 1; + +OPTIMIZE TABLE lwd_merge; -- { serverError CANNOT_ASSIGN_OPTIMIZE } SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; DELETE FROM lwd_merge WHERE id % 10 > 0; -OPTIMIZE TABLE lwd_merge; +OPTIMIZE TABLE lwd_merge; -- { serverError CANNOT_ASSIGN_OPTIMIZE } SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; From f3047cc78dc310e72b913964429f925876f27fd1 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 12 Jul 2024 10:35:16 +0800 Subject: [PATCH 35/42] fixed type mismatch in cross join --- src/Interpreters/HashJoin/HashJoin.cpp | 2 +- tests/queries/0_stateless/00202_cross_join.reference | 1 + tests/queries/0_stateless/00202_cross_join.sql | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/HashJoin/HashJoin.cpp b/src/Interpreters/HashJoin/HashJoin.cpp index fa8ebd2c0f0..0c7cad4360d 100644 --- a/src/Interpreters/HashJoin/HashJoin.cpp +++ b/src/Interpreters/HashJoin/HashJoin.cpp @@ -125,7 +125,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s if (isCrossOrComma(kind)) { data->type = Type::CROSS; - sample_block_with_columns_to_add = right_sample_block; + sample_block_with_columns_to_add = materializeBlock(right_sample_block); } else if 
(table_join->getClauses().empty()) { diff --git a/tests/queries/0_stateless/00202_cross_join.reference b/tests/queries/0_stateless/00202_cross_join.reference index a8db281730a..e134631383d 100644 --- a/tests/queries/0_stateless/00202_cross_join.reference +++ b/tests/queries/0_stateless/00202_cross_join.reference @@ -43,3 +43,4 @@ 2 2 2 3 2 4 +1 1 1 1 1 diff --git a/tests/queries/0_stateless/00202_cross_join.sql b/tests/queries/0_stateless/00202_cross_join.sql index 8d62c56b3f1..e4929d038c3 100644 --- a/tests/queries/0_stateless/00202_cross_join.sql +++ b/tests/queries/0_stateless/00202_cross_join.sql @@ -5,3 +5,5 @@ SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN SET allow_experimental_analyzer = 1; SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; + +SELECT * FROM ( SELECT 1 AS a, toLowCardinality(1), 1) AS t1 CROSS JOIN (SELECT toLowCardinality(1 AS a), 1 AS b) AS t2; From 9c1532e02f5f1dfeb2f3833afe585e26d08ccbf0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 15 Jul 2024 09:46:51 +0800 Subject: [PATCH 36/42] add tests --- tests/queries/0_stateless/00202_cross_join.reference | 1 - tests/queries/0_stateless/00202_cross_join.sql | 2 -- .../queries/0_stateless/03205_column_type_check.reference | 2 ++ tests/queries/0_stateless/03205_column_type_check.sql | 7 +++++++ 4 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03205_column_type_check.reference create mode 100644 tests/queries/0_stateless/03205_column_type_check.sql diff --git a/tests/queries/0_stateless/00202_cross_join.reference b/tests/queries/0_stateless/00202_cross_join.reference index e134631383d..a8db281730a 100644 --- a/tests/queries/0_stateless/00202_cross_join.reference +++ b/tests/queries/0_stateless/00202_cross_join.reference @@ -43,4 +43,3 @@ 2 2 2 3 2 4 -1 1 1 1 1 diff --git a/tests/queries/0_stateless/00202_cross_join.sql 
b/tests/queries/0_stateless/00202_cross_join.sql index e4929d038c3..8d62c56b3f1 100644 --- a/tests/queries/0_stateless/00202_cross_join.sql +++ b/tests/queries/0_stateless/00202_cross_join.sql @@ -5,5 +5,3 @@ SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN SET allow_experimental_analyzer = 1; SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) js1 CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5) js2; - -SELECT * FROM ( SELECT 1 AS a, toLowCardinality(1), 1) AS t1 CROSS JOIN (SELECT toLowCardinality(1 AS a), 1 AS b) AS t2; diff --git a/tests/queries/0_stateless/03205_column_type_check.reference b/tests/queries/0_stateless/03205_column_type_check.reference new file mode 100644 index 00000000000..3b6c93a0610 --- /dev/null +++ b/tests/queries/0_stateless/03205_column_type_check.reference @@ -0,0 +1,2 @@ +1 nan 1048575 2 +1 1 1 1 1 diff --git a/tests/queries/0_stateless/03205_column_type_check.sql b/tests/queries/0_stateless/03205_column_type_check.sql new file mode 100644 index 00000000000..ab122821eb0 --- /dev/null +++ b/tests/queries/0_stateless/03205_column_type_check.sql @@ -0,0 +1,7 @@ +SELECT * FROM (SELECT toUInt256(1)) AS t, (SELECT greatCircleAngle(toLowCardinality(toNullable(toUInt256(1048575))), 257, -9223372036854775808, 1048576), 1048575, materialize(2)) AS u; + + +SET join_algorithm='hash'; +SET allow_experimental_join_condition=1; +SELECT * FROM ( SELECT 1 AS a, toLowCardinality(1), 1) AS t1 CROSS JOIN (SELECT toLowCardinality(1 AS a), 1 AS b) AS t2; + From 1f752eb313804fcdb1e360a69b195a2481fd970a Mon Sep 17 00:00:00 2001 From: Yinzuo Jiang Date: Tue, 16 Jul 2024 15:59:10 +0800 Subject: [PATCH 37/42] Update build-cross-riscv.md --- docs/en/development/build-cross-riscv.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index 759d97823e2..dd97b6081e8 100644 --- 
a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -11,7 +11,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for RISC-V 64 is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-16 +## Install Clang-18 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` From 77c4e6ae8ca03022c99c1f3f328ef1c8d282ed7b Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 16 Jul 2024 12:21:08 +0200 Subject: [PATCH 38/42] Stateless tests: disable sequential tests in parallel tests --- docker/test/stateless/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c0bfc12bc75..2b535f8dd23 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,7 +12,7 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 7200 : MAX_RUN_TIME)) USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0} USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0} -RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1 +RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0 From e8627601b52e28a4d4203cd365cd5e4693e2af57 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:08:25 +0200 Subject: [PATCH 39/42] Add a note about reading subcolumns from Array type in documentation --- docs/en/sql-reference/data-types/array.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index e5a8ce5d18b..1737fdd88b2 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -96,3 +96,22 @@ Result: │ 1 │ [2] │ [[4,1]] │ └───────────┴───────────┴───────────┘ ``` + +## Reading nested 
subcolumns from Array + +If nested type `T` inside `Array` has subcolumns (for example, if it's a [named tuple](./tuple.md)), you can read its subcolumns from an `Array(T)` type with the same subcolumn names. The type of a subcolumn will be `Array` of the type of original subcolumn. + +**Example** + +```sql +CREATE TABLE t_arr (arr Array(Tuple(field1 UInt32, field2 String))) ENGINE = MergeTree ORDER BY tuple(); +INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]); +SELECT arr.field1, toTypeName(arr.field1), arr.field2, toTypeName(arr.field2) from t_arr; +``` + +```test +┌─arr.field1─┬─toTypeName(arr.field1)─┬─arr.field2────────────────┬─toTypeName(arr.field2)─┐ +│ [1,2] │ Array(UInt32) │ ['Hello','World'] │ Array(String) │ +│ [3,4,5] │ Array(UInt32) │ ['This','is','subcolumn'] │ Array(String) │ +└────────────┴────────────────────────┴───────────────────────────┴────────────────────────┘ +``` From 630a29f537ecae672dcc295f3f29cea5478253a2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jul 2024 15:11:17 +0200 Subject: [PATCH 40/42] Fix bsdtar for @nikitamikhaylov --- tests/ci/compress_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/compress_files.py b/tests/ci/compress_files.py index 70c0aca6a36..f49c872b5a0 100644 --- a/tests/ci/compress_files.py +++ b/tests/ci/compress_files.py @@ -58,14 +58,14 @@ def decompress_fast(archive_path: Path, result_path: Optional[Path] = None) -> N archive_path, result_path, ) - program_part = "--use-compress-program='zstd --threads=0'" + program_part = "--use-compress-program='zstd --threads=0 -d'" elif PIGZ.exists(): logging.info( "pigz found, will compress and decompress faster ('%s' -> '%s')", archive_path, result_path, ) - program_part = "--use-compress-program='pigz'" + program_part = "--use-compress-program='pigz -d'" else: program_part = "-z" logging.info( From 58ce070f8bd632793d149d2e65f05889f641bd87 Mon Sep 17 00:00:00 2001 From: 
vdimir Date: Tue, 16 Jul 2024 15:03:46 +0000 Subject: [PATCH 41/42] Fix clang tidy after #66402 --- src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp index dc1749b3196..ff40b1fcc2d 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp @@ -71,8 +71,7 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre { if (join_node->getStrictness() != JoinStrictness::All) allow_where = false; - - if (join_node->getKind() == JoinKind::Left) + else if (join_node->getKind() == JoinKind::Left) allow_where = join_node->getLeftTableExpression()->isEqual(*table_expression); else if (join_node->getKind() == JoinKind::Right) allow_where = join_node->getRightTableExpression()->isEqual(*table_expression); From d1a70944c7740b011c9416a6e9cf57727111d0a1 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 16 Jul 2024 15:09:36 +0000 Subject: [PATCH 42/42] Add test for join kind to test_storage_postgresql/test.py::test_filter_push_down --- tests/integration/test_storage_postgresql/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index ffcff36c47e..12823f1f72d 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -871,7 +871,7 @@ def test_filter_pushdown(started_cluster): ) assert result1 == result2 - for kind in ["INNER", "LEFT", "RIGHT", "FULL"]: + for kind in ["INNER", "LEFT", "RIGHT", "FULL", "ANY LEFT", "SEMI RIGHT"]: for value in [0, 10]: compare_results( "SELECT * FROM ch_table {kind} JOIN {pg_table} as p ON ch_table.pg_id = p.id WHERE value = {value} ORDER BY ALL",