From db71a6fa7777dbf4ec8f1b67862edbbbc992ab90 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 5 Mar 2024 17:48:48 +0000 Subject: [PATCH 01/47] Fix race in refreshable materialized views causing SELECT to fail sometimes --- src/Storages/StorageMaterializedView.cpp | 9 +++++++-- src/Storages/StorageMaterializedView.h | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 1d0898a2f11..b8361109cb2 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -153,6 +153,7 @@ StorageMaterializedView::StorageMaterializedView( if (query.refresh_strategy) { + fixed_uuid = query.refresh_strategy->append; refresher = RefreshTask::create( *this, getContext(), @@ -629,10 +630,14 @@ void StorageMaterializedView::onActionLockRemove(StorageActionBlockType action_t refresher->start(); } -DB::StorageID StorageMaterializedView::getTargetTableId() const +StorageID StorageMaterializedView::getTargetTableId() const { std::lock_guard guard(target_table_id_mutex); - return target_table_id; + auto id = target_table_id; + /// TODO: Avoid putting uuid into target_table_id in the first place, instead of clearing it here. + if (!fixed_uuid) + id.uuid = UUIDHelpers::Nil; + return id; } void StorageMaterializedView::setTargetTableId(DB::StorageID id) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 4d574a821ec..4d070f4e40d 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -111,6 +111,10 @@ private: bool has_inner_table = false; + /// If false, inner table is replaced on each refresh. In that case, target_table_id doesn't + /// have UUID, and we do inner table lookup by name instead. 
+ bool fixed_uuid = true; + friend class RefreshTask; void checkStatementCanBeForwarded() const; From 34e0fbd83ee0488584bbd75e670dd585095f4e55 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 5 Mar 2024 20:18:40 +0000 Subject: [PATCH 02/47] Oops, append mode doesn't exist yet --- src/Storages/StorageMaterializedView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index b8361109cb2..2a6e5cf2e03 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -153,7 +153,7 @@ StorageMaterializedView::StorageMaterializedView( if (query.refresh_strategy) { - fixed_uuid = query.refresh_strategy->append; + fixed_uuid = false; refresher = RefreshTask::create( *this, getContext(), From ed63ad5e61916f38fa8db2143aaa755d49665584 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Tue, 7 May 2024 14:10:49 +0300 Subject: [PATCH 03/47] iouring: refactor get from context --- src/Coordination/Standalone/Context.cpp | 3 +- src/Disks/IO/IOUringReader.cpp | 2 +- src/Disks/IO/createReadBufferFromFileBase.cpp | 11 +---- src/Disks/IO/getIOUringReader.cpp | 40 +++++++++++++++++++ src/Disks/IO/getIOUringReader.h | 19 +++++++++ src/Interpreters/Context.cpp | 3 +- src/Storages/StorageFile.cpp | 6 +-- 7 files changed, 68 insertions(+), 16 deletions(-) create mode 100644 src/Disks/IO/getIOUringReader.cpp create mode 100644 src/Disks/IO/getIOUringReader.h diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 1095a11566f..84e54ed7100 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -306,7 +307,7 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co IOUringReader & Context::getIOURingReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - 
shared->io_uring_reader = std::make_unique(512); + shared->io_uring_reader = createIOUringReader(); }); return *shared->io_uring_reader; diff --git a/src/Disks/IO/IOUringReader.cpp b/src/Disks/IO/IOUringReader.cpp index 90a4d285ecb..6b0e3f8cc89 100644 --- a/src/Disks/IO/IOUringReader.cpp +++ b/src/Disks/IO/IOUringReader.cpp @@ -1,5 +1,4 @@ #include "IOUringReader.h" -#include #if USE_LIBURING @@ -13,6 +12,7 @@ #include #include #include +#include namespace ProfileEvents { diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index a9d451496ff..f3bb6ae1740 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -4,9 +4,9 @@ #include #include #include +#include #include #include -#include #include #include #include "config.h" @@ -100,14 +100,7 @@ std::unique_ptr createReadBufferFromFileBase( else if (settings.local_fs_method == LocalFSReadMethod::io_uring) { #if USE_LIBURING - auto global_context = Context::getGlobalContextInstance(); - if (!global_context) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot obtain io_uring reader (global context not initialized)"); - - auto & reader = global_context->getIOURingReader(); - if (!reader.isSupported()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); - + auto & reader = getIOURingReaderOrThrow(); res = std::make_unique( reader, settings.priority, diff --git a/src/Disks/IO/getIOUringReader.cpp b/src/Disks/IO/getIOUringReader.cpp new file mode 100644 index 00000000000..8e9a9655a41 --- /dev/null +++ b/src/Disks/IO/getIOUringReader.cpp @@ -0,0 +1,40 @@ +#include "getIOUringReader.h" + +#if USE_LIBURING + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNSUPPORTED_METHOD; +} + +std::unique_ptr createIOUringReader() +{ + return std::make_unique(512); +} + +IOUringReader & getIOUringReaderOrThrow(ContextPtr 
context) +{ + auto reader = context->getIOUringReader(); + if (!reader.isSupported) + { + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); + } + return reader; +} + +IOUringReader & getIOUringReaderOrThrow() +{ + auto context = Context::getGlobalContextInstance(); + if (!context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); + return getIOUringReaderOrThrow(context) +} + +} +#endif diff --git a/src/Disks/IO/getIOUringReader.h b/src/Disks/IO/getIOUringReader.h new file mode 100644 index 00000000000..0980f32b5a2 --- /dev/null +++ b/src/Disks/IO/getIOUringReader.h @@ -0,0 +1,19 @@ +#pragma once + +#include "config.h" + +#if USE_LIBURING + +#include + +namespace DB +{ + +std::unique_ptr createIOUringReader(); + +IOUringReader & getIOUringReaderOrThrow(ContextPtr); + +IOUringReader & getIOUringReaderOrThrow(); + +} +#endif diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 44d36e94441..d847cab013c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -5178,7 +5179,7 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co IOUringReader & Context::getIOURingReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = std::make_unique(512); + shared->io_uring_reader = createIOUringReader() }); return *shared->io_uring_reader; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 430e68d8562..9bead6d0267 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -282,10 +283,7 @@ std::unique_ptr selectReadBuffer( else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) { #if USE_LIBURING - auto & reader = context->getIOURingReader(); - if (!reader.isSupported()) - throw 
Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); - + auto & reader = getIOURingReaderOrThrow(context); res = std::make_unique( reader, Priority{}, From f57abbd806ef78be7829844e3f285b994661ca5e Mon Sep 17 00:00:00 2001 From: tomershafir Date: Tue, 7 May 2024 17:27:52 +0300 Subject: [PATCH 04/47] add missing include --- src/Disks/IO/getIOUringReader.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/IO/getIOUringReader.h b/src/Disks/IO/getIOUringReader.h index 0980f32b5a2..ca619785ab4 100644 --- a/src/Disks/IO/getIOUringReader.h +++ b/src/Disks/IO/getIOUringReader.h @@ -5,6 +5,7 @@ #if USE_LIBURING #include +#include namespace DB { From eedef6d826412b6b9f621527a0c4367142b03632 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Tue, 7 May 2024 11:10:27 +0300 Subject: [PATCH 05/47] fix empty used_dictionaries in query_log --- .../ExternalDictionariesLoader.cpp | 4 +- ...3148_query_log_used_dictionaries.reference | 4 + .../03148_query_log_used_dictionaries.sql | 84 +++++++++++++++++++ 3 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03148_query_log_used_dictionaries.reference create mode 100644 tests/queries/0_stateless/03148_query_log_used_dictionaries.sql diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index f48ee61dab8..49891e6cd60 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -79,7 +79,7 @@ ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(co std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + 
local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); return std::static_pointer_cast(load(resolved_dictionary_name)); } @@ -89,7 +89,7 @@ ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); return std::static_pointer_cast(tryLoad(resolved_dictionary_name)); } diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference new file mode 100644 index 00000000000..1f54474efa3 --- /dev/null +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference @@ -0,0 +1,4 @@ +simple_with_analyzer 1 +nested_with_analyzer 1 +simple_without_analyzer 1 +nested_without_analyzer 1 diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql new file mode 100644 index 00000000000..6f10b118c92 --- /dev/null +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql @@ -0,0 +1,84 @@ +DROP DICTIONARY IF EXISTS 03148_dictionary; + +CREATE DICTIONARY 03148_dictionary ( + id UInt64, + name String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE( + QUERY 'select 0 as id, ''name0'' as name' +)) +LIFETIME(MIN 1 MAX 10) +LAYOUT(HASHED); + +SELECT + dictGet('03148_dictionary', 'name', number) as dict_value +FROM numbers(1) +SETTINGS + allow_experimental_analyzer = 1, + log_comment = 'simple_with_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'simple_with_analyzer', 
length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'simple_with_analyzer'; + +SELECT * +FROM ( + SELECT + dictGet('03148_dictionary', 'name', number) as dict_value + FROM numbers(1) +) t +SETTINGS + allow_experimental_analyzer = 1, + log_comment = 'nested_with_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'nested_with_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'nested_with_analyzer'; + +SELECT + dictGet('03148_dictionary', 'name', number) as dict_value +FROM numbers(1) +SETTINGS + allow_experimental_analyzer = 0, + log_comment = 'simple_without_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'simple_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'simple_without_analyzer'; + +SELECT * +FROM ( + SELECT + dictGet('03148_dictionary', 'name', number) as dict_value + FROM numbers(1) +) t +SETTINGS + allow_experimental_analyzer = 0, + log_comment = 'nested_without_analyzer' +FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT 'nested_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +FROM system.query_log +WHERE current_database = currentDatabase() + AND type = 'QueryFinish' + AND log_comment = 'nested_without_analyzer'; + +DROP DICTIONARY IF EXISTS 03148_dictionary; From a365c36e9d4202d1f1a7802268b27ac5b35673b9 Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Thu, 9 May 2024 11:06:31 +0300 Subject: [PATCH 06/47] use qualified dictionary name in query log --- docs/en/operations/system-tables/query_log.md | 2 +- src/Dictionaries/IDictionary.h | 9 +++++++++ src/Interpreters/ExternalDictionariesLoader.cpp | 12 +++++++----- 
.../03148_query_log_used_dictionaries.reference | 8 ++++---- .../03148_query_log_used_dictionaries.sql | 8 ++++---- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index d48eb31df00..75b855966a3 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -108,7 +108,7 @@ Columns: - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution. - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution. - `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution. -- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. +- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. For dictionaries configured using an XML file this is the name of the dictionary, and for dictionaries created by an SQL statement, the canonical name is the fully qualified object name. - `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution. - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. 
diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index bab80d3cd57..944e00f14c9 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -69,6 +69,15 @@ public: return dictionary_id.getNameForLogs(); } + /// Returns fully qualified unquoted dictionary name + std::string getQualifiedName() const + { + std::lock_guard lock{mutex}; + if (dictionary_id.database_name.empty()) + return dictionary_id.table_name; + return dictionary_id.database_name + "." + dictionary_id.table_name; + } + StorageID getDictionaryID() const { std::lock_guard lock{mutex}; diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 49891e6cd60..1685c06d387 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -77,21 +77,23 @@ void ExternalDictionariesLoader::updateObjectFromConfigWithoutReloading(IExterna ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::getDictionary(const std::string & dictionary_name, ContextPtr local_context) const { std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, local_context->getCurrentDatabase()); + auto dictionary = std::static_pointer_cast(load(resolved_dictionary_name)); if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, dictionary->getQualifiedName()); - return std::static_pointer_cast(load(resolved_dictionary_name)); + return dictionary; } ExternalDictionariesLoader::DictPtr ExternalDictionariesLoader::tryGetDictionary(const std::string & dictionary_name, ContextPtr local_context) const { std::string resolved_dictionary_name = resolveDictionaryName(dictionary_name, 
local_context->getCurrentDatabase()); + auto dictionary = std::static_pointer_cast(tryLoad(resolved_dictionary_name)); - if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries) - local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, resolved_dictionary_name); + if (local_context->hasQueryContext() && local_context->getSettingsRef().log_queries && dictionary) + local_context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, dictionary->getQualifiedName()); - return std::static_pointer_cast(tryLoad(resolved_dictionary_name)); + return dictionary; } diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference index 1f54474efa3..4fa3a14e63f 100644 --- a/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.reference @@ -1,4 +1,4 @@ -simple_with_analyzer 1 -nested_with_analyzer 1 -simple_without_analyzer 1 -nested_without_analyzer 1 +simple_with_analyzer ['default.03148_dictionary'] +nested_with_analyzer ['default.03148_dictionary'] +simple_without_analyzer ['default.03148_dictionary'] +nested_without_analyzer ['default.03148_dictionary'] diff --git a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql index 6f10b118c92..1b647a7ee62 100644 --- a/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql +++ b/tests/queries/0_stateless/03148_query_log_used_dictionaries.sql @@ -21,7 +21,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'simple_with_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' @@ -40,7 +40,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'nested_with_analyzer', 
length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' @@ -56,7 +56,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'simple_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' @@ -75,7 +75,7 @@ FORMAT Null; SYSTEM FLUSH LOGS; -SELECT 'nested_without_analyzer', length(used_dictionaries) as used_dictionaries_qty +SELECT log_comment, used_dictionaries FROM system.query_log WHERE current_database = currentDatabase() AND type = 'QueryFinish' From b97bf456c6e89f36d5225c9befda3738f54cdf31 Mon Sep 17 00:00:00 2001 From: tomershafir Date: Wed, 8 May 2024 18:51:25 +0300 Subject: [PATCH 07/47] try fix build --- programs/keeper/CMakeLists.txt | 1 + src/Coordination/Standalone/Context.cpp | 2 +- src/Coordination/Standalone/Context.h | 2 +- src/Disks/IO/createReadBufferFromFileBase.cpp | 4 ++-- src/Disks/IO/getIOUringReader.cpp | 9 +++++---- src/Disks/IO/getIOUringReader.h | 1 + src/Interpreters/Context.cpp | 4 ++-- src/Interpreters/Context.h | 2 +- src/Storages/StorageFile.cpp | 2 +- 9 files changed, 15 insertions(+), 12 deletions(-) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index b811868333b..af360e44ff4 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -148,6 +148,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/IOUringReader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/getIOUringReader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferFromTemporaryFile.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/WriteBufferWithFinalizeCallback.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/AsynchronousBoundedReadBuffer.cpp diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 84e54ed7100..ec5400b7384 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -304,7 +304,7 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co } #if USE_LIBURING -IOUringReader & Context::getIOURingReader() const +IOUringReader & Context::getIOUringReader() const { callOnce(shared->io_uring_reader_initialized, [&] { shared->io_uring_reader = createIOUringReader(); diff --git a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index adb9111185f..29a66a0e3c7 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -137,7 +137,7 @@ public: IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING - IOUringReader & getIOURingReader() const; + IOUringReader & getIOUringReader() const; #endif std::shared_ptr getAsyncReadCounters() const; ThreadPool & getThreadPoolWriter() const; diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index f3bb6ae1740..b132e25ac6b 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -100,7 +100,7 @@ std::unique_ptr createReadBufferFromFileBase( else if (settings.local_fs_method == LocalFSReadMethod::io_uring) { #if USE_LIBURING - auto & reader = getIOURingReaderOrThrow(); + auto & reader = getIOUringReaderOrThrow(); res = std::make_unique( reader, settings.priority, diff --git a/src/Disks/IO/getIOUringReader.cpp b/src/Disks/IO/getIOUringReader.cpp index 8e9a9655a41..d9cc6211164 100644 --- 
a/src/Disks/IO/getIOUringReader.cpp +++ b/src/Disks/IO/getIOUringReader.cpp @@ -1,7 +1,8 @@ -#include "getIOUringReader.h" +#include #if USE_LIBURING +#include #include namespace DB @@ -20,8 +21,8 @@ std::unique_ptr createIOUringReader() IOUringReader & getIOUringReaderOrThrow(ContextPtr context) { - auto reader = context->getIOUringReader(); - if (!reader.isSupported) + auto & reader = context->getIOUringReader(); + if (!reader.isSupported()) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "io_uring is not supported by this system"); } @@ -33,7 +34,7 @@ IOUringReader & getIOUringReaderOrThrow() auto context = Context::getGlobalContextInstance(); if (!context) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); - return getIOUringReaderOrThrow(context) + return getIOUringReaderOrThrow(context); } } diff --git a/src/Disks/IO/getIOUringReader.h b/src/Disks/IO/getIOUringReader.h index ca619785ab4..59e71980750 100644 --- a/src/Disks/IO/getIOUringReader.h +++ b/src/Disks/IO/getIOUringReader.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d847cab013c..db374bc85f9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5176,10 +5176,10 @@ IAsynchronousReader & Context::getThreadPoolReader(FilesystemReaderType type) co } #if USE_LIBURING -IOUringReader & Context::getIOURingReader() const +IOUringReader & Context::getIOUringReader() const { callOnce(shared->io_uring_reader_initialized, [&] { - shared->io_uring_reader = createIOUringReader() + shared->io_uring_reader = createIOUringReader(); }); return *shared->io_uring_reader; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index d1ff5b4c2b2..0430db10de2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -1243,7 +1243,7 @@ public: IAsynchronousReader & getThreadPoolReader(FilesystemReaderType type) const; #if USE_LIBURING - 
IOUringReader & getIOURingReader() const; + IOUringReader & getIOUringReader() const; #endif std::shared_ptr getAsyncReadCounters() const; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9bead6d0267..1493e649b60 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -283,7 +283,7 @@ std::unique_ptr selectReadBuffer( else if (read_method == LocalFSReadMethod::io_uring && !use_table_fd) { #if USE_LIBURING - auto & reader = getIOURingReaderOrThrow(context); + auto & reader = getIOUringReaderOrThrow(context); res = std::make_unique( reader, Priority{}, From 90b8ae0f0b2dcec76f750580f152f1e8e005b938 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 May 2024 19:39:31 +0200 Subject: [PATCH 08/47] Ignore global profiler if system.trace_log is not enabled Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ad96018a17e..71cd811b6f2 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -127,6 +127,11 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) { #if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) + /// profilers are useless without trace collector + auto global_context_ptr = global_context.lock(); + if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) + return; + try { if (global_profiler_real_time_period > 0) From 0b270a67cfe0d52dd3de8e62db0fc20e4725d723 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 May 2024 21:21:07 +0200 Subject: [PATCH 09/47] Fix disabling global profilers for keeper standalone build CLICKHOUSE_KEEPER_STANDALONE_BUILD does not set while compiling ThreadStatus.cpp, but it linked to the 
clickhouse-keeper standalone build, and before this patch it simply leads to the linking error [1]: May 10 20:02:58 ld.lld-17: error: undefined symbol: DB::Context::hasTraceCollector() const May 10 20:02:58 >>> referenced by ThreadStatus.cpp:132 (./build_docker/./src/Common/ThreadStatus.cpp:132) May 10 20:02:58 >>> lto.tmp:(DB::ThreadStatus::initGlobalProfiler(unsigned long, unsigned long)) May 10 20:02:58 clang++-17: error: linker command failed with exit code 1 (use -v to see invocation) [1]: https://s3.amazonaws.com/clickhouse-test-reports/63632/643061bd9d7ef16641ea9537be868fc39d029726/clickhouse_build_check/report.html Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 2 +- src/Coordination/Standalone/Context.cpp | 5 +++++ src/Coordination/Standalone/Context.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 71cd811b6f2..aaf5618ca5e 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -126,7 +126,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) { -#if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) +#if !defined(SANITIZER) && !defined(__APPLE__) /// profilers are useless without trace collector auto global_context_ptr = global_context.lock(); if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) diff --git a/src/Coordination/Standalone/Context.cpp b/src/Coordination/Standalone/Context.cpp index 1095a11566f..36990d263b5 100644 --- a/src/Coordination/Standalone/Context.cpp +++ b/src/Coordination/Standalone/Context.cpp @@ -457,4 +457,9 @@ const ServerSettings & Context::getServerSettings() const return shared->server_settings; } +bool Context::hasTraceCollector() const +{ + return false; +} + } diff --git 
a/src/Coordination/Standalone/Context.h b/src/Coordination/Standalone/Context.h index ff85e032814..452989e9296 100644 --- a/src/Coordination/Standalone/Context.h +++ b/src/Coordination/Standalone/Context.h @@ -163,6 +163,8 @@ public: zkutil::ZooKeeperPtr getZooKeeper() const; const ServerSettings & getServerSettings() const; + + bool hasTraceCollector() const; }; } From 11f1d9a30effcccc23109a99cde3cae0c91f5612 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 09:09:09 +0200 Subject: [PATCH 10/47] Remove extra includes of ThreadPool.h in tests Signed-off-by: Azat Khuzhin --- src/Common/tests/gtest_rw_lock.cpp | 4 ++-- src/Databases/MySQL/tests/gtest_mysql_binlog.cpp | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Common/tests/gtest_rw_lock.cpp b/src/Common/tests/gtest_rw_lock.cpp index 08a14aba8fb..d8c6e9cb99d 100644 --- a/src/Common/tests/gtest_rw_lock.cpp +++ b/src/Common/tests/gtest_rw_lock.cpp @@ -3,8 +3,8 @@ #include #include #include +#include #include -#include #include #include #include @@ -541,7 +541,7 @@ TEST(Common, RWLockWriteLockTimeoutDuringWriteWithWaitingRead) events.add(wc ? 
"Locked wb" : "Failed to lock wb"); EXPECT_EQ(wc, nullptr); }); - + std::thread rc_thread([&] () { std::this_thread::sleep_for(std::chrono::duration(200)); diff --git a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp index df8433f7cce..11299c5b8b1 100644 --- a/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp +++ b/src/Databases/MySQL/tests/gtest_mysql_binlog.cpp @@ -1,4 +1,3 @@ -#include #include #include #include From 4ad88d04b416b6fd8fd4c2d468108df164f9ba36 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 08:58:53 +0200 Subject: [PATCH 11/47] Move initGlobalProfiler() into ThreadStatusExt.cpp and fix examples To avoid undefined references in examples: May 11 01:58:40 ld.lld-17: error: undefined symbol: DB::Context::hasTraceCollector() const May 11 01:58:40 >>> referenced by ThreadStatus.cpp:132 (/build/src/Common/ThreadStatus.cpp:132) May 11 01:58:40 >>> ThreadStatus.cpp.o:(DB::ThreadStatus::initGlobalProfiler(unsigned long, unsigned long)) in archive src/libclickhouse_common_iod.a May 11 01:58:40 clang++-17: error: linker command failed with exit code 1 (use -v to see invocation) Move it firstly into ThreadStatusExt and then do not try to use it from the ThreadPool. 
Signed-off-by: Azat Khuzhin --- src/Common/ThreadPool.cpp | 2 ++ src/Common/ThreadPool.h | 5 ++++ src/Common/ThreadStatus.cpp | 25 ------------------- src/Common/examples/parallel_aggregation.cpp | 5 +++- src/Common/examples/parallel_aggregation2.cpp | 17 +++++++------ .../examples/thread_creation_latency.cpp | 8 +++--- src/Interpreters/ThreadStatusExt.cpp | 25 +++++++++++++++++++ 7 files changed, 51 insertions(+), 36 deletions(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index b9029d9287d..60c1e12bc2a 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -494,8 +494,10 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ template class ThreadPoolImpl; template class ThreadPoolImpl>; +template class ThreadPoolImpl>; template class ThreadFromGlobalPoolImpl; template class ThreadFromGlobalPoolImpl; +template class ThreadFromGlobalPoolImpl; std::unique_ptr GlobalThreadPool::the_instance; diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 0f1b609f899..4c2403ed6e3 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -242,6 +242,11 @@ public: if (unlikely(global_profiler_real_time_period != 0 || global_profiler_cpu_time_period != 0)) thread_status.initGlobalProfiler(global_profiler_real_time_period, global_profiler_cpu_time_period); } + else + { + UNUSED(global_profiler_real_time_period); + UNUSED(global_profiler_cpu_time_period); + } std::apply(function, arguments); }, diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index aaf5618ca5e..8719a9e093a 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -124,31 +124,6 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) #endif } -void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) -{ -#if !defined(SANITIZER) && !defined(__APPLE__) - /// profilers are useless without 
trace collector - auto global_context_ptr = global_context.lock(); - if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) - return; - - try - { - if (global_profiler_real_time_period > 0) - query_profiler_real = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_real_time_period)); - - if (global_profiler_cpu_time_period > 0) - query_profiler_cpu = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_cpu_time_period)); - } - catch (...) - { - tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); - } -#endif -} - ThreadGroupPtr ThreadStatus::getThreadGroup() const { chassert(current_thread == this); diff --git a/src/Common/examples/parallel_aggregation.cpp b/src/Common/examples/parallel_aggregation.cpp index 7094690a3a8..a7650ff1dc5 100644 --- a/src/Common/examples/parallel_aggregation.cpp +++ b/src/Common/examples/parallel_aggregation.cpp @@ -20,6 +20,9 @@ #include +using ThreadFromGlobalPoolSimple = ThreadFromGlobalPoolImpl; +using SimpleThreadPool = ThreadPoolImpl; + using Key = UInt64; using Value = UInt64; @@ -255,7 +258,7 @@ int main(int argc, char ** argv) std::cerr << std::fixed << std::setprecision(2); - ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); + SimpleThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); Source data(n); diff --git a/src/Common/examples/parallel_aggregation2.cpp b/src/Common/examples/parallel_aggregation2.cpp index e7136707dbd..a1cebdba469 100644 --- a/src/Common/examples/parallel_aggregation2.cpp +++ b/src/Common/examples/parallel_aggregation2.cpp @@ -20,6 +20,9 @@ #include +using ThreadFromGlobalPoolSimple = ThreadFromGlobalPoolImpl; +using SimpleThreadPool = ThreadPoolImpl; + using Key = UInt64; using Value = UInt64; using Source = std::vector; @@ -38,7 +41,7 @@ struct AggregateIndependent 
template static void NO_INLINE execute(const Source & data, size_t num_threads, std::vector> & results, Creator && creator, Updater && updater, - ThreadPool & pool) + SimpleThreadPool & pool) { results.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) @@ -76,7 +79,7 @@ struct AggregateIndependentWithSequentialKeysOptimization template static void NO_INLINE execute(const Source & data, size_t num_threads, std::vector> & results, Creator && creator, Updater && updater, - ThreadPool & pool) + SimpleThreadPool & pool) { results.reserve(num_threads); for (size_t i = 0; i < num_threads; ++i) @@ -124,7 +127,7 @@ struct MergeSequential template static void NO_INLINE execute(Map ** source_maps, size_t num_maps, Map *& result_map, Merger && merger, - ThreadPool &) + SimpleThreadPool &) { for (size_t i = 1; i < num_maps; ++i) { @@ -144,7 +147,7 @@ struct MergeSequentialTransposed /// In practice not better than usual. template static void NO_INLINE execute(Map ** source_maps, size_t num_maps, Map *& result_map, Merger && merger, - ThreadPool &) + SimpleThreadPool &) { std::vector iterators(num_maps); for (size_t i = 1; i < num_maps; ++i) @@ -177,7 +180,7 @@ struct MergeParallelForTwoLevelTable template static void NO_INLINE execute(Map ** source_maps, size_t num_maps, Map *& result_map, Merger && merger, - ThreadPool & pool) + SimpleThreadPool & pool) { for (size_t bucket = 0; bucket < Map::NUM_BUCKETS; ++bucket) pool.scheduleOrThrowOnError([&, bucket, num_maps] @@ -202,7 +205,7 @@ struct Work template static void NO_INLINE execute(const Source & data, size_t num_threads, Creator && creator, Updater && updater, Merger && merger, - ThreadPool & pool) + SimpleThreadPool & pool) { std::vector> intermediate_results; @@ -282,7 +285,7 @@ int main(int argc, char ** argv) std::cerr << std::fixed << std::setprecision(2); - ThreadPool pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); + SimpleThreadPool 
pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, num_threads); Source data(n); diff --git a/src/Common/examples/thread_creation_latency.cpp b/src/Common/examples/thread_creation_latency.cpp index 48a28488068..8732d0a97d1 100644 --- a/src/Common/examples/thread_creation_latency.cpp +++ b/src/Common/examples/thread_creation_latency.cpp @@ -14,6 +14,8 @@ int value = 0; static void f() { ++value; } static void * g(void *) { f(); return {}; } +using ThreadFromGlobalPoolSimple = ThreadFromGlobalPoolImpl; +using SimpleThreadPool = ThreadPoolImpl; namespace CurrentMetrics { @@ -72,7 +74,7 @@ int main(int argc, char ** argv) test(n, "Create and destroy ThreadPool each iteration", [] { - ThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); + SimpleThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); tp.scheduleOrThrowOnError(f); tp.wait(); }); @@ -93,7 +95,7 @@ int main(int argc, char ** argv) }); { - ThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); + SimpleThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 1); test(n, "Schedule job for Threadpool each iteration", [&tp] { @@ -103,7 +105,7 @@ int main(int argc, char ** argv) } { - ThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 128); + SimpleThreadPool tp(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, CurrentMetrics::LocalThreadScheduled, 128); test(n, "Schedule job for Threadpool with 128 threads each iteration", [&tp] { diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 2b8e8bef6d4..6607df8d9af 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ 
b/src/Interpreters/ThreadStatusExt.cpp @@ -458,6 +458,31 @@ void ThreadStatus::resetPerformanceCountersLastUsage() taskstats->reset(); } +void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) +{ +#if !defined(SANITIZER) && !defined(__APPLE__) + /// profilers are useless without trace collector + auto global_context_ptr = global_context.lock(); + if (!global_context_ptr || !global_context_ptr->hasTraceCollector()) + return; + + try + { + if (global_profiler_real_time_period > 0) + query_profiler_real = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_real_time_period)); + + if (global_profiler_cpu_time_period > 0) + query_profiler_cpu = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_cpu_time_period)); + } + catch (...) + { + tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); + } +#endif +} + void ThreadStatus::initQueryProfiler() { if (internal_thread) From d74328541767eba99f2699c5539cd79f11dd41e3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 09:42:00 +0200 Subject: [PATCH 12/47] Fix undefined symbol createFunctionBaseCast() during linking examples Signed-off-by: Azat Khuzhin --- src/Functions/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 11bcc948288..23f34828802 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -15,6 +15,8 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources checkHyperscanRegexp.cpp array/has.cpp CastOverloadResolver.cpp + # Provides dependency for cast - createFunctionBaseCast() + FunctionsConversion.cpp ) extract_into_parent_list(clickhouse_functions_headers dbms_headers IFunction.h From 44f77fe9f5881033b8484bbe8a7cac4ad940a5d3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 13:25:36 +0200 Subject: [PATCH 
13/47] Link dbms for zookeeper examples Since it uses ZooKeeper, which has ThreadFromGlobalPool inside, which requires ThreadPool with enabled profiler, which requires ThreadStatusExt.cpp, which is included only into dbms, but not into clickhouse_common_io (like ThreadStatus.cpp) Error: FAILED: src/Common/ZooKeeper/examples/zkutil_test_commands_new_lib ld.lld-17: error: undefined symbol: DB::ThreadStatus::initGlobalProfiler(unsigned long, unsigned long) >>> referenced by ThreadPool.h:243 (./src/Common/ThreadPool.h:243) >>> ZooKeeperImpl.cpp.o:(void std::__1::__function::__policy_invoker::__call_impl::ThreadFromGlobalPoolImpl> const&, zkutil::ZooKeeperArgs const&, std::__1::shared_ptr)::$_0>(Coordination::ZooKeeper::ZooKeeper(std::__1::vector> const&, zkutil::ZooKeeperArgs const&, std::__1::shared_ptr)::$_0&&)::'lambda'(), void ()>>(std::__1::__function::__policy_storage const*)) in archive src/Common/ZooKeeper/libclickhouse_common_zookeeper_no_log.a Another way of fixing it is to provide some define with default value for "is profiler enabled" for ThreadPool, should work, but will be tricky. 
Signed-off-by: Azat Khuzhin --- src/Common/ZooKeeper/examples/CMakeLists.txt | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/Common/ZooKeeper/examples/CMakeLists.txt b/src/Common/ZooKeeper/examples/CMakeLists.txt index a99fbe55dd8..c5a93f2701e 100644 --- a/src/Common/ZooKeeper/examples/CMakeLists.txt +++ b/src/Common/ZooKeeper/examples/CMakeLists.txt @@ -1,8 +1,16 @@ clickhouse_add_executable(zkutil_test_commands zkutil_test_commands.cpp) -target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(zkutil_test_commands PRIVATE + clickhouse_common_zookeeper_no_log + dbms) clickhouse_add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) -target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper_no_log clickhouse_compression string_utils) +target_link_libraries(zkutil_test_commands_new_lib PRIVATE + clickhouse_common_zookeeper_no_log + clickhouse_compression + string_utils + dbms) clickhouse_add_executable(zkutil_test_async zkutil_test_async.cpp) -target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(zkutil_test_async PRIVATE + clickhouse_common_zookeeper_no_log + dbms) From 5d7d9e9e3485d41d70998c1bb9c0864910a64de0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 14:35:41 +0200 Subject: [PATCH 14/47] Provide ThreadStatus::initGlobalProfiler() for standalone keeper build Signed-off-by: Azat Khuzhin --- src/Coordination/Standalone/ThreadStatusExt.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp index 97f7287be8c..fc78233d9dc 100644 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ b/src/Coordination/Standalone/ThreadStatusExt.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -11,4 +12,8 @@ void CurrentThread::attachToGroup(const 
ThreadGroupPtr &) { } +void ThreadStatus::initGlobalProfiler(UInt64 /*global_profiler_real_time_period*/, UInt64 /*global_profiler_cpu_time_period*/) +{ +} + } From 7261f924bb671ceb9d2131175d558df7296ff217 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 15:54:02 +0200 Subject: [PATCH 15/47] Exclude FunctionsConversion from the large objects check for now Signed-off-by: Azat Khuzhin --- utils/check-style/check-large-objects.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 2122cca911e..e2266e89556 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -7,6 +7,8 @@ export LC_ALL=C # The "total" should be printed without localization TU_EXCLUDES=( AggregateFunctionUniq Aggregator + # FIXME: Exclude for now + FunctionsConversion ) if find $1 -name '*.o' | xargs wc -c | grep --regexp='\.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ From a4e1ddc95ab34d51c3e2a3cc023abbbe1c6b24e5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 May 2024 19:21:32 +0200 Subject: [PATCH 16/47] Link dbms to ZooKeeper examples Signed-off-by: Azat Khuzhin --- utils/zookeeper-cli/CMakeLists.txt | 4 +++- utils/zookeeper-dump-tree/CMakeLists.txt | 6 +++++- utils/zookeeper-remove-by-list/CMakeLists.txt | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/utils/zookeeper-cli/CMakeLists.txt b/utils/zookeeper-cli/CMakeLists.txt index be8cf81320c..cad7164b775 100644 --- a/utils/zookeeper-cli/CMakeLists.txt +++ b/utils/zookeeper-cli/CMakeLists.txt @@ -1,4 +1,6 @@ clickhouse_add_executable(clickhouse-zookeeper-cli zookeeper-cli.cpp ${ClickHouse_SOURCE_DIR}/src/Client/LineReader.cpp) -target_link_libraries(clickhouse-zookeeper-cli PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(clickhouse-zookeeper-cli PRIVATE + clickhouse_common_zookeeper_no_log + dbms) diff --git 
a/utils/zookeeper-dump-tree/CMakeLists.txt b/utils/zookeeper-dump-tree/CMakeLists.txt index 182cb65f194..85e4d18c19f 100644 --- a/utils/zookeeper-dump-tree/CMakeLists.txt +++ b/utils/zookeeper-dump-tree/CMakeLists.txt @@ -1,2 +1,6 @@ clickhouse_add_executable (zookeeper-dump-tree main.cpp ${SRCS}) -target_link_libraries(zookeeper-dump-tree PRIVATE clickhouse_common_zookeeper_no_log clickhouse_common_io boost::program_options) +target_link_libraries(zookeeper-dump-tree PRIVATE + clickhouse_common_zookeeper_no_log + clickhouse_common_io + dbms + boost::program_options) diff --git a/utils/zookeeper-remove-by-list/CMakeLists.txt b/utils/zookeeper-remove-by-list/CMakeLists.txt index 01965413d29..50aaed76110 100644 --- a/utils/zookeeper-remove-by-list/CMakeLists.txt +++ b/utils/zookeeper-remove-by-list/CMakeLists.txt @@ -1,2 +1,5 @@ clickhouse_add_executable (zookeeper-remove-by-list main.cpp ${SRCS}) -target_link_libraries(zookeeper-remove-by-list PRIVATE clickhouse_common_zookeeper_no_log boost::program_options) +target_link_libraries(zookeeper-remove-by-list PRIVATE + clickhouse_common_zookeeper_no_log + dbms + boost::program_options) From 599fce53e38933f3f6f3d9679e9df5fe7026e98a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 13 May 2024 11:03:58 +0200 Subject: [PATCH 17/47] Fix compiling FunctionsConversion.cpp (by properly passing -g0) CI reports [1]: May 11 20:27:25 FAILED: src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o May 11 20:27:25 prlimit --as=10000000000 --data=5000000000 --cpu=1800 /usr/bin/sccache /usr/bin/clang++-17 --target=riscv64-linux-gnu --sysroot=/build/cmake/linux/../../contrib/sysroot/linux-riscv64 -DANNOYLIB_MULTITHREADED_BUILD -DBOOST_ASIO_HAS_STD_INVOKE_RESULT=1 -DBOOST_ASIO_STANDALONE=1 -DBOOST_TIMER_ENABLE_DEPRECATED=1 -DCONFIGDIR=\"\" -DDUMMY_BACKTRACE -DENABLE_ANNOY -DENABLE_MULTITARGET_CODE=1 -DENABLE_USEARCH -DHAVE_BZLIB_H=1 -DHAVE_CONFIG_H -DHAVE_FUTIMESAT=1 -DHAVE_ICONV=1 -DHAVE_LIBLZMA=1 -DHAVE_LIBZSTD=1 
-DHAVE_LIBZSTD_COMPRESSOR=1 -DHAVE_LINUX_FS_H=1 -DHAVE_LINUX_TYPES_H=1 -DHAVE_LZMA_H=1 -DHAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1 -DHAVE_SYS_STATFS_H=1 -DHAVE_ZLIB_H=1 -DHAVE_ZSTD_H=1 -DINCBIN_SILENCE_BITCODE_WARNING -DLIBSASL_EXPORTS=1 -DLZ4_DISABLE_DEPRECATE_WARNINGS=1 -DLZ4_FAST_DEC_LOOP=1 -DMAJOR_IN_SYSMACROS=1 -DOBSOLETE_CRAM_ATTR=1 -DOBSOLETE_DIGEST_ATTR=1 -DPLUGINDIR=\"\" -DPOCO_ENABLE_CPP11 -DPOCO_HAVE_FD_EPOLL -DPOCO_OS_FAMILY_UNIX -DSASLAUTHD_CONF_FILE_DEFAULT=\"\" -DSNAPPY_CODEC_AVAILABLE -DSTD_EXCEPTION_HAS_STACK_TRACE=1 -DUSE_CLICKHOUSE_THREADS=1 -DWITH_COVERAGE=0 -DWITH_GZFILEOP -DZLIB_COMPAT -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS -I/build/build_docker/includes/configs -I/build/src -I/build/build_docker/src -I/build/build_docker/src/Core/include -I/build/base/base/.. -I/build/build_docker/base/base/.. -I/build/contrib/cctz/include -I/build/contrib/re2 -I/build/base/pcg-random/. -I/build/contrib/libfiu/libfiu -I/build/contrib/libssh/include -I/build/build_docker/contrib/libssh/include -I/build/contrib/miniselect/include -I/build/contrib/zstd/lib -I/build/contrib/pocketfft -I/build/contrib/libarchive-cmake -I/build/contrib/libarchive/libarchive -I/build/build_docker/contrib/cyrus-sasl-cmake -I/build/contrib/lz4/lib -isystem /build/contrib/llvm-project/libcxx/include -isystem /build/contrib/llvm-project/libcxxabi/include -isystem /build/contrib/libunwind/include -isystem /build/contrib/libdivide-cmake/. 
-isystem /build/contrib/libdivide -isystem /build/contrib/jemalloc-cmake/include -isystem /build/contrib/llvm-project/llvm/include -isystem /build/build_docker/contrib/llvm-project/llvm/include -isystem /build/contrib/abseil-cpp -isystem /build/contrib/croaring/cpp -isystem /build/contrib/croaring/include -isystem /build/contrib/sparsehash-c11 -isystem /build/contrib/incbin -isystem /build/contrib/cityhash102/include -isystem /build/contrib/boost -isystem /build/base/poco/Net/include -isystem /build/base/poco/Foundation/include -isystem /build/base/poco/NetSSL_OpenSSL/include -isystem /build/base/poco/Crypto/include -isystem /build/contrib/openssl-cmake/linux_riscv64/include -isystem /build/contrib/openssl/include -isystem /build/base/poco/Util/include -isystem /build/base/poco/JSON/include -isystem /build/base/poco/XML/include -isystem /build/contrib/replxx/include -isystem /build/contrib/fmtlib-cmake/../fmtlib/include -isystem /build/contrib/magic_enum/include -isystem /build/contrib/double-conversion -isystem /build/contrib/dragonbox/include -isystem /build/contrib/zlib-ng -isystem /build/build_docker/contrib/zlib-ng-cmake -isystem /build/contrib/pdqsort -isystem /build/contrib/xz/src/liblzma/api -isystem /build/contrib/aws/src/aws-cpp-sdk-core/include -isystem /build/build_docker/contrib/aws-cmake/include -isystem /build/contrib/aws/generated/src/aws-cpp-sdk-s3/include -isystem /build/contrib/aws-c-auth/include -isystem /build/contrib/aws-c-common/include -isystem /build/contrib/aws-c-io/include -isystem /build/contrib/aws-crt-cpp/include -isystem /build/contrib/aws-c-mqtt/include -isystem /build/contrib/aws-c-sdkutils/include -isystem /build/contrib/azure/sdk/core/azure-core/inc -isystem /build/contrib/azure/sdk/identity/azure-identity/inc -isystem /build/contrib/azure/sdk/storage/azure-storage-common/inc -isystem /build/contrib/azure/sdk/storage/azure-storage-blobs/inc -isystem /build/contrib/snappy -isystem /build/build_docker/contrib/snappy-cmake -isystem 
/build/contrib/libbcrypt -isystem /build/contrib/msgpack-c/include -isystem /build/build_docker/contrib/liburing/src/include-compat -isystem /build/build_docker/contrib/liburing/src/include -isystem /build/contrib/liburing/src/include -isystem /build/contrib/fast_float/include -isystem /build/contrib/librdkafka-cmake/include -isystem /build/contrib/librdkafka/src -isystem /build/build_docker/contrib/librdkafka-cmake/auxdir -isystem /build/contrib/cppkafka/include -isystem /build/contrib/nats-io/src -isystem /build/contrib/nats-io/src/adapters -isystem /build/contrib/nats-io/src/include -isystem /build/contrib/nats-io/src/unix -isystem /build/contrib/libuv/include -isystem /build/contrib/krb5/src/include -isystem /build/build_docker/contrib/krb5-cmake/include -isystem /build/contrib/NuRaft/include -isystem /build/base/poco/MongoDB/include -isystem /build/base/poco/Redis/include -isystem /build/contrib/icu/icu4c/source/i18n -isystem /build/contrib/icu/icu4c/source/common -isystem /build/contrib/capnproto/c++/src -isystem /build/contrib/avro/lang/c++/api -isystem /build/contrib/google-protobuf/src -isystem /build/contrib/s2geometry/src -isystem /build/contrib/s2geometry-cmake -isystem /build/contrib/AMQP-CPP/include -isystem /build/contrib/AMQP-CPP -isystem /build/contrib/sqlite-amalgamation -isystem /build/contrib/rocksdb/include -isystem /build/contrib/libpqxx/include -isystem /build/contrib/libpq -isystem /build/contrib/libpq/include -isystem /build/contrib/libstemmer_c/include -isystem /build/contrib/wordnet-blast -isystem /build/contrib/lemmagen-c/include -isystem /build/contrib/ulid-c/include -isystem /build/contrib/simdjson/include -isystem /build/contrib/rapidjson/include -isystem /build/contrib/consistent-hashing -isystem /build/contrib/annoy/src -isystem /build/contrib/FP16/include -isystem /build/contrib/robin-map/include -isystem /build/contrib/SimSIMD-map/include -isystem /build/contrib/usearch/include 
--gcc-toolchain=/build/cmake/linux/../../contrib/sysroot/linux-riscv64 -fdiagnostics-color=always -Xclang -fuse-ctor-homing -Wno-enum-constexpr-conversion -fsized-deallocation -gdwarf-aranges -pipe -fasynchronous-unwind-tables -ffile-prefix-map=/build=. -ftime-trace -falign-functions=32 -ffp-contract=off -fdiagnostics-absolute-paths -fstrict-vtable-pointers -Wall -Wextra -Wframe-larger-than=65536 -Weverything -Wpedantic -Wno-zero-length-array -Wno-c++98-compat-pedantic -Wno-c++98-compat -Wno-c++20-compat -Wno-sign-conversion -Wno-implicit-int-conversion -Wno-implicit-int-float-conversion -Wno-ctad-maybe-unsupported -Wno-disabled-macro-expansion -Wno-documentation-unknown-command -Wno-double-promotion -Wno-exit-time-destructors -Wno-float-equal -Wno-global-constructors -Wno-missing-prototypes -Wno-missing-variable-declarations -Wno-padded -Wno-switch-enum -Wno-undefined-func-template -Wno-unused-template -Wno-vla -Wno-weak-template-vtables -Wno-weak-vtables -Wno-thread-safety-negative -Wno-enum-constexpr-conversion -Wno-unsafe-buffer-usage -Wno-switch-default -O2 -g -DNDEBUG -O3 -g -fno-pie -std=c++23 -D OS_LINUX -Werror -Wno-deprecated-declarations -Wno-poison-system-directories -nostdinc++ -MD -MT src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o -MF src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o.d -o src/CMakeFiles/dbms.dir/Functions/FunctionsConversion.cpp.o -c /build/src/Functions/FunctionsConversion.cpp May 11 20:27:25 sccache: warning: The server looks like it shut down unexpectedly, compiling locally instead [1]: https://s3.amazonaws.com/clickhouse-test-reports/63632/e795e0e028d45b654e099dee136a44e7ac5ed627/clickhouse_special_build_check/report.html Signed-off-by: Azat Khuzhin --- src/Functions/CMakeLists.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 23f34828802..751e8cf5103 100644 --- a/src/Functions/CMakeLists.txt +++ 
b/src/Functions/CMakeLists.txt @@ -3,7 +3,7 @@ add_subdirectory(divide) include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions .) -extract_into_parent_list(clickhouse_functions_sources dbms_sources +set(DBMS_FUNCTIONS IFunction.cpp FunctionFactory.cpp FunctionHelpers.cpp @@ -18,6 +18,7 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources # Provides dependency for cast - createFunctionBaseCast() FunctionsConversion.cpp ) +extract_into_parent_list(clickhouse_functions_sources dbms_sources ${DBMS_FUNCTIONS}) extract_into_parent_list(clickhouse_functions_headers dbms_headers IFunction.h FunctionFactory.h @@ -28,6 +29,10 @@ extract_into_parent_list(clickhouse_functions_headers dbms_headers ) add_library(clickhouse_functions_obj OBJECT ${clickhouse_functions_headers} ${clickhouse_functions_sources}) +if (OMIT_HEAVY_DEBUG_SYMBOLS) + target_compile_options(clickhouse_functions_obj PRIVATE "-g0") + set_source_files_properties(${DBMS_FUNCTIONS} PROPERTIES COMPILE_FLAGS "-g0") +endif() list (APPEND OBJECT_LIBS $) @@ -64,10 +69,6 @@ if (TARGET OpenSSL::Crypto) list (APPEND PUBLIC_LIBS OpenSSL::Crypto) endif() -if (OMIT_HEAVY_DEBUG_SYMBOLS) - target_compile_options(clickhouse_functions_obj PRIVATE "-g0") -endif() - if (TARGET ch_contrib::icu) list (APPEND PRIVATE_LIBS ch_contrib::icu) endif () From 6158332d9104fb8d790bf1ec23df416d640550c9 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 11:12:28 +0200 Subject: [PATCH 18/47] Fix race in ReplicatedMergeTreeLogEntryData --- .../MergeTree/ReplicatedMergeTreeLogEntry.h | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 7693f34cc1e..7011794e16d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -9,7 +9,6 @@ #include 
#include -#include #include @@ -174,7 +173,36 @@ struct ReplicatedMergeTreeLogEntryData size_t quorum = 0; /// Used only in tests for permanent fault injection for particular queue entry. - bool fault_injected = false; + struct CopyableAtomicFlag + { + CopyableAtomicFlag() = default; + + CopyableAtomicFlag(const CopyableAtomicFlag & other) + : value(other.value.load()) + {} + + explicit CopyableAtomicFlag(bool value_) + : value(value_) + {} + + CopyableAtomicFlag & operator=(const CopyableAtomicFlag & other) + { + value = other.value.load(); + return *this; + } + + CopyableAtomicFlag & operator=(bool value_) + { + value = value_; + return *this; + } + + explicit operator bool() const { return value; } + + std::atomic value = false; + }; + + CopyableAtomicFlag fault_injected; /// If this MUTATE_PART entry caused by alter(modify/drop) query. bool isAlterMutation() const From 73e11cda7bfaffb7eea5d08b6964d971332f7cce Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 15 May 2024 13:28:40 +0200 Subject: [PATCH 19/47] Move to common --- src/Common/CopyableAtomic.h | 39 +++++++++++++++++++ .../MergeTree/ReplicatedMergeTreeLogEntry.h | 32 +-------------- 2 files changed, 41 insertions(+), 30 deletions(-) create mode 100644 src/Common/CopyableAtomic.h diff --git a/src/Common/CopyableAtomic.h b/src/Common/CopyableAtomic.h new file mode 100644 index 00000000000..227fffe927f --- /dev/null +++ b/src/Common/CopyableAtomic.h @@ -0,0 +1,39 @@ +#pragma once + +#include +#include + +namespace DB +{ + +template +struct CopyableAtomic +{ + CopyableAtomic(const CopyableAtomic & other) + : value(other.value.load()) + {} + + explicit CopyableAtomic(T && value_) + : value(std::forward(value_)) + {} + + CopyableAtomic & operator=(const CopyableAtomic & other) + { + value = other.value.load(); + return *this; + } + + CopyableAtomic & operator=(bool value_) + { + value = value_; + return *this; + } + + explicit operator T() const { return value; } + + const T & getValue() const { 
return value; } + + std::atomic value; +}; + +} diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 7011794e16d..7ff37c609eb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -173,36 +174,7 @@ struct ReplicatedMergeTreeLogEntryData size_t quorum = 0; /// Used only in tests for permanent fault injection for particular queue entry. - struct CopyableAtomicFlag - { - CopyableAtomicFlag() = default; - - CopyableAtomicFlag(const CopyableAtomicFlag & other) - : value(other.value.load()) - {} - - explicit CopyableAtomicFlag(bool value_) - : value(value_) - {} - - CopyableAtomicFlag & operator=(const CopyableAtomicFlag & other) - { - value = other.value.load(); - return *this; - } - - CopyableAtomicFlag & operator=(bool value_) - { - value = value_; - return *this; - } - - explicit operator bool() const { return value; } - - std::atomic value = false; - }; - - CopyableAtomicFlag fault_injected; + CopyableAtomic fault_injected{false}; /// If this MUTATE_PART entry caused by alter(modify/drop) query. 
bool isAlterMutation() const From bfb3fe0c230cbb00be9862c1f38e6c60e5fe6bb0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 May 2024 11:35:10 +0200 Subject: [PATCH 20/47] Fix libbcrypt for FreeBSD build Right now it fails due to [1] with the following error: /usr/work/ClickHouse/contrib/libbcrypt/crypt_blowfish/ow-crypt.h:27:14: error: conflicting types for 'crypt_r' 27 | extern char *crypt_r(__const char *key, __const char *setting, void *data); | ^ /usr/include/unistd.h:500:7: note: previous declaration is here 500 | char *crypt_r(const char *, const char *, struct crypt_data *); | ^ [1]: https://github.com/freebsd/freebsd-src/commit/5f521d7ba72145092ea23ff6081d8791ad6c1f9d Signed-off-by: Azat Khuzhin --- contrib/libbcrypt-cmake/CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/contrib/libbcrypt-cmake/CMakeLists.txt b/contrib/libbcrypt-cmake/CMakeLists.txt index d40d7f9195e..9e97f0af493 100644 --- a/contrib/libbcrypt-cmake/CMakeLists.txt +++ b/contrib/libbcrypt-cmake/CMakeLists.txt @@ -7,7 +7,7 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libbcrypt") -set(SRCS +set(SRCS "${LIBRARY_DIR}/bcrypt.c" "${LIBRARY_DIR}/crypt_blowfish/crypt_blowfish.c" "${LIBRARY_DIR}/crypt_blowfish/crypt_gensalt.c" @@ -16,4 +16,13 @@ set(SRCS add_library(_bcrypt ${SRCS}) target_include_directories(_bcrypt SYSTEM PUBLIC "${LIBRARY_DIR}") +# Avoid conflicts for crypt_r on FreeBSD [1]: +# +# - char *crypt_r(__const char *key, __const char *setting, void *data); +# - char *crypt_r(const char *, const char *, struct crypt_data *); +# +# [1]: https://github.com/freebsd/freebsd-src/commit/5f521d7ba72145092ea23ff6081d8791ad6c1f9d +# +# NOTE: ow-crypt.h is unsed only internally, so PRIVATE is enough +target_compile_definitions(_bcrypt PRIVATE -D__SKIP_GNU) add_library(ch_contrib::bcrypt ALIAS _bcrypt) From 85aa8b71a4094b592099f9913563fe573fc05f73 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 May 2024 15:57:12 +0200 
Subject: [PATCH 21/47] Fix searching for libclang_rt.builtins.*.a on FreeBSD Signed-off-by: Azat Khuzhin --- cmake/freebsd/default_libs.cmake | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 1eeb1a872bd..6bde75f8c9a 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -1,11 +1,23 @@ set (DEFAULT_LIBS "-nodefaultlibs") if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64") - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-x86_64.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + set(system_processor "x86_64") else () - execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + set(system_processor "${CMAKE_SYSTEM_PROCESSOR}") endif () +file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/") +message(STATUS "-Bprefix: ${bprefix}") + +execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +# --print-file-name simply prints what you passed in case of nothing was resolved, so let's try one other possible option +if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a") + execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a") + message(FATAL_ERROR "libclang_rt.builtins had not been found") +endif() + set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread") message(STATUS "Default libraries: 
${DEFAULT_LIBS}") From 349f9eeeee881a38b1feaee144626592487f9815 Mon Sep 17 00:00:00 2001 From: Yohann Jardin Date: Sat, 18 May 2024 00:29:22 +0200 Subject: [PATCH 22/47] harmonize h3PointDist* error messages --- src/Functions/h3PointDist.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/h3PointDist.cpp b/src/Functions/h3PointDist.cpp index 00b8fb0089e..889675a2dda 100644 --- a/src/Functions/h3PointDist.cpp +++ b/src/Functions/h3PointDist.cpp @@ -49,7 +49,7 @@ public: throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. Must be Float64", - arg->getName(), i, getName()); + arg->getName(), i + 1, getName()); } return std::make_shared(); } From 8562d8595a624149f69706c3833fd55a409b8aea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 May 2024 04:40:37 +0200 Subject: [PATCH 23/47] This log message is better in Trace --- src/Interpreters/Cache/Metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index c832473c4cd..5ed4ccdbeca 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -846,7 +846,7 @@ LockedKey::~LockedKey() /// See comment near cleanupThreadFunc() for more details. 
key_metadata->key_state = KeyMetadata::KeyState::REMOVING; - LOG_DEBUG(key_metadata->logger(), "Submitting key {} for removal", getKey()); + LOG_TRACE(key_metadata->logger(), "Submitting key {} for removal", getKey()); key_metadata->addToCleanupQueue(); } From cafd074e0a888d722e5c6aacc9c363efd39f9648 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 May 2024 05:11:18 +0200 Subject: [PATCH 24/47] Prevent stack overflow in Fuzzer and Stress test --- docker/test/fuzzer/query-fuzzer-tweaks-users.xml | 5 +++++ docker/test/stateless/stress_tests.lib | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index ad261be1abe..e2a4976b385 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -36,6 +36,11 @@ + + + + + diff --git a/docker/test/stateless/stress_tests.lib b/docker/test/stateless/stress_tests.lib index 6aaddbfe590..3b6ad244c82 100644 --- a/docker/test/stateless/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -154,6 +154,11 @@ EOL + + + + + From cde1b82ebdea4d2f768aacb65989228108715ed4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 18 May 2024 06:42:59 +0200 Subject: [PATCH 25/47] Print number tips in case of LowCardinality and Nullable --- src/Columns/ColumnNullable.cpp | 33 ++++++++++++++ src/Columns/ColumnNullable.h | 4 ++ .../Formats/Impl/PrettyBlockOutputFormat.cpp | 14 +++++- .../03156_nullable_number_tips.reference | 43 +++++++++++++++++++ .../03156_nullable_number_tips.sql | 24 +++++++++++ 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03156_nullable_number_tips.reference create mode 100644 tests/queries/0_stateless/03156_nullable_number_tips.sql diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index fa5fdfb8c21..30e62548ad6 100644 --- a/src/Columns/ColumnNullable.cpp +++ 
b/src/Columns/ColumnNullable.cpp @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; } @@ -116,6 +117,38 @@ void ColumnNullable::get(size_t n, Field & res) const getNestedColumn().get(n, res); } +Float64 ColumnNullable::getFloat64(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getFloat64", getName(), n); + else + return getNestedColumn().getFloat64(n); +} + +Float32 ColumnNullable::getFloat32(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getFloat32", getName(), n); + else + return getNestedColumn().getFloat32(n); +} + +UInt64 ColumnNullable::getUInt(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getUInt", getName(), n); + else + return getNestedColumn().getUInt(n); +} + +Int64 ColumnNullable::getInt(size_t n) const +{ + if (isNullAt(n)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of {} at {} is NULL while calling method getInt", getName(), n); + else + return getNestedColumn().getInt(n); +} + void ColumnNullable::insertData(const char * pos, size_t length) { if (pos == nullptr) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index ef4bf4fa41b..c7ebb6ed7b6 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -57,6 +57,10 @@ public: void get(size_t n, Field & res) const override; bool getBool(size_t n) const override { return isNullAt(n) ? 
false : nested_column->getBool(n); } UInt64 get64(size_t n) const override { return nested_column->get64(n); } + Float64 getFloat64(size_t n) const override; + Float32 getFloat32(size_t n) const override; + UInt64 getUInt(size_t n) const override; + Int64 getInt(size_t n) const override; bool isDefaultAt(size_t n) const override { return isNullAt(n); } StringRef getDataAt(size_t) const override; /// Will insert null value if pos=nullptr diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 086b5bfada2..b1dbe68579f 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include namespace DB @@ -16,7 +18,14 @@ PrettyBlockOutputFormat::PrettyBlockOutputFormat( WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_, bool mono_block_, bool color_) : IOutputFormat(header_, out_), format_settings(format_settings_), serializations(header_.getSerializations()), color(color_), mono_block(mono_block_) { - readable_number_tip = header_.getColumns().size() == 1 && WhichDataType(header_.getDataTypes()[0]->getTypeId()).isNumber(); + /// Decide whether we should print a tip near the single number value in the result. + if (header_.getColumns().size() == 1) + { + /// Check if it is a numeric type, possible wrapped by Nullable or LowCardinality. 
+ DataTypePtr type = removeNullable(recursiveRemoveLowCardinality(header_.getDataTypes().at(0))); + if (isNumber(type)) + readable_number_tip = true; + } } @@ -497,6 +506,9 @@ void PrettyBlockOutputFormat::writeReadableNumberTip(const Chunk & chunk) if (!is_single_number) return; + if (columns[0]->isNullAt(0)) + return; + auto value = columns[0]->getFloat64(0); auto threshold = format_settings.pretty.output_format_pretty_single_large_number_tip_threshold; diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.reference b/tests/queries/0_stateless/03156_nullable_number_tips.reference new file mode 100644 index 00000000000..cb4e12684d8 --- /dev/null +++ b/tests/queries/0_stateless/03156_nullable_number_tips.reference @@ -0,0 +1,43 @@ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + ┌─────────x─┐ +1. │ 123456789 │ -- 123.46 million + └───────────┘ +Nullable(UInt64), Nullable(size = 10, UInt64(size = 10), UInt8(size = 10)) + ┏━━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━━┩ +1. │ 1111111101 │ -- 1.11 billion + └────────────┘ + ┏━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━┩ +1. │ 123456789 │ -- 123.46 million + └───────────┘ + x + +1. ᴺᵁᴸᴸ +UInt64, Sparse(size = 10, UInt64(size = 6), UInt64(size = 5)) + ┏━━━━━━━━━━━━┓ + ┃ x ┃ + ┡━━━━━━━━━━━━┩ +1. │ 1111111101 │ -- 1.11 billion + └────────────┘ + ┏━━━┓ + ┃ x ┃ + ┡━━━┩ +1. │ 0 │ + └───┘ + x + +1. 
0 diff --git a/tests/queries/0_stateless/03156_nullable_number_tips.sql b/tests/queries/0_stateless/03156_nullable_number_tips.sql new file mode 100644 index 00000000000..e6f2fa36d86 --- /dev/null +++ b/tests/queries/0_stateless/03156_nullable_number_tips.sql @@ -0,0 +1,24 @@ +SELECT 123456789 AS x FORMAT PrettyCompact; +SELECT toNullable(123456789) AS x FORMAT PrettyCompact; +SELECT toLowCardinality(toNullable(123456789)) AS x FORMAT PrettyCompact; +SELECT toNullable(toLowCardinality(123456789)) AS x FORMAT PrettyCompact; +SELECT toLowCardinality(123456789) AS x FORMAT PrettyCompact; + +CREATE TEMPORARY TABLE test (x Nullable(UInt64), PRIMARY KEY ()) ENGINE = MergeTree SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO test SELECT number % 2 ? number * 123456789 : NULL FROM numbers(10); + +SELECT DISTINCT dumpColumnStructure(*) FROM test; + +SELECT * FROM test ORDER BY ALL DESC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS FIRST LIMIT 1 FORMAT PrettySpace; + +DROP TEMPORARY TABLE test; +CREATE TEMPORARY TABLE test (x UInt64, PRIMARY KEY ()) ENGINE = MergeTree SETTINGS ratio_of_defaults_for_sparse_serialization = 0; +INSERT INTO test SELECT number % 2 ? 
number * 123456789 : NULL FROM numbers(10); + +SELECT DISTINCT dumpColumnStructure(*) FROM test; + +SELECT * FROM test ORDER BY ALL DESC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS LAST LIMIT 1 FORMAT PRETTY; +SELECT * FROM test ORDER BY ALL ASC NULLS FIRST LIMIT 1 FORMAT PrettySpace; From e885e9957ed807c67104b773c179bebf8241a4e8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 14:24:15 +0200 Subject: [PATCH 26/47] tests: fix expected error for 03036_reading_s3_archives Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/03036_reading_s3_archives.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03036_reading_s3_archives.sql b/tests/queries/0_stateless/03036_reading_s3_archives.sql index 98ca0425174..00d7cc25e1a 100644 --- a/tests/queries/0_stateless/03036_reading_s3_archives.sql +++ b/tests/queries/0_stateless/03036_reading_s3_archives.sql @@ -18,5 +18,5 @@ CREATE table table_tar2star Engine S3(s3_conn, filename='03036_archive2.tar :: e SELECT id, data, _file, _path FROM table_tar2star ORDER BY (id, _file, _path); CREATE table table_tarstarglobs Engine S3(s3_conn, filename='03036_archive*.tar* :: example{2..3}.csv'); SELECT id, data, _file, _path FROM table_tarstarglobs ORDER BY (id, _file, _path); -CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError INCORRECT_QUERY } -SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, _file, _path) \ No newline at end of file +CREATE table table_noexist Engine s3(s3_conn, filename='03036_archive2.zip :: nonexistent.csv'); -- { serverError UNKNOWN_STORAGE } +SELECT id, data, _file, _path FROM s3(s3_conn, filename='03036_compressed_file_archive.zip :: example7.csv', format='CSV', structure='auto', compression_method='gz') ORDER BY (id, 
_file, _path) From 6ed515554befd46cfeaed82ddea1231d2dfae937 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 15:50:30 +0200 Subject: [PATCH 27/47] Revert "CI: disable ARM integration test cases with libunwind crash" --- tests/integration/test_crash_log/test.py | 8 -------- tests/integration/test_send_crash_reports/test.py | 4 ---- 2 files changed, 12 deletions(-) diff --git a/tests/integration/test_crash_log/test.py b/tests/integration/test_crash_log/test.py index fe24777de94..a5b82039a84 100644 --- a/tests/integration/test_crash_log/test.py +++ b/tests/integration/test_crash_log/test.py @@ -39,10 +39,6 @@ def wait_for_clickhouse_stop(started_node): assert result == "OK", "ClickHouse process is still running" -@pytest.mark.skipif( - helpers.cluster.is_arm(), - reason="Fails on ARM, issue https://github.com/ClickHouse/ClickHouse/issues/63855", -) def test_pkill(started_node): if ( started_node.is_built_with_thread_sanitizer() @@ -63,10 +59,6 @@ def test_pkill(started_node): ) -@pytest.mark.skipif( - helpers.cluster.is_arm(), - reason="Fails on ARM, issue https://github.com/ClickHouse/ClickHouse/issues/63855", -) def test_pkill_query_log(started_node): for signal in ["SEGV", "4"]: # force create query_log if it was not created diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py index 15a15a13e2f..83c0827f891 100644 --- a/tests/integration/test_send_crash_reports/test.py +++ b/tests/integration/test_send_crash_reports/test.py @@ -35,10 +35,6 @@ def started_node(): pass -@pytest.mark.skipif( - helpers.cluster.is_arm(), - reason="Fails on ARM, issue https://github.com/ClickHouse/ClickHouse/issues/63855", -) def test_send_segfault(started_node): # NOTE: another option is to increase waiting time. 
if ( From 81a0c63928bb7608b55c1fcf5f0335c47e459a2b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 May 2024 17:58:42 +0200 Subject: [PATCH 28/47] Fix unwind on SIGSEGV on aarch64 (due to small stack for signal) Only SIGSEGV uses alternative stack (sigaltstack()), which is very small, 16K, and for aarch64 it is likely not enough for unwinding (likely due to lots of registers on this platform): (gdb) bt #0 libunwind::CFI_Parser::parseFDEInstructions (addressSpace=..., fdeInfo=..., cieInfo=..., upToPC=, arch=4, results=) at ./contrib/libunwind/src/DwarfParser.hpp:561 And this is: 554 case DW_CFA_remember_state: { 555 // Avoid operator new because that would be an upward dependency. 556 // Avoid malloc because it needs heap allocation. 557 PrologInfoStackEntry *entry = 558 (PrologInfoStackEntry *)_LIBUNWIND_REMEMBER_ALLOC( 559 sizeof(PrologInfoStackEntry)); 560 if (entry != NULL) { 561 entry->next = rememberStack.entry; ^^^ 562 entry->info = *results; 563 rememberStack.entry = entry; 564 } else { 565 return false; 566 } 567 _LIBUNWIND_TRACE_DWARF("DW_CFA_remember_state\n"); 568 break; 569 } Signed-off-by: Azat Khuzhin --- src/Common/ThreadStatus.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index ad96018a17e..f2930513280 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -23,6 +23,13 @@ thread_local ThreadStatus constinit * current_thread = nullptr; namespace { +#if defined(__aarch64__) +/// For aarch64 16K is not enough (likely due to tons of registers) +static constexpr size_t UNWIND_MINSIGSTKSZ = 32 << 10; +#else +static constexpr size_t UNWIND_MINSIGSTKSZ = 16 << 10; +#endif + /// Alternative stack for signal handling. 
/// /// This stack should not be located in the TLS (thread local storage), since: @@ -50,7 +57,7 @@ struct ThreadStack free(data); } - static size_t getSize() { return std::max(16 << 10, MINSIGSTKSZ); } + static size_t getSize() { return std::max(UNWIND_MINSIGSTKSZ, MINSIGSTKSZ); } void * getData() const { return data; } private: From 714420fc6713d8e1f1a6af29bd37ad932d86059f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 09:00:35 +0200 Subject: [PATCH 29/47] Speed up Set index a little --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 74 ++++++-------------- src/Storages/MergeTree/MergeTreeIndexSet.h | 1 - 2 files changed, 23 insertions(+), 52 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 1bd42518fdd..0b7e2e1f942 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -35,8 +35,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( size_t max_rows_) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) - , block(index_sample_block) + , block(index_sample_block_) { } @@ -47,8 +46,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( MutableColumns && mutable_columns_) : index_name(index_name_) , max_rows(max_rows_) - , index_sample_block(index_sample_block_) - , block(index_sample_block.cloneWithColumns(std::move(mutable_columns_))) + , block(index_sample_block_.cloneWithColumns(std::move(mutable_columns_))) { } @@ -67,10 +65,11 @@ void MergeTreeIndexGranuleSet::serializeBinary(WriteBuffer & ostr) const } size_serialization->serializeBinary(size(), ostr, {}); + size_t num_columns = block.columns(); - for (size_t i = 0; i < index_sample_block.columns(); ++i) + for (size_t i = 0; i < num_columns; ++i) { - const auto & type = index_sample_block.getByPosition(i).type; + const auto & type = block.getByPosition(i).type; ISerialization::SerializeBinaryBulkSettings settings; 
settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; }; @@ -92,8 +91,6 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); - block.clear(); - Field field_rows; const auto & size_type = DataTypePtr(std::make_shared()); size_type->getDefaultSerialization()->deserializeBinary(field_rows, istr, {}); @@ -102,24 +99,22 @@ void MergeTreeIndexGranuleSet::deserializeBinary(ReadBuffer & istr, MergeTreeInd if (rows_to_read == 0) return; - for (size_t i = 0; i < index_sample_block.columns(); ++i) + size_t num_columns = block.columns(); + + ISerialization::DeserializeBinaryBulkSettings settings; + settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; + settings.position_independent_encoding = false; + + for (size_t i = 0; i < num_columns; ++i) { - const auto & column = index_sample_block.getByPosition(i); - const auto & type = column.type; - ColumnPtr new_column = type->createColumn(); - - - ISerialization::DeserializeBinaryBulkSettings settings; - settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; }; - settings.position_independent_encoding = false; + auto & elem = block.getByPosition(i); + elem.column = elem.column->cloneEmpty(); ISerialization::DeserializeBinaryBulkStatePtr state; - auto serialization = type->getDefaultSerialization(); + auto serialization = elem.type->getDefaultSerialization(); serialization->deserializeBinaryBulkStatePrefix(settings, state); - serialization->deserializeBinaryBulkWithMultipleStreams(new_column, rows_to_read, settings, state, nullptr); - - block.insert(ColumnWithTypeAndName(new_column, type, column.name)); + serialization->deserializeBinaryBulkWithMultipleStreams(elem.column, rows_to_read, settings, state, nullptr); } } @@ -284,42 +279,19 @@ bool 
MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx if (isUseless()) return true; - auto granule = std::dynamic_pointer_cast(idx_granule); - if (!granule) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Set index condition got a granule with the wrong type"); + const MergeTreeIndexGranuleSet & granule = assert_cast(*idx_granule); - if (isUseless() || granule->empty() || (max_rows != 0 && granule->size() > max_rows)) + size_t size = granule.size(); + if (size == 0 || (max_rows != 0 && size > max_rows)) return true; - Block result = granule->block; + Block result = granule.block; actions->execute(result); - const auto & filter_node_name = actions->getActionsDAG().getOutputs().at(0)->result_name; - auto column = result.getByName(filter_node_name).column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality(); + const auto & column = result.getByPosition(result.columns() - 1).column; - if (column->onlyNull()) - return false; - - const auto * col_uint8 = typeid_cast(column.get()); - - const NullMap * null_map = nullptr; - - if (const auto * col_nullable = checkAndGetColumn(&*column)) - { - col_uint8 = typeid_cast(&col_nullable->getNestedColumn()); - null_map = &col_nullable->getNullMapData(); - } - - if (!col_uint8) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "ColumnUInt8 expected as Set index condition result"); - - const auto & condition = col_uint8->getData(); - size_t column_size = column->size(); - - for (size_t i = 0; i < column_size; ++i) - if ((!null_map || (*null_map)[i] == 0) && condition[i] & 1) + for (size_t i = 0; i < size; ++i) + if (column->getBool(i)) return true; return false; diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 7c66ba1a867..3348b5fbe34 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -34,7 +34,6 @@ struct MergeTreeIndexGranuleSet final : public IMergeTreeIndexGranule const String 
index_name; const size_t max_rows; - const Block index_sample_block; Block block; }; From 500475f2b81e74276f6316e710ff7313244928e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 10:45:05 +0200 Subject: [PATCH 30/47] Add a test --- tests/performance/set_index_analysis.xml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 tests/performance/set_index_analysis.xml diff --git a/tests/performance/set_index_analysis.xml b/tests/performance/set_index_analysis.xml new file mode 100644 index 00000000000..64d0af6690b --- /dev/null +++ b/tests/performance/set_index_analysis.xml @@ -0,0 +1,14 @@ + + + CREATE TABLE test_set (k UInt32, x UInt32, INDEX idx (x) TYPE set(10) GRANULARITY 1) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 111; + + SYSTEM STOP MERGES + INSERT INTO test_set SELECT number, number DIV 100 + rand() % 7 FROM numbers(3000000) SETTINGS max_insert_threads = 4; + + + SELECT count() FROM test_set WHERE x = 1234 SETTINGS max_threads = 8; + + + SYSTEM START MERGES + DROP TABLE IF EXISTS test_set + From 2a9795f4e39e6b8e2ef0aee3d2e97f396416662e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 10:45:19 +0200 Subject: [PATCH 31/47] Minor changes --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index de769c59d33..949807bb88b 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1296,8 +1296,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( size_t last_index_mark = 0; PostingsCacheForStore cache_in_store; - - if (dynamic_cast(&*index_helper) != nullptr) + if (dynamic_cast(index_helper.get())) cache_in_store.store = GinIndexStoreFactory::instance().get(index_helper->getFileName(), 
part->getDataPartStoragePtr()); for (size_t i = 0; i < ranges.size(); ++i) @@ -1315,12 +1314,12 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) { - // vector of indexes of useful ranges + /// An array of indices of useful ranges. auto result = ann_condition->getUsefulRanges(granule); for (auto range : result) { - // range for corresponding index + /// The range for the corresponding index. MarkRange data_range( std::max(ranges[i].begin, index_mark * index_granularity + range), std::min(ranges[i].end, index_mark * index_granularity + range + 1)); @@ -1344,8 +1343,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( continue; MarkRange data_range( - std::max(ranges[i].begin, index_mark * index_granularity), - std::min(ranges[i].end, (index_mark + 1) * index_granularity)); + std::max(ranges[i].begin, index_mark * index_granularity), + std::min(ranges[i].end, (index_mark + 1) * index_granularity)); if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek) res.push_back(data_range); From 332ec7c51fe260d43bcd9b9480daaa2e95179dcb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 14:28:04 +0300 Subject: [PATCH 32/47] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 0b7e2e1f942..e9dc638341a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -35,7 +35,7 @@ MergeTreeIndexGranuleSet::MergeTreeIndexGranuleSet( size_t max_rows_) : index_name(index_name_) , max_rows(max_rows_) - , block(index_sample_block_) + , block(index_sample_block_.cloneEmpty()) { } From 31f0b2f741e8a8c7b06e2271cfd5838a8d16fb32 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 
14:52:51 +0300 Subject: [PATCH 33/47] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index e9dc638341a..797455816f0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -291,7 +291,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx const auto & column = result.getByPosition(result.columns() - 1).column; for (size_t i = 0; i < size; ++i) - if (column->getBool(i)) + if (column->getUInt(i) & 1) return true; return false; From e18fa68f3d72a0dbed4257c4922a6c534fdb677e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 15:00:14 +0300 Subject: [PATCH 34/47] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 797455816f0..068e08f6819 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -291,7 +291,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx const auto & column = result.getByPosition(result.columns() - 1).column; for (size_t i = 0; i < size; ++i) - if (column->getUInt(i) & 1) + if (!column->isNullAt(i) && (column->get64(i) & 1)) return true; return false; From 11af3fd54f6ee3ed0291fee9ed88a852f03a252a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 16:13:41 +0300 Subject: [PATCH 35/47] Update MergeTreeIndexSet.cpp --- src/Storages/MergeTree/MergeTreeIndexSet.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index 068e08f6819..3e5cbb34556 100644 --- 
a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -267,6 +267,8 @@ MergeTreeIndexConditionSet::MergeTreeIndexConditionSet( filter_actions_dag->removeUnusedActions(); actions = std::make_shared(filter_actions_dag); + + actions_output_column_name = filter_actions_dag->getOutputs().at(0)->result_name; } bool MergeTreeIndexConditionSet::alwaysUnknownOrTrue() const @@ -288,7 +290,7 @@ bool MergeTreeIndexConditionSet::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx Block result = granule.block; actions->execute(result); - const auto & column = result.getByPosition(result.columns() - 1).column; + const auto & column = result.getByName(actions_output_column_name).column; for (size_t i = 0; i < size; ++i) if (!column->isNullAt(i) && (column->get64(i) & 1)) From a28309689f26e161dfbaa014bc51dea7460de30f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 19 May 2024 16:13:58 +0300 Subject: [PATCH 36/47] Update MergeTreeIndexSet.h --- src/Storages/MergeTree/MergeTreeIndexSet.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.h b/src/Storages/MergeTree/MergeTreeIndexSet.h index 3348b5fbe34..901653e47d6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.h +++ b/src/Storages/MergeTree/MergeTreeIndexSet.h @@ -126,6 +126,7 @@ private: std::unordered_set key_columns; ExpressionActionsPtr actions; + String actions_output_column_name; }; From 524f289f47fc6c7cd7b0ac52e6827f8636907160 Mon Sep 17 00:00:00 2001 From: Yohann Jardin Date: Sun, 19 May 2024 17:20:22 +0200 Subject: [PATCH 37/47] empty commit From 2765fd951cbcd6f5c576ee1919ae644cb4d76256 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 19 May 2024 21:02:12 +0200 Subject: [PATCH 38/47] Properly support native copy for azure --- src/Backups/BackupFactory.h | 1 + src/Backups/BackupIO_AzureBlobStorage.cpp | 38 ++-- src/Backups/BackupIO_AzureBlobStorage.h | 15 +- src/Backups/BackupSettings.cpp | 1 + src/Backups/BackupSettings.h | 
3 + src/Backups/BackupsWorker.cpp | 1 + .../registerBackupEngineAzureBlobStorage.cpp | 22 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 7 +- .../AzureBlobStorage/AzureObjectStorage.h | 6 +- .../ObjectStorages/ObjectStorageFactory.cpp | 5 +- .../copyAzureBlobStorageFile.cpp | 1 + src/Storages/StorageAzureBlob.cpp | 10 +- src/Storages/StorageAzureBlob.h | 2 + .../TableFunctionAzureBlobStorage.cpp | 4 +- .../TableFunctionAzureBlobStorageCluster.cpp | 4 +- .../__init__.py | 1 + .../test.py | 215 ++++++++++++++++++ 17 files changed, 301 insertions(+), 35 deletions(-) create mode 100644 tests/integration/test_azure_blob_storage_native_copy/__init__.py create mode 100644 tests/integration/test_azure_blob_storage_native_copy/test.py diff --git a/src/Backups/BackupFactory.h b/src/Backups/BackupFactory.h index 4e752508577..e13a9a12ca2 100644 --- a/src/Backups/BackupFactory.h +++ b/src/Backups/BackupFactory.h @@ -39,6 +39,7 @@ public: std::optional backup_uuid; bool deduplicate_files = true; bool allow_s3_native_copy = true; + bool allow_azure_native_copy = true; bool use_same_s3_credentials_for_base_backup = false; bool azure_attempt_to_create_container = true; ReadSettings read_settings; diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index a3998431674..672a68e089f 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -31,22 +31,28 @@ namespace ErrorCodes BackupReaderAzureBlobStorage::BackupReaderAzureBlobStorage( StorageAzureBlob::Configuration configuration_, + bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} + , 
data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURLWithContainer(), false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false); client_ptr->SetClickhouseOptions(Azure::Storage::Blobs::ClickhouseClientOptions{.IsClientForDisk=true}); - object_storage = std::make_unique("BackupReaderAzureBlobStorage", - std::move(client_ptr), - StorageAzureBlob::createSettings(context_), - configuration_.container); + object_storage = std::make_unique( + "BackupReaderAzureBlobStorage", + std::move(client_ptr), + StorageAzureBlob::createSettings(context_), + configuration.container, + configuration.getConnectionURLWithContainer()); + client = object_storage->getAzureBlobStorageClient(); - settings = object_storage->getSettings(); + auto settings_copy = *object_storage->getSettings(); + settings_copy.use_native_copy = allow_azure_native_copy; + settings = std::make_unique(settings_copy); } BackupReaderAzureBlobStorage::~BackupReaderAzureBlobStorage() = default; @@ -76,9 +82,9 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) { auto destination_data_source_description = destination_disk->getDataSourceDescription(); - if ((destination_data_source_description.type == DataSourceType::ObjectStorage) - && (destination_data_source_description.object_storage_type == ObjectStorageType::Azure) - && (destination_data_source_description.is_encrypted == encrypted_in_backup)) + LOG_TRACE(log, "Source description {} desctionation description {}", data_source_description.description, destination_data_source_description.description); + if (destination_data_source_description.sameKind(data_source_description) + && destination_data_source_description.is_encrypted == encrypted_in_backup) { LOG_TRACE(log, "Copying {} from 
AzureBlobStorage to disk {}", path_in_backup, destination_disk->getName()); auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional &) -> size_t @@ -116,12 +122,13 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( StorageAzureBlob::Configuration configuration_, + bool allow_azure_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterAzureBlobStorage")) - , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.container, false, false} + , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::Azure, MetadataStorageType::None, configuration_.getConnectionURLWithContainer(), false, false} , configuration(configuration_) { auto client_ptr = StorageAzureBlob::createClient(configuration, /* is_read_only */ false, attempt_to_create_container); @@ -130,9 +137,12 @@ BackupWriterAzureBlobStorage::BackupWriterAzureBlobStorage( object_storage = std::make_unique("BackupWriterAzureBlobStorage", std::move(client_ptr), StorageAzureBlob::createSettings(context_), - configuration_.container); + configuration_.container, + configuration.getConnectionURLWithContainer()); client = object_storage->getAzureBlobStorageClient(); - settings = object_storage->getSettings(); + auto settings_copy = *object_storage->getSettings(); + settings_copy.use_native_copy = allow_azure_native_copy; + settings = std::make_unique(settings_copy); } void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path, @@ -140,7 +150,9 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { /// Use the native copy as a more optimal way 
to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. auto source_data_source_description = src_disk->getDataSourceDescription(); - if (source_data_source_description.sameKind(data_source_description) && (source_data_source_description.is_encrypted == copy_encrypted)) + LOG_TRACE(log, "Source description {} desctionation description {}", source_data_source_description.description, data_source_description.description); + if (source_data_source_description.sameKind(data_source_description) + && source_data_source_description.is_encrypted == copy_encrypted) { /// getBlobPath() can return more than 3 elements if the file is stored as multiple objects in AzureBlobStorage container. /// In this case we can't use the native copy. diff --git a/src/Backups/BackupIO_AzureBlobStorage.h b/src/Backups/BackupIO_AzureBlobStorage.h index f0b9aace4d4..3a909ab684a 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.h +++ b/src/Backups/BackupIO_AzureBlobStorage.h @@ -16,7 +16,12 @@ namespace DB class BackupReaderAzureBlobStorage : public BackupReaderDefault { public: - BackupReaderAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderAzureBlobStorage( + StorageAzureBlob::Configuration configuration_, + bool allow_azure_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_); ~BackupReaderAzureBlobStorage() override; bool fileExists(const String & file_name) override; @@ -37,7 +42,13 @@ private: class BackupWriterAzureBlobStorage : public BackupWriterDefault { public: - BackupWriterAzureBlobStorage(StorageAzureBlob::Configuration configuration_, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_, bool attempt_to_create_container); + BackupWriterAzureBlobStorage( + StorageAzureBlob::Configuration configuration_, + bool 
allow_azure_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool attempt_to_create_container); ~BackupWriterAzureBlobStorage() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupSettings.cpp b/src/Backups/BackupSettings.cpp index 06f49dfa448..e33880f88e3 100644 --- a/src/Backups/BackupSettings.cpp +++ b/src/Backups/BackupSettings.cpp @@ -27,6 +27,7 @@ namespace ErrorCodes M(Bool, decrypt_files_from_encrypted_disks) \ M(Bool, deduplicate_files) \ M(Bool, allow_s3_native_copy) \ + M(Bool, allow_azure_native_copy) \ M(Bool, use_same_s3_credentials_for_base_backup) \ M(Bool, azure_attempt_to_create_container) \ M(Bool, read_from_filesystem_cache) \ diff --git a/src/Backups/BackupSettings.h b/src/Backups/BackupSettings.h index eccf4e90ce7..a6c4d5d7181 100644 --- a/src/Backups/BackupSettings.h +++ b/src/Backups/BackupSettings.h @@ -44,6 +44,9 @@ struct BackupSettings /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) bool allow_s3_native_copy = true; + /// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs) + bool allow_azure_native_copy = true; + /// Whether base backup to S3 should inherit credentials from the BACKUP query. 
bool use_same_s3_credentials_for_base_backup = false; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 9057dc9d198..69d9c52ebd9 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -598,6 +598,7 @@ void BackupsWorker::doBackup( backup_create_params.backup_uuid = backup_settings.backup_uuid; backup_create_params.deduplicate_files = backup_settings.deduplicate_files; backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy; + backup_create_params.allow_azure_native_copy = backup_settings.allow_azure_native_copy; backup_create_params.use_same_s3_credentials_for_base_backup = backup_settings.use_same_s3_credentials_for_base_backup; backup_create_params.azure_attempt_to_create_container = backup_settings.azure_attempt_to_create_container; backup_create_params.read_settings = getReadSettingsForBackup(context, backup_settings); diff --git a/src/Backups/registerBackupEngineAzureBlobStorage.cpp b/src/Backups/registerBackupEngineAzureBlobStorage.cpp index 1b9545fc455..8b05965f472 100644 --- a/src/Backups/registerBackupEngineAzureBlobStorage.cpp +++ b/src/Backups/registerBackupEngineAzureBlobStorage.cpp @@ -135,10 +135,12 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(configuration, - params.read_settings, - params.write_settings, - params.context); + auto reader = std::make_shared( + configuration, + params.allow_azure_native_copy, + params.read_settings, + params.write_settings, + params.context); return std::make_unique( params.backup_info, @@ -150,11 +152,13 @@ void registerBackupEngineAzureBlobStorage(BackupFactory & factory) } else { - auto writer = std::make_shared(configuration, - params.read_settings, - params.write_settings, - params.context, - params.azure_attempt_to_create_container); + auto writer = std::make_shared( + configuration, + params.allow_azure_native_copy, + 
params.read_settings, + params.write_settings, + params.context, + params.azure_attempt_to_create_container); return std::make_unique( params.backup_info, diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 36225b13ee8..bee8e206ec4 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -107,11 +107,13 @@ AzureObjectStorage::AzureObjectStorage( const String & name_, AzureClientPtr && client_, SettingsPtr && settings_, - const String & object_namespace_) + const String & object_namespace_, + const String & description_) : name(name_) , client(std::move(client_)) , settings(std::move(settings_)) , object_namespace(object_namespace_) + , description(description_) , log(getLogger("AzureObjectStorage")) { } @@ -409,7 +411,8 @@ std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), - object_namespace + object_namespace, + description ); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index f52ab803012..3d94090bcc6 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -81,7 +81,8 @@ public: const String & name_, AzureClientPtr && client_, SettingsPtr && settings_, - const String & object_namespace_); + const String & object_namespace_, + const String & description_); void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; @@ -93,7 +94,7 @@ public: std::string getCommonKeyPrefix() const override { return ""; } - std::string getDescription() const override { return client.get()->GetUrl(); } + std::string 
getDescription() const override { return description; } bool exists(const StoredObject & object) const override; @@ -172,6 +173,7 @@ private: MultiVersion client; MultiVersion settings; const String object_namespace; /// container + prefix + const String description; /// url + container LoggerPtr log; }; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 761ff24e648..cddcea979b5 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -306,11 +306,14 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) bool /* skip_access_check */) -> ObjectStoragePtr { AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); + std::string endpoint_string = endpoint.getEndpoint(); + return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), - endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix); + endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix, + endpoint.prefix.empty() ? 
endpoint_string : endpoint_string.substr(0, endpoint_string.length() - endpoint.prefix.length())); }; factory.registerObjectStorageType("azure_blob_storage", creator); factory.registerObjectStorageType("azure", creator); diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index 769f1a184f6..dc46de1e07f 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -289,6 +289,7 @@ void copyAzureBlobStorageFile( if (settings->use_native_copy) { + LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject); diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 9c551e82a99..0103fc0d2a2 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -302,8 +302,8 @@ void registerStorageAzureBlob(StorageFactory & factory) auto settings = StorageAzureBlob::createSettings(args.getContext()); return std::make_shared( - std::move(configuration), - std::make_unique("AzureBlobStorage", std::move(client), std::move(settings),configuration.container), + configuration, + std::make_unique("AzureBlobStorage", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), args.getContext(), args.table_id, args.columns, @@ -491,6 +491,12 @@ Poco::URI StorageAzureBlob::Configuration::getConnectionURL() const return Poco::URI(parsed_connection_string.BlobServiceUrl.GetAbsoluteUrl()); } +std::string StorageAzureBlob::Configuration::getConnectionURLWithContainer() const +{ + auto url = getConnectionURL(); + return fs::path(url.toString()) / container; +} + bool 
StorageAzureBlob::Configuration::withGlobsIgnorePartitionWildcard() const { if (!withPartitionWildcard()) diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index b433cd92d68..7bce40bce26 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -45,6 +45,8 @@ public: Poco::URI getConnectionURL() const; + std::string getConnectionURLWithContainer() const; + std::string connection_url; bool is_connection_string; diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index 275cd2a9cbb..e73277b4d7b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -333,7 +333,7 @@ ColumnsDescription TableFunctionAzureBlobStorage::getActualTableStructure(Contex auto client = StorageAzureBlob::createClient(configuration, !is_insert_query); auto settings = StorageAzureBlob::createSettings(context); - auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container); + auto object_storage = std::make_unique("AzureBlobStorageTableFunction", std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()); if (configuration.format == "auto") return StorageAzureBlob::getTableStructureAndFormatFromData(object_storage.get(), configuration, std::nullopt, context).first; return StorageAzureBlob::getTableStructureFromData(object_storage.get(), configuration, std::nullopt, context); @@ -365,7 +365,7 @@ StoragePtr TableFunctionAzureBlobStorage::executeImpl(const ASTPtr & /*ast_funct StoragePtr storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), context, 
StorageID(getDatabaseName(), table_name), columns, diff --git a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp index 04dddca7672..dc65426a6e3 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorageCluster.cpp @@ -39,7 +39,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( /// On worker node this filename won't contains globs storage = std::make_shared( configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), context, StorageID(getDatabaseName(), table_name), columns, @@ -54,7 +54,7 @@ StoragePtr TableFunctionAzureBlobStorageCluster::executeImpl( storage = std::make_shared( cluster_name, configuration, - std::make_unique(table_name, std::move(client), std::move(settings), configuration.container), + std::make_unique(table_name, std::move(client), std::move(settings), configuration.container, configuration.getConnectionURLWithContainer()), StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, diff --git a/tests/integration/test_azure_blob_storage_native_copy/__init__.py b/tests/integration/test_azure_blob_storage_native_copy/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_azure_blob_storage_native_copy/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_azure_blob_storage_native_copy/test.py b/tests/integration/test_azure_blob_storage_native_copy/test.py new file mode 100644 index 00000000000..b16d9b4b5c4 --- /dev/null +++ b/tests/integration/test_azure_blob_storage_native_copy/test.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python3 + +import gzip +import json +import logging +import os +import io 
+import random +import threading +import time + +from azure.storage.blob import BlobServiceClient +import helpers.client +import pytest +from helpers.cluster import ClickHouseCluster, ClickHouseInstance +from helpers.network import PartitionManager +from helpers.mock_servers import start_mock_servers +from helpers.test_tools import exec_query_with_retry + + +def generate_config(port): + path = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "./_gen/storage_conf.xml", + ) + os.makedirs(os.path.dirname(path), exist_ok=True) + with open(path, "w") as f: + TEMPLATE = """ + + + + + local + object_storage + azure_blob_storage + http://azurite1:{port}/devstoreaccount1 + cont + false + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + true + + + local + object_storage + azure_blob_storage + true + http://azurite1:{port}/devstoreaccount1 + othercontainer + false + devstoreaccount1 + Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw== + + + cache + disk_azure + /tmp/azure_cache/ + 1000000000 + 1 + + + + + +
+ disk_azure +
+
+
+ + +
+ disk_azure_other_bucket +
+
+
+ + +
+ disk_azure_cache +
+
+
+
+
+ + disk_azure + disk_azure_cache + disk_azure_other_bucket + +
+ """ + f.write(TEMPLATE.format(port=port)) + return path + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + port = cluster.azurite_port + path = generate_config(port) + cluster.add_instance( + "node1", + main_configs=[path], + with_azurite=True, + ) + cluster.add_instance( + "node2", + main_configs=[path], + with_azurite=True, + ) + cluster.start() + + yield cluster + finally: + cluster.shutdown() + + +def azure_query( + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None +): + for i in range(try_num): + try: + if expect_error: + return node.query_and_get_error(query, settings=settings) + else: + return node.query(query, settings=settings) + except Exception as ex: + retriable_errors = [ + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Connection closed before getting full response or response is less than expected", + "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response", + "DB::Exception: Azure::Core::Http::TransportException: Error while polling for socket ready read", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Connection closed before getting full response or response is less than expected", + "Azure::Core::Http::TransportException, e.what() = Connection was closed by the server while trying to read a response", + "Azure::Core::Http::TransportException, e.what() = Error while polling for socket ready read", + ] + retry = False + for error in retriable_errors: + if error in str(ex): + retry = True + print(f"Try num: {i}. 
Having retriable error: {ex}") + time.sleep(i) + break + if not retry or i == try_num - 1: + raise Exception(ex) + if query_on_retry is not None: + node.query(query_on_retry) + continue + + +def test_backup_restore_on_merge_tree_same_container(cluster): + node1 = cluster.instances["node1"] + azure_query( + node1, + f"CREATE TABLE test_simple_merge_tree(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_cache'", + ) + azure_query(node1, f"INSERT INTO test_simple_merge_tree VALUES (1, 'a')") + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_backup')" + print("BACKUP DEST", backup_destination) + azure_query( + node1, + f"BACKUP TABLE test_simple_merge_tree TO {backup_destination}", + ) + + assert node1.contains_in_log("using native copy") + + azure_query( + node1, + f"RESTORE TABLE test_simple_merge_tree AS test_simple_merge_tree_restored FROM {backup_destination};", + ) + assert ( + azure_query(node1, f"SELECT * from test_simple_merge_tree_restored") == "1\ta\n" + ) + azure_query(node1, f"DROP TABLE test_simple_merge_tree") + azure_query(node1, f"DROP TABLE test_simple_merge_tree_restored") + + +def test_backup_restore_on_merge_tree_different_container(cluster): + node2 = cluster.instances["node2"] + azure_query( + node2, + f"CREATE TABLE test_simple_merge_tree_different_bucket(key UInt64, data String) Engine = MergeTree() ORDER BY tuple() SETTINGS storage_policy='policy_azure_other_bucket'", + ) + azure_query( + node2, f"INSERT INTO test_simple_merge_tree_different_bucket VALUES (1, 'a')" + ) + + backup_destination = f"AzureBlobStorage('{cluster.env_variables['AZURITE_CONNECTION_STRING']}', 'cont', 'test_simple_merge_tree_different_bucket_backup_different_bucket')" + print("BACKUP DEST", backup_destination) + azure_query( + node2, + f"BACKUP TABLE test_simple_merge_tree_different_bucket TO {backup_destination}", + ) + + assert not 
node2.contains_in_log("using native copy") + + azure_query( + node2, + f"RESTORE TABLE test_simple_merge_tree_different_bucket AS test_simple_merge_tree_different_bucket_restored FROM {backup_destination};", + ) + assert ( + azure_query( + node2, f"SELECT * from test_simple_merge_tree_different_bucket_restored" + ) + == "1\ta\n" + ) + + assert not node2.contains_in_log("using native copy") + + azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket") + azure_query(node2, f"DROP TABLE test_simple_merge_tree_different_bucket_restored") From 64a308013f6d0075fcf9d7c90d7e50cd9a3ae19e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 12:54:23 +0200 Subject: [PATCH 39/47] Tune mmap_rnd_bits to workaround sanitizers issues v1: vm.mmap_rnd_bits=28 v2: rebase with clang 18.1.6 + kernel.randomize_va_space=0 v3: leave only vm.mmap_rnd_bits=28 + use pre-run.sh (hope that it will be used), that way docker will not require --privileged and by some reason this breaks ASAN (though I cannot reproduce it) v4: use actions/common_setup over init_runner.sh (it requires some manual deploy) --- .github/actions/common_setup/action.yml | 7 +++++++ tests/ci/worker/prepare-ci-ami.sh | 2 ++ 2 files changed, 9 insertions(+) diff --git a/.github/actions/common_setup/action.yml b/.github/actions/common_setup/action.yml index e492fa97816..b9299c64e72 100644 --- a/.github/actions/common_setup/action.yml +++ b/.github/actions/common_setup/action.yml @@ -28,3 +28,10 @@ runs: run: | # to remove every leftovers sudo rm -fr "$TEMP_PATH" && mkdir -p "$TEMP_PATH" + - name: Tune vm.mmap_rnd_bits for sanitizers + shell: bash + run: | + sudo sysctl vm.mmap_rnd_bits + # https://github.com/google/sanitizers/issues/856 + echo "Tune vm.mmap_rnd_bits for sanitizers" + sudo sysctl vm.mmap_rnd_bits=28 diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 92e97865b18..3e2f33c89d1 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ 
b/tests/ci/worker/prepare-ci-ami.sh @@ -91,6 +91,8 @@ apt-get install --yes --no-install-recommends azure-cli # Increase the limit on number of virtual memory mappings to aviod 'Cannot mmap' error echo "vm.max_map_count = 2097152" > /etc/sysctl.d/01-increase-map-counts.conf +# Workarond for sanitizers uncompatibility with some kernels, see https://github.com/google/sanitizers/issues/856 +echo "vm.mmap_rnd_bits=28" > /etc/sysctl.d/02-vm-mmap_rnd_bits.conf systemctl restart docker From 9fefece70b624afcd79e5c016abab3a7f20f0922 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 08:37:48 +0200 Subject: [PATCH 40/47] Fix tidy --- src/Common/ThreadStatus.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index f2930513280..6ac4feac814 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -23,12 +23,8 @@ thread_local ThreadStatus constinit * current_thread = nullptr; namespace { -#if defined(__aarch64__) /// For aarch64 16K is not enough (likely due to tons of registers) -static constexpr size_t UNWIND_MINSIGSTKSZ = 32 << 10; -#else -static constexpr size_t UNWIND_MINSIGSTKSZ = 16 << 10; -#endif +constexpr size_t UNWIND_MINSIGSTKSZ = 32 << 10; /// Alternative stack for signal handling. 
/// From f7c9fa696f91c306550ed3435bb51999983903e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 09:45:23 +0200 Subject: [PATCH 41/47] Fix UBSan error in negative positional arguments --- src/Interpreters/replaceForPositionalArguments.cpp | 2 +- .../03157_negative_positional_arguments_ubsan.reference | 0 .../0_stateless/03157_negative_positional_arguments_ubsan.sql | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference create mode 100644 tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql diff --git a/src/Interpreters/replaceForPositionalArguments.cpp b/src/Interpreters/replaceForPositionalArguments.cpp index cceb0650fcd..3d60723a167 100644 --- a/src/Interpreters/replaceForPositionalArguments.cpp +++ b/src/Interpreters/replaceForPositionalArguments.cpp @@ -44,7 +44,7 @@ bool replaceForPositionalArguments(ASTPtr & argument, const ASTSelectQuery * sel pos = value; else { - if (static_cast(std::abs(value)) > columns.size()) + if (value < -static_cast(columns.size())) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Negative positional argument number {} is out of bounds. 
Expected in range [-{}, -1]", diff --git a/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql new file mode 100644 index 00000000000..ddf5185c945 --- /dev/null +++ b/tests/queries/0_stateless/03157_negative_positional_arguments_ubsan.sql @@ -0,0 +1 @@ +SELECT 1 GROUP BY -9223372036854775808; -- { serverError BAD_ARGUMENTS } From 1525ca4cf02479e53ebb670720ea385ddb7670a1 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 16 May 2024 12:09:47 +0200 Subject: [PATCH 42/47] Disable pretty format restrictions when stdout is not TTY Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 4 ++-- .../queries/0_stateless/03160_pretty_format_tty.reference | 1 + tests/queries/0_stateless/03160_pretty_format_tty.sh | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03160_pretty_format_tty.reference create mode 100755 tests/queries/0_stateless/03160_pretty_format_tty.sh diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 67aba2256e8..4441d884754 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -710,8 +710,8 @@ void ClientBase::adjustSettings() settings.input_format_values_allow_data_after_semicolon.changed = false; } - /// Do not limit pretty format output in case of --pager specified. - if (!pager.empty()) + /// Do not limit pretty format output in case of --pager specified or in case of stdout is not a tty. 
+ if (!pager.empty() || !stdout_is_a_tty) { if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed) { diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.reference b/tests/queries/0_stateless/03160_pretty_format_tty.reference new file mode 100644 index 00000000000..6a5b453966d --- /dev/null +++ b/tests/queries/0_stateless/03160_pretty_format_tty.reference @@ -0,0 +1 @@ +100004 diff --git a/tests/queries/0_stateless/03160_pretty_format_tty.sh b/tests/queries/0_stateless/03160_pretty_format_tty.sh new file mode 100755 index 00000000000..bbc4b96eb90 --- /dev/null +++ b/tests/queries/0_stateless/03160_pretty_format_tty.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# default output_format_pretty_max_rows is 10K +$CLICKHOUSE_LOCAL -q "select * from numbers(100e3) format PrettySpace settings max_threads=1" | wc -l From 434816820d31e09873453024e9563fca06b5b3d4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 18 May 2024 15:27:13 +0200 Subject: [PATCH 43/47] Fix upgrade settings changes check settings changes check simply checks that the settings are either the same, or there is a record in system.settings_changes, however, the problem is that clickhouse-local now adjusts some settings for Pretty format in case of stdout is not a tty, and this is the case for this check. So to avoid this, just run the clickhouse-local under script(1) to fool it. 
Signed-off-by: Azat Khuzhin --- docker/test/upgrade/run.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 6761ddba3e5..29174cc87e6 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -58,8 +58,14 @@ echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/sys # Install previous release packages install_packages previous_release_package_folder -# Save old settings from system table for settings changes check -clickhouse-local -q "select * from system.settings format Native" > old_settings.native +# NOTE: we need to run clickhouse-local under script to get settings without any adjustments, like clickhouse-local does in case of stdout is not a tty +function save_settings_clean() +{ + local out=$1 && shift + script -q -c "clickhouse-local -q \"select * from system.settings into outfile '$out'\"" --log-out /dev/null +} + +save_settings_clean 'old_settings.native' # Initial run without S3 to create system.*_log on local file system to make it # available for dump via clickhouse-local @@ -183,7 +189,7 @@ configure IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'") if [ "${IS_SANITIZED}" -eq "0" ] then - clickhouse-local -q "select * from system.settings format Native" > new_settings.native + save_settings_clean 'new_settings.native' clickhouse-local -nmq " CREATE TABLE old_settings AS file('old_settings.native'); CREATE TABLE new_settings AS file('new_settings.native'); From 925ed89195008a787eeebd9213ac59f1a8adb17b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 20 May 2024 10:31:32 +0200 Subject: [PATCH 44/47] More instrumentation around index --- src/Common/CurrentMetrics.cpp | 6 ++++-- src/Databases/DatabaseOnDisk.cpp | 3 +++ ...ObjectStorageRemoteMetadataRestoreHelper.cpp | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 3 ++- 
.../MergeTree/MergeTreeDataSelectExecutor.cpp | 17 ++++++++++++++++- src/Storages/System/StorageSystemReplicas.cpp | 2 ++ 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index b9916130bb9..21b4d114d79 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -288,8 +288,10 @@ M(HTTPConnectionsTotal, "Total count of all sessions: stored in the pool and actively used right now for http hosts") \ \ M(AddressesActive, "Total count of addresses which are used for creation connections with connection pools") \ - M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \ - + M(AddressesBanned, "Total count of addresses which are banned as faulty for creation connections with connection pools") \ + \ + M(FilteringMarksWithPrimaryKey, "Number of threads currently doing filtering of mark ranges by the primary key") \ + M(FilteringMarksWithSecondaryKeys, "Number of threads currently doing filtering of mark ranges by secondary keys") \ #ifdef APPLY_FOR_EXTERNAL_METRICS #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 5b9723fabc5..161be35f129 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -26,6 +26,8 @@ #include #include #include +#include + namespace fs = std::filesystem; @@ -665,6 +667,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat pool.scheduleOrThrowOnError( [batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable { + setThreadName("DatabaseOnDisk"); for (const auto & file : batch) if (file.second) process_metadata_file(file.first); diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp 
index 0314e0a7e92..18a0377efe7 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp @@ -129,6 +129,7 @@ void DiskObjectStorageRemoteMetadataRestoreHelper::migrateToRestorableSchemaRecu { pool.scheduleOrThrowOnError([this, path] { + setThreadName("BackupWorker"); for (auto it = disk->iterateDirectory(path); it->isValid(); it->next()) migrateFileToRestorableSchema(it->path()); }); diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index ab29c64184d..d72f3d81549 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -695,7 +695,6 @@ String serializeQuery(const IAST & query, size_t max_length) } -// static void AsynchronousInsertQueue::processData( InsertQuery key, InsertDataPtr data, ContextPtr global_context, QueueShardFlushTimeHistory & queue_shard_flush_time_history) try @@ -705,6 +704,8 @@ try SCOPE_EXIT(CurrentMetrics::sub(CurrentMetrics::PendingAsyncInsert, data->entries.size())); + setThreadName("AsyncInsertQ"); + const auto log = getLogger("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index de769c59d33..2b1b1b26347 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -53,6 +53,8 @@ namespace CurrentMetrics extern const Metric MergeTreeDataSelectExecutorThreads; extern const Metric MergeTreeDataSelectExecutorThreadsActive; extern const Metric MergeTreeDataSelectExecutorThreadsScheduled; + extern const Metric FilteringMarksWithPrimaryKey; + extern const Metric FilteringMarksWithSecondaryKeys; } namespace DB @@ -664,15 +666,22 @@ RangesInDataParts 
MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd size_t total_marks_count = part->index_granularity.getMarksCountWithoutFinal(); if (metadata_snapshot->hasPrimaryKey() || part_offset_condition) + { + CurrentMetrics::Increment metric(CurrentMetrics::FilteringMarksWithPrimaryKey); ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, part_offset_condition, settings, log); + } else if (total_marks_count) + { ranges.ranges = MarkRanges{{MarkRange{0, total_marks_count}}}; + } sum_marks_pk.fetch_add(ranges.getMarksCount(), std::memory_order_relaxed); if (!ranges.ranges.empty()) sum_parts_pk.fetch_add(1, std::memory_order_relaxed); + CurrentMetrics::Increment metric(CurrentMetrics::FilteringMarksWithSecondaryKeys); + for (size_t idx = 0; idx < skip_indexes.useful_indices.size(); ++idx) { if (ranges.ranges.empty()) @@ -733,6 +742,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd num_threads = std::min(num_streams, settings.max_threads_for_indexes); } + LOG_TRACE(log, "Filtering marks by primary and secondary keys"); + if (num_threads <= 1) { for (size_t part_index = 0; part_index < parts.size(); ++part_index) @@ -740,7 +751,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd } else { - /// Parallel loading of data parts. + /// Parallel loading and filtering of data parts. 
ThreadPool pool( CurrentMetrics::MergeTreeDataSelectExecutorThreads, CurrentMetrics::MergeTreeDataSelectExecutorThreadsActive, @@ -748,8 +759,11 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd num_threads); for (size_t part_index = 0; part_index < parts.size(); ++part_index) + { pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()] { + setThreadName("MergeTreeIndex"); + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachFromGroupIfNotDetached(); @@ -759,6 +773,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd process_part(part_index); }); + } pool.wait(); } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 5045dec3682..10d5c353c43 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -141,6 +141,8 @@ public: if (thread_group) CurrentThread::attachToGroupIfDetached(thread_group); + setThreadName("SystemReplicas"); + try { ReplicatedTableStatus status; From 314752e044bcdf5d7516d5188e7e6a4302467b90 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 11:55:44 +0200 Subject: [PATCH 45/47] Update src/Backups/BackupIO_AzureBlobStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 672a68e089f..5f6495e5733 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -82,7 +82,7 @@ void BackupReaderAzureBlobStorage::copyFileToDisk(const String & path_in_backup, DiskPtr destination_disk, const String & destination_path, WriteMode write_mode) { auto destination_data_source_description = destination_disk->getDataSourceDescription(); - LOG_TRACE(log, "Source 
description {} desctionation description {}", data_source_description.description, destination_data_source_description.description); + LOG_TRACE(log, "Source description {}, desctionation description {}", data_source_description.description, destination_data_source_description.description); if (destination_data_source_description.sameKind(data_source_description) && destination_data_source_description.is_encrypted == encrypted_in_backup) { From 6a7a09a1f902ec4fb54cd19f9368581cddf6da1e Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 11:55:50 +0200 Subject: [PATCH 46/47] Update src/Backups/BackupIO_AzureBlobStorage.cpp Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- src/Backups/BackupIO_AzureBlobStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Backups/BackupIO_AzureBlobStorage.cpp b/src/Backups/BackupIO_AzureBlobStorage.cpp index 5f6495e5733..b035125a545 100644 --- a/src/Backups/BackupIO_AzureBlobStorage.cpp +++ b/src/Backups/BackupIO_AzureBlobStorage.cpp @@ -150,7 +150,7 @@ void BackupWriterAzureBlobStorage::copyFileFromDisk(const String & path_in_backu { /// Use the native copy as a more optimal way to copy a file from AzureBlobStorage to AzureBlobStorage if it's possible. 
auto source_data_source_description = src_disk->getDataSourceDescription(); - LOG_TRACE(log, "Source description {} desctionation description {}", source_data_source_description.description, data_source_description.description); + LOG_TRACE(log, "Source description {}, desctionation description {}", source_data_source_description.description, data_source_description.description); if (source_data_source_description.sameKind(data_source_description) && source_data_source_description.is_encrypted == copy_encrypted) { From 23e87ef80ee332b71d5e9a1d51e81de9d4626a84 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 20 May 2024 11:56:44 +0200 Subject: [PATCH 47/47] Review fix --- src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp index dc46de1e07f..667e63729ca 100644 --- a/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp +++ b/src/IO/AzureBlobStorage/copyAzureBlobStorageFile.cpp @@ -289,7 +289,7 @@ void copyAzureBlobStorageFile( if (settings->use_native_copy) { - LOG_TRACE(&Poco::Logger::get("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); + LOG_TRACE(getLogger("copyAzureBlobStorageFile"), "Copying Blob: {} from Container: {} using native copy", src_container_for_logging, src_blob); ProfileEvents::increment(ProfileEvents::AzureCopyObject); if (dest_client->GetClickhouseOptions().IsClientForDisk) ProfileEvents::increment(ProfileEvents::DiskAzureCopyObject);