From 548d79c2e80bb23f246c63fc7e33d0c01eb6b944 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 2 Mar 2023 12:31:09 +0000 Subject: [PATCH 001/478] Remove perf test duplicate_order_by_and_distinct.xml --- tests/performance/duplicate_order_by_and_distinct.xml | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 tests/performance/duplicate_order_by_and_distinct.xml diff --git a/tests/performance/duplicate_order_by_and_distinct.xml b/tests/performance/duplicate_order_by_and_distinct.xml deleted file mode 100644 index e36bc470512..00000000000 --- a/tests/performance/duplicate_order_by_and_distinct.xml +++ /dev/null @@ -1,8 +0,0 @@ - - 1 - - - SELECT * FROM (SELECT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY EventDate, CounterID FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single) FORMAT Null - SELECT DISTINCT * FROM (SELECT DISTINCT CounterID, EventDate FROM hits_10m_single ORDER BY CounterID DESC) ORDER BY toStartOfWeek(EventDate) FORMAT Null - From 1e7080a9aae4403ea613d401501f5b6498bf2df9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 11 Apr 2023 17:35:47 +0000 Subject: [PATCH 002/478] ReadFromMergeTree: update sort description after applying prewhere info --- .../QueryPlan/ReadFromMergeTree.cpp | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 291499ff412..64a3a4c74ae 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -133,6 +133,35 @@ static bool checkAllPartsOnRemoteFS(const RangesInDataParts & parts) return true; } +/// build sort description for output stream +static void updateSortDescriptionForOutputStream( + DataStream & output_stream, const Names & sorting_key_columns, const int sort_direction, InputOrderInfoPtr input_order_info) +{ + SortDescription sort_description; + const Block & header = output_stream.header; + for (const auto & column_name : sorting_key_columns) + { + if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) + == header.end()) + break; + sort_description.emplace_back(column_name, sort_direction); + } + if (!sort_description.empty()) + { + if (input_order_info) + { + output_stream.sort_scope = DataStream::SortScope::Stream; + const size_t used_prefix_of_sorting_key_size = input_order_info->used_prefix_of_sorting_key_size; + if (sort_description.size() > used_prefix_of_sorting_key_size) + sort_description.resize(used_prefix_of_sorting_key_size); + } + else + output_stream.sort_scope = DataStream::SortScope::Chunk; + } + + output_stream.sort_description = std::move(sort_description); +} + void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const { @@ -244,33 +273,11 @@ ReadFromMergeTree::ReadFromMergeTree( /// Add explicit description. 
setStepDescription(data.getStorageID().getFullNameNotQuoted()); - { /// build sort description for output stream - SortDescription sort_description; - const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(); - const Block & header = output_stream->header; - const int sort_direction = getSortDirection(); - for (const auto & column_name : sorting_key_columns) - { - if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; }) - == header.end()) - break; - sort_description.emplace_back(column_name, sort_direction); - } - if (!sort_description.empty()) - { - if (query_info.getInputOrderInfo()) - { - output_stream->sort_scope = DataStream::SortScope::Stream; - const size_t used_prefix_of_sorting_key_size = query_info.getInputOrderInfo()->used_prefix_of_sorting_key_size; - if (sort_description.size() > used_prefix_of_sorting_key_size) - sort_description.resize(used_prefix_of_sorting_key_size); - } - else - output_stream->sort_scope = DataStream::SortScope::Chunk; - } - - output_stream->sort_description = std::move(sort_description); - } + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo()); } @@ -1425,6 +1432,11 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info prewhere_info_value, data.getPartitionValueType(), virt_column_names)}; + updateSortDescriptionForOutputStream( + *output_stream, + storage_snapshot->getMetadataForQuery()->getSortingKeyColumns(), + getSortDirection(), + query_info.getInputOrderInfo()); } bool ReadFromMergeTree::requestOutputEachPartitionThroughSeparatePort() From 125e5c50b891038740a2a75e2570a201999c09f3 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 25 Apr 2023 21:30:03 +0000 Subject: [PATCH 003/478] allow to flush async insert queue --- programs/server/Server.cpp | 11 ++- src/Access/Common/AccessType.h | 1 + src/Core/Settings.h | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 71 +++++++++++++++---- src/Interpreters/AsynchronousInsertQueue.h | 11 ++- src/Interpreters/InterpreterSystemQuery.cpp | 17 +++++ src/Parsers/ASTSystemQuery.h | 1 + .../02726_async_insert_flush_queue.reference | 5 ++ .../02726_async_insert_flush_queue.sql | 28 ++++++++ 9 files changed, 128 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02726_async_insert_flush_queue.reference create mode 100644 tests/queries/0_stateless/02726_async_insert_flush_queue.sql diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8c0d50bae55..cd08de126c9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1461,16 +1461,21 @@ try /// Load global settings from default_profile and system_profile. global_context->setDefaultProfiles(config()); - const Settings & settings = global_context->getSettingsRef(); /// Initialize background executors after we load default_profile config. /// This is needed to load proper values of background_pool_size etc. 
global_context->initializeBackgroundExecutorsIfNeeded(); - if (settings.async_insert_threads) + size_t async_insert_threads = config().getUInt("async_insert_threads", 16); + bool async_insert_queue_flush_on_shutdown = config().getBool("async_insert_queue_flush_on_shutdown", false); + + if (async_insert_threads) + { global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - settings.async_insert_threads)); + async_insert_threads, + async_insert_queue_flush_on_shutdown)); + } size_t mark_cache_size = server_settings.mark_cache_size; String mark_cache_policy = server_settings.mark_cache_policy; diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 57fa75dc67b..ae7e7ab5bf0 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -182,6 +182,7 @@ enum class AccessType M(SYSTEM_SYNC_FILE_CACHE, "SYNC FILE CACHE", GLOBAL, SYSTEM) \ M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \ M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \ + M(SYSTEM_FLUSH_ASYNC_INSERT_QUEUE, "FLUSH ASYNC INSERT QUEUE", GLOBAL, SYSTEM_FLUSH) \ M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \ M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \ M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 101f6f1f934..96dbe26f820 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -646,7 +646,6 @@ class IColumn; M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ \ - M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. 
If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \ M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \ @@ -783,6 +782,7 @@ class IColumn; MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \ MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \ + MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \ /* ---- */ \ MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \ MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index b8de0246ae2..0a817995eb4 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -128,9 +128,10 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep } } -AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_) +AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_) : WithContext(context_) , pool_size(pool_size_) + , flush_on_shutdown(flush_on_shutdown_) , queue_shards(pool_size) , pool(CurrentMetrics::AsynchronousInsertThreads, CurrentMetrics::AsynchronousInsertThreadsActive, pool_size) { @@ -143,8 +144,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo AsynchronousInsertQueue::~AsynchronousInsertQueue() { - /// TODO: add a setting for graceful shutdown. - LOG_TRACE(log, "Shutting down the asynchronous insertion queue"); shutdown = true; @@ -156,17 +155,18 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue() assert(dump_by_first_update_threads[i].joinable()); dump_by_first_update_threads[i].join(); + if (flush_on_shutdown) + { + for (auto & [_, elem] : shard.queue) + scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext()); + } + else { - std::lock_guard lock(shard.mutex); for (auto & [_, elem] : shard.queue) - { for (const auto & entry : elem.data->entries) - { entry->finish(std::make_exception_ptr(Exception( ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded)"))); - } - } } } @@ -210,7 +210,9 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) /// to avoid buffering of huge amount of data in memory. 
auto read_buf = getReadBufferFromASTInsertQuery(query); - LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* trow_exception */ false, /* exact_limit */ {}); + LimitReadBuffer limit_buf( + *read_buf, settings.async_insert_max_data_size, + /*throw_exception=*/ false, /*exact_limit=*/ {}); WriteBufferFromString write_buf(bytes); copyData(limit_buf, write_buf); @@ -262,18 +264,19 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) assert(data); data->size_in_bytes += entry_data_size; - ++data->query_number; data->entries.emplace_back(entry); insert_future = entry->getFuture(); LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'", data->entries.size(), data->size_in_bytes, key.query_str); + bool has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size; + bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate; + /// Here we check whether we hit the limit on maximum data size in the buffer. /// And use setting from query context. /// It works, because queries with the same set of settings are already grouped together. - if (data->size_in_bytes >= key.settings.async_insert_max_data_size - || (data->query_number >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate)) + if (!flush_stopped && (has_enough_bytes || has_enough_queries)) { data_to_process = std::move(data); shard.iterators.erase(it); @@ -297,6 +300,47 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) }; } +void AsynchronousInsertQueue::flushAll() +{ + std::lock_guard flush_lock(flush_mutex); + + LOG_DEBUG(log, "Requested to flush asynchronous insert queue"); + + flush_stopped = true; + std::vector queues_to_flush(pool_size); + + for (size_t i = 0; i < pool_size; ++i) + { + std::lock_guard lock(queue_shards[i].mutex); + queues_to_flush[i] = std::move(queue_shards[i].queue); + queue_shards[i].iterators.clear(); + } + + size_t total_queries = 0; + size_t total_bytes = 0; + size_t total_entries = 0; + + for (auto & queue : queues_to_flush) + { + total_queries += queue.size(); + for (auto & [_, entry] : queue) + { + total_bytes += entry.data->size_in_bytes; + total_entries += entry.data->entries.size(); + scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext()); + } + } + + LOG_DEBUG(log, + "Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)", + pool.active(), total_entries, total_bytes, total_queries); + + pool.wait(); + + LOG_DEBUG(log, "Finished flushing of asynchronous insert queue"); + flush_stopped = false; +} + void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) { auto & shard = queue_shards[shard_num]; @@ -322,6 +366,9 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num) if (shutdown) return; + if (flush_stopped) + continue; + const auto now = std::chrono::steady_clock::now(); while (true) diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 23a2860364d..97294d70ead 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -16,7 +16,7 @@ class AsynchronousInsertQueue : public WithContext public: using Milliseconds = std::chrono::milliseconds; - AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_); + AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_); 
~AsynchronousInsertQueue(); struct PushResult @@ -37,6 +37,7 @@ public: std::unique_ptr insert_data_buffer; }; + void flushAll(); PushResult push(ASTPtr query, ContextPtr query_context); size_t getPoolSize() const { return pool_size; } @@ -82,9 +83,7 @@ private: using EntryPtr = std::shared_ptr; std::list entries; - size_t size_in_bytes = 0; - size_t query_number = 0; }; using InsertDataPtr = std::unique_ptr; @@ -112,6 +111,8 @@ private: }; const size_t pool_size; + const bool flush_on_shutdown; + std::vector queue_shards; /// Logic and events behind queue are as follows: @@ -123,6 +124,10 @@ private: /// (async_insert_max_data_size setting). If so, then again we dump the data. std::atomic shutdown{false}; + std::atomic flush_stopped{false}; + + /// A mutex that prevents concurrent forced flushes of queue. + mutable std::mutex flush_mutex; /// Dump the data only inside this pool. ThreadPool pool; diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 36cb57c3678..f73429913b3 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -564,6 +565,17 @@ BlockIO InterpreterSystemQuery::execute() ); break; } + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + auto * queue = getContext()->getAsynchronousInsertQueue(); + if (!queue) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot flush asynchronous insert queue because it is not initialized"); + + queue->flushAll(); + break; + } case Type::STOP_LISTEN_QUERIES: case Type::START_LISTEN_QUERIES: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); @@ -1156,6 +1168,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS); break; } + case Type::FLUSH_ASYNC_INSERT_QUEUE: + { + required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); + break; + } case Type::RESTART_DISK: { required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK); diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index dfe2389edb7..9e2dca8bb23 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -72,6 +72,7 @@ public: START_REPLICATION_QUEUES, FLUSH_LOGS, FLUSH_DISTRIBUTED, + FLUSH_ASYNC_INSERT_QUEUE, STOP_DISTRIBUTED_SENDS, START_DISTRIBUTED_SENDS, START_THREAD_FUZZER, diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.reference b/tests/queries/0_stateless/02726_async_insert_flush_queue.reference new file mode 100644 index 00000000000..b94888d227e --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.reference @@ -0,0 +1,5 @@ +JSONEachRow 3 +Values 2 +0 +0 +9 diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql new file mode 100644 index 00000000000..33f40eef14e --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS t_async_inserts_flush; + +CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; + +SET async_insert = 1; +SET wait_for_async_insert = 0; +SET async_insert_busy_timeout_ms = 1000000; + +INSERT INTO t_async_inserts_flush VALUES (1) (2); +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 10} {"a": 20}; +INSERT INTO 
t_async_inserts_flush FORMAT JSONEachRow {"a": "str"} +INSERT INTO t_async_inserts_flush FORMAT JSONEachRow {"a": 100} {"a": 200} +INSERT INTO t_async_inserts_flush VALUES (3) (4) (5); + +SELECT sleep(1) FORMAT Null; + +SELECT format, length(entries.query_id) FROM system.asynchronous_inserts +WHERE database = currentDatabase() AND table = 't_async_inserts_flush' +ORDER BY format; + +SELECT count() FROM t_async_inserts_flush; + +SYSTEM FLUSH ASYNC INSERT QUEUE; + +SELECT count() FROM system.asynchronous_inserts; +SELECT count() FROM t_async_inserts_flush; + +DROP TABLE t_async_inserts_flush; From c9e30d3cf5f5f0ac9f35e2e08df429bacbe4cd25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 19:04:20 +0200 Subject: [PATCH 004/478] Properly check the limit for `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block` --- src/Core/Settings.h | 3 ++- src/Functions/sleep.h | 21 +++++++++++++++---- .../02725_sleep_max_time.reference | 0 .../0_stateless/02725_sleep_max_time.sql | 1 + 4 files changed, 20 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02725_sleep_max_time.reference create mode 100644 tests/queries/0_stateless/02725_sleep_max_time.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 7f1fe838b80..5aa054d43b0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -633,7 +633,8 @@ class IColumn; M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \ M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ - M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ + M(UInt64, function_sleep_max_microseconds_per_block, 3000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index d1960860308..93525c3f310 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -9,7 +9,8 @@ #include #include #include -#include +#include + namespace ProfileEvents { @@ -40,11 +41,17 @@ enum class FunctionSleepVariant template class FunctionSleep : public IFunction { +private: + UInt64 max_microseconds; public: static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? 
"sleep" : "sleepEachRow"; - static FunctionPtr create(ContextPtr) + static FunctionPtr create(ContextPtr context) + { + return std::make_shared>(context->getSettingsRef().function_sleep_max_microseconds_per_block); + } + + FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_) { - return std::make_shared>(); } /// Get the name of the function. @@ -105,13 +112,19 @@ public: if (size > 0) { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. - if (seconds > 3.0) /// The choice is arbitrary + if (seconds * 1e6 > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); if (!dry_run) { UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); UInt64 microseconds = static_cast(seconds * count * 1e6); + + if (microseconds > max_microseconds) + throw Exception(ErrorCodes::TOO_SLOW, + "The maximum sleep time is 3 seconds. Requested: {} microseconds per block (of size {})", + microseconds, size); + sleepForMicroseconds(microseconds); ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count); ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds); diff --git a/tests/queries/0_stateless/02725_sleep_max_time.reference b/tests/queries/0_stateless/02725_sleep_max_time.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02725_sleep_max_time.sql b/tests/queries/0_stateless/02725_sleep_max_time.sql new file mode 100644 index 00000000000..b8378aee17e --- /dev/null +++ b/tests/queries/0_stateless/02725_sleep_max_time.sql @@ -0,0 +1 @@ +SELECT * FROM system.numbers WHERE sleepEachRow(0.05) LIMIT 10; -- { serverError TOO_SLOW } From 3de0c319c2d6b6206196ece48b228f72f3a9aecd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 19:08:52 +0200 Subject: [PATCH 005/478] Add compatibility --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 1 + src/Functions/sleep.h | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5aa054d43b0..2ab4fe9b32a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -634,7 +634,7 @@ class IColumn; M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \ \ M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ - M(UInt64, function_sleep_max_microseconds_per_block, 3000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ + M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 266d14f645b..33010dc6b3b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,6 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { + {"23.5", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximim sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}}, {"23.4", {{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 93525c3f310..db4f0e7dd3e 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -112,7 +112,7 @@ public: if (size > 0) { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. - if (seconds * 1e6 > max_microseconds) + if (max_microseconds && seconds * 1e6 > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); if (!dry_run) @@ -120,7 +120,7 @@ public: UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); UInt64 microseconds = static_cast(seconds * count * 1e6); - if (microseconds > max_microseconds) + if (max_microseconds && microseconds > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. 
Requested: {} microseconds per block (of size {})", microseconds, size); From 582cf2ca8427c572a83c0bc249275c22fae6de5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 19:48:33 +0200 Subject: [PATCH 006/478] Update tests --- src/Functions/sleep.h | 6 +++--- tests/queries/0_stateless/00956_sensitive_data_masking.sh | 1 + tests/queries/0_stateless/01107_atomic_db_detach_attach.sh | 4 ++-- tests/queries/0_stateless/01114_database_atomic.sh | 6 +++--- .../queries/0_stateless/01192_rename_database_zookeeper.sh | 4 ++-- tests/queries/0_stateless/01238_http_memory_tracking.sh | 2 +- tests/queries/0_stateless/01246_buffer_flush.sql | 2 ++ tests/queries/0_stateless/01338_long_select_and_alter.sh | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 2 +- .../01532_execute_merges_on_single_replica_long.sql | 1 + .../01715_background_checker_blather_zookeeper_long.sql | 1 + .../01737_clickhouse_server_wait_server_pool_long.sh | 2 +- 12 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index db4f0e7dd3e..fba8293e5ff 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -113,7 +113,7 @@ public: { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. if (max_microseconds && seconds * 1e6 > max_microseconds) - throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); + throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds); if (!dry_run) { @@ -122,8 +122,8 @@ public: if (max_microseconds && microseconds > max_microseconds) throw Exception(ErrorCodes::TOO_SLOW, - "The maximum sleep time is 3 seconds. Requested: {} microseconds per block (of size {})", - microseconds, size); + "The maximum sleep time is {} microseconds. 
Requested: {} microseconds per block (of size {})", + max_microseconds, microseconds, size); sleepForMicroseconds(microseconds); ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count); diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index ccd9bbcf10e..a31a71ce381 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -65,6 +65,7 @@ echo 5 # run in background rm -f "$tmp_file2" >/dev/null 2>&1 bash -c "$CLICKHOUSE_CLIENT \ + --function_sleep_max_microseconds_per_block 60 \ --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \ --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > $tmp_file2" & diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index e4dad56bc29..e2a23258584 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01107" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01107 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 $CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_dr $CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 4a3d35e48b7..634b19a7624 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -49,8 +49,8 @@ $CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW $CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 +$CLICKHOUSE_CLIENT 
--function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) sleep 1 # SELECT and INSERT should start before the following RENAMEs $CLICKHOUSE_CLIENT -nm -q " @@ -74,7 +74,7 @@ INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5); SELECT count() FROM test_01114_1.mt " # result: 5 -$CLICKHOUSE_CLIENT -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index dec1276111a..ac516e83c84 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT engine, splitByChar('/', data_path)[-2], uuid, spl # 3. check RENAME don't wait for INSERT $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64) ENGINE=MergeTree ORDER BY n" -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO default" 2>&1| grep -F "already exists" > /dev/null && echo "ok" @@ -60,7 +60,7 @@ $CLICKHOUSE_CLIENT -q "SELECT database, name, status, origin FROM system.diction $CLICKHOUSE_CLIENT -q "SELECT dictGet('test_01192_atomic.dict', '_part', toUInt64(1))" # 8. check RENAME don't wait for INSERT -$CLICKHOUSE_CLIENT -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT --check_table_dependencies=0 -q "RENAME DATABASE test_01192 TO test_01192_renamed" 2>&1| grep -F "not supported" > /dev/null && echo "ok" diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index 9b0fe875416..eb42159ce15 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -10,7 +10,7 @@ set -o pipefail # This is needed to keep at least one running query for user for the time of test. 
# (1k http queries takes ~1 second, let's run for 5x more to avoid flaps) -${CLICKHOUSE_CLIENT} --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & +${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & # ignore "yes: standard output: Broken pipe" yes 'SELECT 1' 2>/dev/null | { diff --git a/tests/queries/0_stateless/01246_buffer_flush.sql b/tests/queries/0_stateless/01246_buffer_flush.sql index ac507d94b69..36bcaae383f 100644 --- a/tests/queries/0_stateless/01246_buffer_flush.sql +++ b/tests/queries/0_stateless/01246_buffer_flush.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET function_sleep_max_microseconds_per_block = 4000000; + drop table if exists data_01256; drop table if exists buffer_01256; diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 2e3080e9cfc..04a10cfe55e 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 12bc3b09472..829352110f6 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index f217b6094b2..d39ffdc4049 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -44,6 +44,7 @@ SYSTEM STOP REPLICATION QUEUES execute_on_single_replica_r2; OPTIMIZE TABLE execute_on_single_replica_r1 FINAL SETTINGS replication_alter_partitions_sync=0; /* if we will check immediately we can find the log entry unchecked */ +SET function_sleep_max_microseconds_per_block = 4000000; SELECT * FROM numbers(4) where sleepEachRow(1); SELECT '****************************'; diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql index 87e1a039488..32481be1bcd 100644 --- a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql +++ 
b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper_long.sql @@ -18,6 +18,7 @@ DETACH TABLE i20203_1; ATTACH TABLE i20203_2; -- sleep 10 seconds +SET function_sleep_max_microseconds_per_block = 10000000; SELECT number from numbers(10) where sleepEachRow(1) Format Null; SELECT num_tries < 50 diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh index d83656e0e8c..adab3906e5b 100755 --- a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh @@ -54,7 +54,7 @@ if ! $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null fi query_id="$CLICKHOUSE_DATABASE-$SECONDS" -$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & +$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null --function_sleep_max_microseconds_per_block 0 -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & client_pid=$! # wait until the query will appear in processlist (max 10 second) From 95caa02cbc053f672ffa83a6dbe1a96259ea4d25 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 29 Apr 2023 23:28:08 +0200 Subject: [PATCH 007/478] Update test --- ...02494_zero_copy_and_projection_and_mutation_work_together.sql | 1 + .../02572_query_views_log_background_thread.reference | 1 + .../0_stateless/02572_query_views_log_background_thread.sql | 1 + 3 files changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql index 7a51d86dd30..b6ab9b7d0c3 100644 --- a/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql +++ b/tests/queries/0_stateless/02494_zero_copy_and_projection_and_mutation_work_together.sql @@ -70,6 +70,7 @@ SYSTEM SYNC REPLICA wikistat2; -- it doesn't make test flaky, rarely we will not delete the parts because of cleanup thread was slow. -- Such condition will lead to successful queries. 
+SET function_sleep_max_microseconds_per_block = 5000000; SELECT 0 FROM numbers(5) WHERE sleepEachRow(1) = 1; select sum(hits), count() from wikistat1 GROUP BY project, subproject, path settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference index eeba62c5dc8..22dfaf93781 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.reference +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.reference @@ -4,6 +4,7 @@ insert into buffer_02572 values (1); select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait +SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; select * from data_02572; 1 diff --git a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql index dc229412b13..939c189c5fe 100644 --- a/tests/queries/0_stateless/02572_query_views_log_background_thread.sql +++ b/tests/queries/0_stateless/02572_query_views_log_background_thread.sql @@ -22,6 +22,7 @@ insert into buffer_02572 values (1); select * from data_02572; select * from copy_02572; -- we cannot use OPTIMIZE, this will attach query context, so let's wait +SET function_sleep_max_microseconds_per_block = 6000000; select sleepEachRow(1) from numbers(3*2) format Null; select * from data_02572; select * from copy_02572; From 748a21b791f5846a4f9f1d49d38fc077c7f9d3d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 30 Apr 2023 01:44:03 +0200 Subject: [PATCH 008/478] Fix typo --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 33010dc6b3b..e0d23d139f3 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -80,7 +80,7 @@ namespace SettingsChangesHistory /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972) static std::map settings_changes_history = { - {"23.5", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximim sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.5", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. 
If you set compatibility with the previous versions, we will disable the limit altogether."}}}, {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}}}, {"23.4", {{"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, From d3c3d8b8e401d239416f323c69ceb12c67e3c26d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Mar 2023 16:48:40 +0100 Subject: [PATCH 009/478] Remove export of dynamic symbols --- CMakeLists.txt | 10 ++++++++-- programs/library-bridge/CMakeLists.txt | 4 ---- programs/odbc-bridge/CMakeLists.txt | 6 ------ 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 263b202049b..3283ca52ca7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -155,8 +155,14 @@ elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") endif () -# Make sure the final executable has symbols exported -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") +if (OS_LINUX) + # We should not export dynamic symbols, because: + # - The main clickhouse binary does not use dlopen, + # and whatever is poisoning it by LD_PRELOAD should not link to our symbols. + # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries + # should not expose their symbols to ODBC drivers and libraries. + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") +endif () if (OS_DARWIN) # The `-all_load` flag forces loading of all symbols from all libraries, diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 1cacc391ca5..dd0bf67cb64 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -13,10 +13,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp ) -if (OS_LINUX) - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) target_link_libraries(clickhouse-library-bridge PRIVATE diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 118610e4dcd..56373601b95 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -15,12 +15,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES validateODBCConnectionString.cpp ) -if (OS_LINUX) - # clickhouse-odbc-bridge is always a separate binary. - # Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers. 
- set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") -endif () - clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE From 5a3281bb4912dce6a6125681b0804b97653da763 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Mar 2023 19:30:03 +0100 Subject: [PATCH 010/478] Remove unused code --- src/Common/getResource.cpp | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp index fe603fcc550..09777640dd9 100644 --- a/src/Common/getResource.cpp +++ b/src/Common/getResource.cpp @@ -1,6 +1,4 @@ #include "getResource.h" -#include -#include #include #include @@ -14,39 +12,6 @@ std::string_view getResource(std::string_view name) std::replace(name_replaced.begin(), name_replaced.end(), '.', '_'); boost::replace_all(name_replaced, "+", "_PLUS_"); -#if defined USE_MUSL /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. return DB::SymbolIndex::instance()->getResource(name_replaced); - -#else - // In most `dlsym(3)` APIs, one passes the symbol name as it appears via - // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be - // looked up with the string `"_foo"`. - // - // Apple's linker is confusingly different. The NOTES on the man page for - // `dlsym(3)` claim that one looks up the symbol with "the name used in C - // source code". In this example, that would mean using the string `"foo"`. - // This apparently applies even in the case where the symbol did not originate - // from C source, such as the embedded binary resource files used here. So - // the symbol name must not have a leading `_` on Apple platforms. It's not - // clear how this applies to other symbols, such as those which _have_ a leading - // underscore in them by design, many leading underscores, etc. -#if defined OS_DARWIN - std::string prefix = "binary_"; -#else - std::string prefix = "_binary_"; -#endif - std::string symbol_name_start = prefix + name_replaced + "_start"; - std::string symbol_name_end = prefix + name_replaced + "_end"; - - const char * sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); - const char * sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); - - if (sym_start && sym_end) - { - auto resource_size = static_cast(std::distance(sym_start, sym_end)); - return { sym_start, resource_size }; - } - return {}; -#endif } From 8cd9fc4a2d6936343c2be119d8fdd61986cdd77f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 11 Mar 2023 21:20:20 +0100 Subject: [PATCH 011/478] Fix build --- docker/packager/binary/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index 2cd0a011013..c7f31e13287 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -62,7 +62,7 @@ then ninja $NINJA_FLAGS clickhouse-keeper ls -la ./programs/ - ldd ./programs/clickhouse-keeper + ldd ./programs/clickhouse-keeper ||: if [ -n "$MAKE_DEB" ]; then # No quotes because I want it to expand to nothing if empty. From 03845ba9c5fb1ccb03330059369641751f1c9ab1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 00:16:23 +0200 Subject: [PATCH 012/478] Fix MSan. 
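MemorySanitizer only tracks writes made by instrumented code, so the dl_phdr_info structures that the dynamic loader passes to the dl_iterate_phdr callback look uninitialized to it; their fields have to be unpoisoned explicitly before SymbolIndex reads them. The following is a minimal standalone sketch of that pattern, not the actual call sites — it assumes a clang build with -fsanitize=memory, and in the real code __msan_unpoison is wrapped in a macro that becomes a no-op when MSan is off:

    #include <link.h>                       // dl_iterate_phdr, dl_phdr_info
    #include <sanitizer/msan_interface.h>   // __msan_unpoison (MSan builds only)
    #include <cstddef>

    /// The loader fills `info` outside of instrumented code, so MSan considers it poisoned.
    static int callback(dl_phdr_info * info, size_t /*size*/, void * /*data*/)
    {
        /// Mark the loader-written fields as initialized before reading them.
        __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum));
        __msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr));

        /// Each program header is also written by the loader and must be unpoisoned individually.
        for (size_t i = 0; i < info->dlpi_phnum; ++i)
            __msan_unpoison(&info->dlpi_phdr[i], sizeof(info->dlpi_phdr[i]));

        return 0; /// continue iterating over all loaded objects
    }

    int main()
    {
        dl_iterate_phdr(callback, nullptr);
    }

The same reasoning applies to the string and symbol tables reached through those headers, which is why the diff below also unpoisons strtab entries and ELF symbol arrays before dereferencing them.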
--- src/Common/SymbolIndex.cpp | 81 +++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 31 deletions(-) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index f1cace5017c..79f97e93a2f 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -9,7 +9,6 @@ #include -//#include #include #include @@ -63,9 +62,11 @@ Otherwise you will get only exported symbols from program headers. #endif #define __msan_unpoison_string(X) // NOLINT +#define __msan_unpoison(X, Y) // NOLINT #if defined(ch_has_feature) # if ch_has_feature(memory_sanitizer) # undef __msan_unpoison_string +# undef __msan_unpoison # include # endif #endif @@ -136,10 +137,12 @@ void collectSymbolsFromProgramHeaders( /* Iterate over all headers of the current shared lib * (first call is for the executable itself) */ + __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { /* Further processing is only needed if the dynamic section is reached */ + __msan_unpoison(&info->dlpi_phdr[header_index], sizeof(info->dlpi_phdr[header_index])); if (info->dlpi_phdr[header_index].p_type != PT_DYNAMIC) continue; @@ -160,44 +163,53 @@ void collectSymbolsFromProgramHeaders( */ size_t sym_cnt = 0; - for (const auto * it = dyn_begin; it->d_tag != DT_NULL; ++it) { - ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); - - // TODO: this branch leads to invalid address of the hash table. Need further investigation. - // if (it->d_tag == DT_HASH) - // { - // const ElfW(Word) * hash = reinterpret_cast(base_address); - // sym_cnt = hash[1]; - // break; - // } - if (it->d_tag == DT_GNU_HASH) + const auto * it = dyn_begin; + while (true) { - /// This code based on Musl-libc. + __msan_unpoison(it, sizeof(*it)); + if (it->d_tag != DT_NULL) + break; - const uint32_t * buckets = nullptr; - const uint32_t * hashval = nullptr; + ElfW(Addr) base_address = correct_address(info->dlpi_addr, it->d_un.d_ptr); - const ElfW(Word) * hash = reinterpret_cast(base_address); - - buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); - - for (ElfW(Word) i = 0; i < hash[0]; ++i) - if (buckets[i] > sym_cnt) - sym_cnt = buckets[i]; - - if (sym_cnt) + if (it->d_tag == DT_GNU_HASH) { - sym_cnt -= hash[1]; - hashval = buckets + hash[0] + sym_cnt; - do + /// This code based on Musl-libc. 
+ + const uint32_t * buckets = nullptr; + const uint32_t * hashval = nullptr; + + const ElfW(Word) * hash = reinterpret_cast(base_address); + + __msan_unpoison(&hash[0], sizeof(*hash)); + __msan_unpoison(&hash[1], sizeof(*hash)); + __msan_unpoison(&hash[2], sizeof(*hash)); + + buckets = hash + 4 + (hash[2] * sizeof(size_t) / 4); + + __msan_unpoison(buckets, hash[0] * sizeof(buckets[0])); + + for (ElfW(Word) i = 0; i < hash[0]; ++i) + if (buckets[i] > sym_cnt) + sym_cnt = buckets[i]; + + if (sym_cnt) { - ++sym_cnt; + sym_cnt -= hash[1]; + hashval = buckets + hash[0] + sym_cnt; + __msan_unpoison(&hashval, sizeof(hashval)); + do + { + ++sym_cnt; + } + while (!(*hashval++ & 1)); } - while (!(*hashval++ & 1)); + + break; } - break; + ++it; } } @@ -228,6 +240,8 @@ void collectSymbolsFromProgramHeaders( /* Get the pointer to the first entry of the symbol table */ const ElfW(Sym) * elf_sym = reinterpret_cast(base_address); + __msan_unpoison(elf_sym, sym_cnt * sizeof(*elf_sym)); + /* Iterate over the symbol table */ for (ElfW(Word) sym_index = 0; sym_index < ElfW(Word)(sym_cnt); ++sym_index) { @@ -235,6 +249,7 @@ void collectSymbolsFromProgramHeaders( * This is located at the address of st_name relative to the beginning of the string table. */ const char * sym_name = &strtab[elf_sym[sym_index].st_name]; + __msan_unpoison_string(sym_name); if (!sym_name) continue; @@ -264,13 +279,17 @@ void collectSymbolsFromProgramHeaders( #if !defined USE_MUSL String getBuildIDFromProgramHeaders(dl_phdr_info * info) { + __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { const ElfPhdr & phdr = info->dlpi_phdr[header_index]; + __msan_unpoison(&phdr, sizeof(phdr)); if (phdr.p_type != PT_NOTE) continue; - return Elf::getBuildID(reinterpret_cast(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz); + std::string_view view(reinterpret_cast(info->dlpi_addr + phdr.p_vaddr), phdr.p_memsz); + __msan_unpoison(view.data(), view.size()); + return Elf::getBuildID(view.data(), view.size()); } return {}; } From 9e513a147b5ca7ca0b75feec5488093f32df77d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 04:22:01 +0200 Subject: [PATCH 013/478] Fixup --- src/Common/SymbolIndex.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index 79f97e93a2f..394ae1a0592 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -138,6 +138,7 @@ void collectSymbolsFromProgramHeaders( * (first call is for the executable itself) */ __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); + __msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { /* Further processing is only needed if the dynamic section is reached @@ -280,6 +281,7 @@ void collectSymbolsFromProgramHeaders( String getBuildIDFromProgramHeaders(dl_phdr_info * info) { __msan_unpoison(&info->dlpi_phnum, sizeof(info->dlpi_phnum)); + __msan_unpoison(&info->dlpi_phdr, sizeof(info->dlpi_phdr)); for (size_t header_index = 0; header_index < info->dlpi_phnum; ++header_index) { const ElfPhdr & phdr = info->dlpi_phdr[header_index]; From 726222f1ea69018115642156a06c64ec546244d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:33:11 +0200 Subject: [PATCH 014/478] Fix tests --- tests/queries/0_stateless/00956_sensitive_data_masking.sh | 2 +- tests/queries/0_stateless/01107_atomic_db_detach_attach.sh | 4 ++-- 
tests/queries/0_stateless/01114_database_atomic.sh | 6 +++--- .../queries/0_stateless/01192_rename_database_zookeeper.sh | 4 ++-- tests/queries/0_stateless/01238_http_memory_tracking.sh | 2 +- tests/queries/0_stateless/01338_long_select_and_alter.sh | 2 +- .../0_stateless/01338_long_select_and_alter_zookeeper.sh | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index a31a71ce381..926557e4ba6 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -65,7 +65,7 @@ echo 5 # run in background rm -f "$tmp_file2" >/dev/null 2>&1 bash -c "$CLICKHOUSE_CLIENT \ - --function_sleep_max_microseconds_per_block 60 \ + --function_sleep_max_microseconds_per_block 60000000 \ --query=\"select sleepEachRow(1) from numbers(10) where ignore('find_me_TOPSECRET=TOPSECRET')=0 and ignore('fwerkh_that_magic_string_make_me_unique') = 0 FORMAT Null\" \ --log_queries=1 --ignore-error --multiquery |& grep -v '^(query: ' > $tmp_file2" & diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index e2a23258584..bcaa70abbb5 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS test_01107" $CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01107 ENGINE=Atomic" $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01107.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple()" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) FROM numbers(5)" & sleep 1 $CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 @@ -23,7 +23,7 @@ $CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_dr $CLICKHOUSE_CLIENT -q "ATTACH DATABASE test_01107" $CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01107.mt" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(1) FROM numbers(5)" && echo "end" & sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 && sleep 1 && echo "dropped" wait diff --git a/tests/queries/0_stateless/01114_database_atomic.sh b/tests/queries/0_stateless/01114_database_atomic.sh index 634b19a7624..decbe136fc4 100755 --- a/tests/queries/0_stateless/01114_database_atomic.sh +++ b/tests/queries/0_stateless/01114_database_atomic.sh @@ -49,8 +49,8 @@ $CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW $CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 
110, 5995 -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995 +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows) sleep 1 # SELECT and INSERT should start before the following RENAMEs $CLICKHOUSE_CLIENT -nm -q " @@ -74,7 +74,7 @@ INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5); SELECT count() FROM test_01114_1.mt " # result: 5 -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows) sleep 1 $CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped" diff --git a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh index ac516e83c84..6dd7ff3cdc8 100755 --- a/tests/queries/0_stateless/01192_rename_database_zookeeper.sh +++ b/tests/queries/0_stateless/01192_rename_database_zookeeper.sh @@ -20,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT engine, splitByChar('/', data_path)[-2], uuid, spl # 3. check RENAME don't wait for INSERT $CLICKHOUSE_CLIENT -q "CREATE TABLE test_01192.mt (n UInt64) ENGINE=MergeTree ORDER BY n" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 15000000 -q "INSERT INTO test_01192.mt SELECT number + sleepEachRow(1.5) FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT -q "RENAME DATABASE test_01192 TO default" 2>&1| grep -F "already exists" > /dev/null && echo "ok" @@ -60,7 +60,7 @@ $CLICKHOUSE_CLIENT -q "SELECT database, name, status, origin FROM system.diction $CLICKHOUSE_CLIENT -q "SELECT dictGet('test_01192_atomic.dict', '_part', toUInt64(1))" # 8. check RENAME don't wait for INSERT -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 -q "INSERT INTO test_01192_atomic.mt SELECT number + sleepEachRow(1) + 10 FROM numbers(10)" && echo "inserted" & sleep 1 $CLICKHOUSE_CLIENT --check_table_dependencies=0 -q "RENAME DATABASE test_01192 TO test_01192_renamed" 2>&1| grep -F "not supported" > /dev/null && echo "ok" diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index eb42159ce15..26d3dd8acd4 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -10,7 +10,7 @@ set -o pipefail # This is needed to keep at least one running query for user for the time of test. 
# (1k http queries takes ~1 second, let's run for 5x more to avoid flaps) -${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & +${CLICKHOUSE_CLIENT} --function_sleep_max_microseconds_per_block 5000000 --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)' & # ignore "yes: standard output: Broken pipe" yes 'SELECT 1' 2>/dev/null | { diff --git a/tests/queries/0_stateless/01338_long_select_and_alter.sh b/tests/queries/0_stateless/01338_long_select_and_alter.sh index 04a10cfe55e..fcdfa2dec82 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 diff --git a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh index 829352110f6..50ade3fad45 100755 --- a/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh +++ b/tests/queries/0_stateless/01338_long_select_and_alter_zookeeper.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT --query "CREATE TABLE alter_mt (key UInt64, value String) ENG $CLICKHOUSE_CLIENT --query "INSERT INTO alter_mt SELECT number, toString(number) FROM numbers(5)" -$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & +$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 10000000 --query "SELECT count(distinct concat(value, '_')) FROM alter_mt WHERE not sleepEachRow(2)" & # to be sure that select took all required locks sleep 2 From e159ee84e918c587f873a27665ca346cb3b4f7db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:38:30 +0200 Subject: [PATCH 015/478] Fix tests --- .../0_stateless/01098_temporary_and_external_tables.sh | 2 +- .../01532_execute_merges_on_single_replica_long.sql | 2 +- tests/queries/0_stateless/02473_optimize_old_parts.sh | 2 +- tests/queries/0_stateless/02530_dictionaries_update_field.sh | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh index 860529a26e5..9ed78fd9f81 100755 --- a/tests/queries/0_stateless/01098_temporary_and_external_tables.sh +++ b/tests/queries/0_stateless/01098_temporary_and_external_tables.sh @@ -25,7 +25,7 @@ echo "SELECT COUNT() FROM $internal_table_name" | ${CLICKHOUSE_CURL} -m 60 -sSgk echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&file_format=CSV&file_types=UInt64&query=SELECT+sum((number+GLOBAL+IN+(SELECT+number+AS+n+FROM+remote('127.0.0.2',+numbers(5))+WHERE+n+GLOBAL+IN+(SELECT+*+FROM+tmp_table)+AND+n+GLOBAL+NOT+IN+(SELECT+*+FROM+file)+))+AS+res),+sum(number*res)+FROM+remote('127.0.0.2',+numbers(10))" -echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' 
"$url&file_format=CSV&file_types=UInt64&query=SELECT+_1%2BsleepEachRow(3)+FROM+file" & +echo -ne '0\n1\n' | ${CLICKHOUSE_CURL} -m 30 -sSkF 'file=@-' "$url&function_sleep_max_microseconds_per_block=0&file_format=CSV&file_types=UInt64&query=SELECT+_1%2BsleepEachRow(3)+FROM+file" & wait ${CLICKHOUSE_CURL} -m 30 -sSk "$url" --data "DROP TEMPORARY TABLE tmp_table" diff --git a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql index 4bd5e79d1b3..30beb29251e 100644 --- a/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql +++ b/tests/queries/0_stateless/01532_execute_merges_on_single_replica_long.sql @@ -44,7 +44,7 @@ SYSTEM STOP REPLICATION QUEUES execute_on_single_replica_r2; OPTIMIZE TABLE execute_on_single_replica_r1 FINAL SETTINGS replication_alter_partitions_sync=0; /* if we will check immediately we can find the log entry unchecked */ -SET function_sleep_max_microseconds_per_block = 4000000; +SET function_sleep_max_microseconds_per_block = 10000000; SELECT * FROM numbers(4) where sleepEachRow(1); SELECT '****************************'; diff --git a/tests/queries/0_stateless/02473_optimize_old_parts.sh b/tests/queries/0_stateless/02473_optimize_old_parts.sh index 0c2dd04d024..b563bc31b39 100755 --- a/tests/queries/0_stateless/02473_optimize_old_parts.sh +++ b/tests/queries/0_stateless/02473_optimize_old_parts.sh @@ -61,7 +61,7 @@ INSERT INTO test_with_merge SELECT 3;" wait_for_number_of_parts 'test_with_merge' 1 100 $CLICKHOUSE_CLIENT -nmq " -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_with_merge' AND active; DROP TABLE test_with_merge;" diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 569466fe606..44000e5d2cd 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -53,13 +53,13 @@ for layout in "${layouts[@]}"; do SELECT key, value FROM $dictionary_name ORDER BY key ASC; INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); - SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; + SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM $dictionary_name ORDER BY key ASC; INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); - SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; + SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM $dictionary_name ORDER BY key ASC; -- { echoOff } From fbda7974a5424b79a952fa30b16b7cd3c390bdc8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:39:23 +0200 Subject: [PATCH 016/478] Fix tests --- .../queries/0_stateless/02676_optimize_old_parts_replicated.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh index 2202a349c56..c1f28f9f079 100755 --- a/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh +++ b/tests/queries/0_stateless/02676_optimize_old_parts_replicated.sh @@ -61,7 +61,7 @@ INSERT INTO test_replicated SELECT 3;" wait_for_number_of_parts 'test_replicated' 1 100 $CLICKHOUSE_CLIENT -nmq " -SELECT sleepEachRow(1) FROM numbers(9) FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one +SELECT sleepEachRow(1) FROM numbers(9) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; -- Sleep for 9 seconds and verify that we keep the old part because it's the only one SELECT (now() - modification_time) > 5 FROM system.parts WHERE database = currentDatabase() AND table='test_replicated' AND active; DROP TABLE test_replicated;" From 08a9d97de74a27bd28d7cc387d7f5cdba707d6cb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:40:43 +0200 Subject: [PATCH 017/478] Fix tests --- tests/queries/0_stateless/02352_rwlock.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02352_rwlock.sh b/tests/queries/0_stateless/02352_rwlock.sh index 7a0b9ef8911..7505a03a382 100755 --- a/tests/queries/0_stateless/02352_rwlock.sh +++ b/tests/queries/0_stateless/02352_rwlock.sh @@ -51,7 +51,7 @@ while :; do insert_query_id="insert-$(random_str 10)" # 20 seconds sleep - $CLICKHOUSE_CLIENT --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & + $CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 20000000 --query_id "$insert_query_id" -q "INSERT INTO ${CLICKHOUSE_DATABASE}_ordinary.data_02352 SELECT sleepEachRow(1) FROM numbers(20) GROUP BY number" & if ! wait_query_by_id_started "$insert_query_id"; then wait continue From 0818092ae8d49f2e7f87fed6c8703374384719fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 19:45:57 +0200 Subject: [PATCH 018/478] Enable Sparse columns by default --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 5416b77a97e..27f482d79ba 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -37,7 +37,7 @@ struct Settings; M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ - M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ + M(Float, ratio_of_defaults_for_sparse_serialization, 0.95, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ \ /** Merge settings. 
*/ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ From 7ec98205b58ab36eb28b2f46348dfcfe22215a3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 May 2023 22:54:14 +0300 Subject: [PATCH 019/478] Update MergeTreeSettings.h --- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 27f482d79ba..caac86c6706 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -37,7 +37,7 @@ struct Settings; M(UInt64, min_rows_for_compact_part, 0, "Experimental. Minimal number of rows to create part in compact format instead of saving it in RAM", 0) \ M(Bool, in_memory_parts_enable_wal, true, "Whether to write blocks in Native format to write-ahead-log before creation in-memory part", 0) \ M(UInt64, write_ahead_log_max_bytes, 1024 * 1024 * 1024, "Rotate WAL, if it exceeds that amount of bytes", 0) \ - M(Float, ratio_of_defaults_for_sparse_serialization, 0.95, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ + M(Float, ratio_of_defaults_for_sparse_serialization, 0.9375f, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ From f3f6ccd7733aa4946c339b4973210f85243e44d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:28:54 +0200 Subject: [PATCH 020/478] Update tests --- .../0_stateless/00443_preferred_block_size_bytes.sh | 6 +++--- ...0484_preferred_max_column_in_block_size_bytes.sql | 8 ++++---- .../00804_test_delta_codec_compression.sql | 12 ++++++------ .../0_stateless/00950_test_double_delta_codec.sql | 2 +- ...00961_checksums_in_system_parts_columns_table.sql | 2 +- .../0_stateless/01055_compact_parts_granularity.sh | 2 +- .../queries/0_stateless/01786_explain_merge_tree.sh | 4 ++-- tests/queries/0_stateless/02263_lazy_mark_load.sh | 2 +- .../0_stateless/02293_selected_rows_and_merges.sh | 8 +++----- .../0_stateless/02361_fsync_profile_events.sh | 7 ++++--- .../02381_compress_marks_and_primary_key.sql | 4 ++-- 11 files changed, 28 insertions(+), 29 deletions(-) diff --git a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh index c184b58bf53..27b9f5c00c7 100755 --- a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh +++ b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes" -$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE preferred_block_size_bytes (p Date, s String) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=1, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO preferred_block_size_bytes (s) SELECT '16_bytes_-_-_-_' AS s FROM system.numbers LIMIT 10, 90" $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE preferred_block_size_bytes" $CLICKHOUSE_CLIENT --preferred_block_size_bytes=26 -q "SELECT DISTINCT blockSize(), ignore(p, s) FROM preferred_block_size_bytes" @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS preferred_block_size_bytes" # PREWHERE using empty column $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS pbs" -$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE pbs (p Date, i UInt64, sa Array(String)) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=100, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO pbs (p, i, sa) SELECT toDate(i % 30) AS p, number AS i, ['a'] AS sa FROM system.numbers LIMIT 1000" $CLICKHOUSE_CLIENT -q "ALTER TABLE pbs ADD COLUMN s UInt8 DEFAULT 0" $CLICKHOUSE_CLIENT --preferred_block_size_bytes=100000 -q "SELECT count() FROM pbs PREWHERE s = 0" @@ -30,7 +30,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE pbs" # Nullable PREWHERE $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere" -$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE nullable_prewhere (p Date, f Nullable(UInt64), d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY p SETTINGS index_granularity=8, index_granularity_bytes=0, min_bytes_for_wide_part = 0, min_rows_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "INSERT INTO nullable_prewhere SELECT toDate(0) AS p, if(number % 2 = 0, CAST(number AS Nullable(UInt64)), CAST(NULL AS Nullable(UInt64))) AS f, number as d FROM system.numbers LIMIT 1001" $CLICKHOUSE_CLIENT -q "SELECT sum(d), sum(f), max(d) FROM nullable_prewhere PREWHERE NOT isNull(f)" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS nullable_prewhere" diff --git a/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql b/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql index 470bca70e06..be4af2221a5 100644 --- a/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql +++ b/tests/queries/0_stateless/00484_preferred_max_column_in_block_size_bytes.sql @@ -1,7 +1,7 @@ -- Tags: no-random-settings drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = 
MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 8192; set preferred_block_size_bytes = 2000000; @@ -17,19 +17,19 @@ set preferred_max_column_in_block_size_bytes = 4194304; select max(blockSize()), min(blockSize()), any(ignore(*)) from tab_00484; drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 47; set preferred_max_column_in_block_size_bytes = 1152; select blockSize(), * from tab_00484 where x = 1 or x > 36 format Null; drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s FixedString(128)) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, toFixedString('', 128) from system.numbers limit 10; set preferred_max_column_in_block_size_bytes = 128; select s from tab_00484 where s == '' format Null; drop table if exists tab_00484; -create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0; +create table tab_00484 (date Date, x UInt64, s String) engine = MergeTree PARTITION BY date ORDER BY (date, x) SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; insert into tab_00484 select today(), number, 'abc' from system.numbers limit 81920; set preferred_block_size_bytes = 0; select count(*) from tab_00484 prewhere s != 'abc' format Null; diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index 25988f6474b..01a2f53bf93 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -9,12 +9,12 @@ DROP TABLE IF EXISTS default_codec_synthetic; CREATE TABLE delta_codec_synthetic ( id UInt64 Codec(Delta, ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; CREATE TABLE default_codec_synthetic ( id UInt64 Codec(ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO delta_codec_synthetic 
SELECT number FROM system.numbers LIMIT 5000000; INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; @@ -47,12 +47,12 @@ DROP TABLE IF EXISTS default_codec_float; CREATE TABLE delta_codec_float ( id Float64 Codec(Delta, LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; CREATE TABLE default_codec_float ( id Float64 Codec(LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO delta_codec_float SELECT number FROM numbers(1547510400, 500000) WHERE number % 3 == 0 OR number % 5 == 0 OR number % 7 == 0 OR number % 11 == 0; INSERT INTO default_codec_float SELECT * from delta_codec_float; @@ -85,12 +85,12 @@ DROP TABLE IF EXISTS default_codec_string; CREATE TABLE delta_codec_string ( id Float64 Codec(Delta, LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; CREATE TABLE default_codec_string ( id Float64 Codec(LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO delta_codec_string SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000); INSERT INTO default_codec_string SELECT * from delta_codec_string; diff --git a/tests/queries/0_stateless/00950_test_double_delta_codec.sql b/tests/queries/0_stateless/00950_test_double_delta_codec.sql index f6199a6e4ec..58cf35b5248 100644 --- a/tests/queries/0_stateless/00950_test_double_delta_codec.sql +++ b/tests/queries/0_stateless/00950_test_double_delta_codec.sql @@ -24,7 +24,7 @@ CREATE TABLE codecTest ( valueI8 Int8 CODEC(DoubleDelta), valueDT DateTime CODEC(DoubleDelta), valueD Date CODEC(DoubleDelta) -) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0; +) Engine = MergeTree ORDER BY key SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; -- checking for overflow diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql index 43b7775e816..8df7d728560 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS test_00961; CREATE TABLE test_00961 (d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = MergeTree PARTITION BY d ORDER BY (a, b) - SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi'; + SETTINGS index_granularity = 111, 
min_bytes_for_wide_part = 0, compress_marks = 0, compress_primary_key = 0, index_granularity_bytes = '10Mi', ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO test_00961 VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789); diff --git a/tests/queries/0_stateless/01055_compact_parts_granularity.sh b/tests/queries/0_stateless/01055_compact_parts_granularity.sh index f3da33f6ccf..3e5da1e6f90 100755 --- a/tests/queries/0_stateless/01055_compact_parts_granularity.sh +++ b/tests/queries/0_stateless/01055_compact_parts_granularity.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS mt_compact" $CLICKHOUSE_CLIENT -q "CREATE TABLE mt_compact(a Int, s String) ENGINE = MergeTree ORDER BY a SETTINGS min_rows_for_wide_part = 1000, - index_granularity = 14;" + index_granularity = 14, ratio_of_defaults_for_sparse_serialization = 1;" $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES mt_compact" diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 15f8821d80d..0d4acba338a 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -10,7 +10,7 @@ CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_qu $CLICKHOUSE_CLIENT -q "drop table if exists test_index" $CLICKHOUSE_CLIENT -q "drop table if exists idx" -$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "create table test_index (x UInt32, y UInt32, z UInt32, t UInt32, index t_minmax t % 20 TYPE minmax GRANULARITY 2, index t_set t % 19 type set(4) granularity 2) engine = MergeTree order by (x, y) partition by (y, bitAnd(z, 3), intDiv(t, 15)) settings index_granularity = 2, min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "insert into test_index select number, number > 3 ? 3 : number, number = 1 ? 
1 : 0, number from numbers(20)" $CLICKHOUSE_CLIENT -q " @@ -35,7 +35,7 @@ $CLICKHOUSE_CLIENT -q " explain actions = 1 select x from test_index where x > 15 order by x desc; " | grep -A 100 "ReadFromMergeTree" -$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0" +$CLICKHOUSE_CLIENT -q "CREATE TABLE idx (x UInt32, y UInt32, z UInt32) ENGINE = MergeTree ORDER BY (x, x + y) settings min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1" $CLICKHOUSE_CLIENT -q "insert into idx select number, number, number from numbers(10)" $CLICKHOUSE_CLIENT -q " diff --git a/tests/queries/0_stateless/02263_lazy_mark_load.sh b/tests/queries/0_stateless/02263_lazy_mark_load.sh index bf37556bfa6..35a1b4a44dd 100755 --- a/tests/queries/0_stateless/02263_lazy_mark_load.sh +++ b/tests/queries/0_stateless/02263_lazy_mark_load.sh @@ -24,7 +24,7 @@ CREATE TABLE lazy_mark_test n9 UInt64 ) ENGINE = MergeTree -ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0; +ORDER BY n0 SETTINGS min_bytes_for_wide_part = 0, ratio_of_defaults_for_sparse_serialization = 1; EOF ${CLICKHOUSE_CLIENT} -q "SYSTEM STOP MERGES lazy_mark_test" diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh index 9d1483f5bf7..76c562c9744 100755 --- a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh +++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) query_id=$(${CLICKHOUSE_CLIENT} -q "select lower(hex(reverse(reinterpretAsString(generateUUIDv4()))))") -${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x" +${CLICKHOUSE_CLIENT} -q "create table tt (x UInt32, y UInt32) engine = MergeTree order by x SETTINGS ratio_of_defaults_for_sparse_serialization = 1" ${CLICKHOUSE_CLIENT} -q "insert into tt select number, 0 from numbers(1e6)" ${CLICKHOUSE_CLIENT} -q "insert into tt select number, 1 from numbers(1e6)" @@ -17,13 +17,11 @@ ${CLICKHOUSE_CLIENT} --optimize_throw_if_noop 1 -q "optimize table tt final" "-- # Here SelectRows and SelectBytes should be zero, MergedRows is 2m and MergedUncompressedBytes is 16m ${CLICKHOUSE_CLIENT} -q "system flush logs" -${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()" +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'], ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'optimize%' and current_database = currentDatabase()" ${CLICKHOUSE_CLIENT} --mutations_sync 1 -q "alter table tt update y = y + 1 where 1" "--query_id=$query_id" ${CLICKHOUSE_CLIENT} -q "system flush logs" # Here for mutation all values are 0, cause mutation is executed async. # It's pretty hard to write a test with total counter. 
-${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelecteBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" - - +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" diff --git a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index 5b603133f6c..e150d70b896 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -12,9 +12,10 @@ $CLICKHOUSE_CLIENT -nm -q " create table data_fsync_pe (key Int) engine=MergeTree() order by key settings - min_rows_for_wide_part=2, - fsync_after_insert=1, - fsync_part_directory=1; + min_rows_for_wide_part = 2, + fsync_after_insert = 1, + fsync_part_directory = 1, + ratio_of_defaults_for_sparse_serialization = 1; " ret=1 diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index 842e22ba87d..2fe0943745d 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,12 +1,12 @@ -- Tags: no-upgrade-check, no-random-merge-tree-settings drop table if exists test_02381; -create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false; +create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks = false, compress_primary_key = false, ratio_of_defaults_for_sparse_serialization = 1; insert into test_02381 select number, number * 10 from system.numbers limit 1000000; drop table if exists test_02381_compress; create table test_02381_compress(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) - SETTINGS compress_marks=true, compress_primary_key=true, marks_compression_codec='ZSTD(3)', primary_key_compression_codec='ZSTD(3)', marks_compress_block_size=65536, primary_key_compress_block_size=65536; + SETTINGS compress_marks = true, compress_primary_key = true, marks_compression_codec = 'ZSTD(3)', primary_key_compression_codec = 'ZSTD(3)', marks_compress_block_size = 65536, primary_key_compress_block_size = 65536, ratio_of_defaults_for_sparse_serialization = 1; insert into test_02381_compress select number, number * 10 from system.numbers limit 1000000; select * from test_02381_compress where a = 1000 limit 1; From e8f7a84ca6c4e00f6f9ddbf282b109f491244c4c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:37:10 +0200 Subject: [PATCH 021/478] Update a few tests --- tests/queries/0_stateless/01375_compact_parts_codecs.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01375_compact_parts_codecs.sql b/tests/queries/0_stateless/01375_compact_parts_codecs.sql index 1dd39e67876..1c89eb09d0b 100644 --- a/tests/queries/0_stateless/01375_compact_parts_codecs.sql +++ b/tests/queries/0_stateless/01375_compact_parts_codecs.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS codecs; CREATE TABLE codecs (id UInt32, val UInt32, s 
String) ENGINE = MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part = 10000; + SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts @@ -21,7 +21,7 @@ DROP TABLE codecs; CREATE TABLE codecs (id UInt32 CODEC(NONE), val UInt32 CODEC(NONE), s String CODEC(NONE)) ENGINE = MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part = 10000; + SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts @@ -38,7 +38,7 @@ DROP TABLE codecs; CREATE TABLE codecs (id UInt32, val UInt32 CODEC(Delta, ZSTD), s String CODEC(ZSTD)) ENGINE = MergeTree ORDER BY id - SETTINGS min_rows_for_wide_part = 10000; + SETTINGS min_rows_for_wide_part = 10000, ratio_of_defaults_for_sparse_serialization = 1; INSERT INTO codecs SELECT number, number, toString(number) FROM numbers(1000); SELECT sum(data_compressed_bytes), sum(data_uncompressed_bytes) FROM system.parts From 7c03801bf7da6803e47f57ab78478c33a9c9a764 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:54:57 +0200 Subject: [PATCH 022/478] Update a test --- tests/queries/0_stateless/02725_parquet_preserve_order.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02725_parquet_preserve_order.sh b/tests/queries/0_stateless/02725_parquet_preserve_order.sh index ea3e4219e35..ac29ef3f361 100755 --- a/tests/queries/0_stateless/02725_parquet_preserve_order.sh +++ b/tests/queries/0_stateless/02725_parquet_preserve_order.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # It'll be read into two blocks. The first block will sleep 2x longer than the second. # So reordering is very likely if the order-preservation doesn't work. 
-$CLICKHOUSE_LOCAL -q "select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1" +$CLICKHOUSE_LOCAL -q "select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, function_sleep_max_microseconds_per_block = 6000000" -$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" -$CLICKHOUSE_LOCAL -q "explain pipeline select number+sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=1, max_threads=2" +$CLICKHOUSE_LOCAL -q "explain pipeline select number + sleepEachRow(3) from file('$CURDIR/data_parquet/02725_data.parquet') settings input_format_parquet_preserve_order=0, parallelize_output_from_storages=1, max_threads=2" From a25de5fb4186fbe103f916b07aa8bd89975048b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 00:55:44 +0200 Subject: [PATCH 023/478] Update a test --- .../02530_dictionaries_update_field.reference | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 40f2c0ee400..88c910e0313 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -4,13 +4,13 @@ flat SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -21,13 +21,13 @@ flat/custom SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -38,13 +38,13 @@ hashed SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First INSERT INTO 
table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -55,13 +55,13 @@ hashed/custom SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -72,13 +72,13 @@ complex_key_hashed SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -89,13 +89,13 @@ complex_key_hashed/custom SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated From 63b559df17a07e42768c4425538426e245d829fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 May 2023 06:49:41 +0200 Subject: [PATCH 024/478] Update a test --- .../02530_dictionaries_update_field.reference | 24 +++++++++---------- 1 file changed, 12 insertions(+), 
12 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 40f2c0ee400..88c910e0313 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -4,13 +4,13 @@ flat SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -21,13 +21,13 @@ flat/custom SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -38,13 +38,13 @@ hashed SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -55,13 +55,13 @@ hashed/custom SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; SELECT key, value FROM 
dict_hashed_custom ORDER BY key ASC;
 1 First
 2 SecondUpdated
@@ -72,13 +72,13 @@ complex_key_hashed
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1 First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1 First
 2 Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC;
 1 First
 2 SecondUpdated
@@ -89,13 +89,13 @@ complex_key_hashed/custom
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1 First
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1 First
 2 Second
 INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now());
 INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now());
-SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null;
+SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null;
 SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC;
 1 First
 2 SecondUpdated
From f0bfdb6b029748a486e5f683171f135d6a5dd957 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Wed, 22 Feb 2023 16:24:02 +0000
Subject: [PATCH 025/478] Refactor Query Tree visitor

---
 src/Analyzer/InDepthQueryTreeVisitor.h    | 65 +++++++++++++++++++++++
 src/Analyzer/Passes/CountDistinctPass.cpp | 34 ++++++++----
 2 files changed, 88 insertions(+), 11 deletions(-)

diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h
index 1cc48fb1e53..ee321842ffa 100644
--- a/src/Analyzer/InDepthQueryTreeVisitor.h
+++ b/src/Analyzer/InDepthQueryTreeVisitor.h
@@ -235,4 +235,69 @@ public:
 template <typename Derived>
 using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor<Derived, true /*const_visitor*/>;
 
+template <typename Impl>
+class QueryTreeVisitor
+{
+public:
+    explicit QueryTreeVisitor(ContextPtr context_)
+        : current_context(std::move(context_))
+    {}
+
+    bool needApply(QueryTreeNodePtr & node)
+    {
+        return getImpl().needApply(node);
+    }
+
+    void visit(QueryTreeNodePtr & node)
+    {
+        auto current_scope_context_ptr = current_context;
+        SCOPE_EXIT(
+            current_context = std::move(current_scope_context_ptr);
+        );
+
+        if (auto * query_node = node->template as<QueryNode>())
+            current_context = query_node->getContext();
+        else if (auto * union_node = node->template as<UnionNode>())
+            current_context = union_node->getContext();
+
+        if (!TOP_TO_BOTTOM)
+            visitChildren(node);
+
+        if (needApply(node))
+            getImpl().apply(node);
+
+        if (TOP_TO_BOTTOM)
+            visitChildren(node);
+    }
+
+    const ContextPtr & getContext() const
+    {
+        return current_context;
+    }
+
+    const Settings & getSettings() const
+    {
+        return current_context->getSettingsRef();
+    }
+private:
+
+    Impl & getImpl()
+    {
+        return *static_cast<Impl *>(this);
+    }
+
+    void visitChildren(QueryTreeNodePtr & node)
+    {
+        for (auto & child : node->getChildren())
+        {
+            if (child)
+                visit(child);
+        }
+    }
+
+    static constexpr bool TOP_TO_BOTTOM = Impl::TOP_TO_BOTTOM;
+
+    ContextPtr current_context;
+};
+
 }
diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp
index 945295f5cbc..38f7d07d052 100644
--- a/src/Analyzer/Passes/CountDistinctPass.cpp
+++ b/src/Analyzer/Passes/CountDistinctPass.cpp
@@ -16,16 +16,17 @@ namespace DB
 namespace
 {
 
-class CountDistinctVisitor : public InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>
+class CountDistinctVisitor : public QueryTreeVisitor<CountDistinctVisitor>
 {
 public:
-    using Base = InDepthQueryTreeVisitorWithContext<CountDistinctVisitor>;
-    using Base::Base;
+    using QueryTreeVisitor::QueryTreeVisitor;
 
-    void visitImpl(QueryTreeNodePtr & node)
+    static constexpr bool TOP_TO_BOTTOM = true;
+
+    bool needApply(QueryTreeNodePtr & node)
     {
         if (!getSettings().count_distinct_optimization)
-            return;
+            return false;
 
         auto * query_node = node->as<QueryNode>();
 
@@ -33,32 +34,43 @@ public:
         if (!query_node || (query_node->hasWith() || query_node->hasPrewhere() || query_node->hasWhere() || query_node->hasGroupBy() || query_node->hasHaving() ||
             query_node->hasWindow() || query_node->hasOrderBy() || query_node->hasLimitByLimit() || query_node->hasLimitByOffset() ||
             query_node->hasLimitBy() || query_node->hasLimit() || query_node->hasOffset()))
-            return;
+            return false;
 
         /// Check that query has only single table expression
         auto join_tree_node_type = query_node->getJoinTree()->getNodeType();
         if (join_tree_node_type == QueryTreeNodeType::JOIN || join_tree_node_type == QueryTreeNodeType::ARRAY_JOIN)
-            return;
+            return false;
 
         /// Check that query has only single node in projection
         auto & projection_nodes = query_node->getProjection().getNodes();
         if (projection_nodes.size() != 1)
-            return;
+            return false;
 
         /// Check that query single projection node is `countDistinct` function
         auto & projection_node = projection_nodes[0];
         auto * function_node = projection_node->as<FunctionNode>();
         if (!function_node)
-            return;
+            return false;
 
         auto lower_function_name = Poco::toLower(function_node->getFunctionName());
         if (lower_function_name != "countdistinct" && lower_function_name != "uniqexact")
-            return;
+            return false;
 
         /// Check that `countDistinct` function has single COLUMN argument
         auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes();
         if (count_distinct_arguments_nodes.size() != 1 && count_distinct_arguments_nodes[0]->getNodeType() != QueryTreeNodeType::COLUMN)
-            return;
+            return false;
+
+        return true;
+    }
+
+    void apply(QueryTreeNodePtr & node)
+    {
+        auto * query_node = node->as<QueryNode>();
+        auto & projection_nodes = query_node->getProjection().getNodes();
+        auto * function_node = projection_nodes[0]->as<FunctionNode>();
+
+        auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes();
         auto & count_distinct_argument_column = count_distinct_arguments_nodes[0];
         auto & count_distinct_argument_column_typed = count_distinct_argument_column->as<ColumnNode &>();
 
From 5c34ee3019199a7e1d24730684c9c84e6c8e0615 Mon Sep 17 00:00:00 2001
From: Dmitry Novik
Date: Tue, 9 May 2023 15:14:49 +0000
Subject: [PATCH 026/478] Skip unresolved table function arguments

---
 src/Analyzer/InDepthQueryTreeVisitor.h    | 15 ++++++++++++++-
 src/Analyzer/Passes/QueryAnalysisPass.cpp |  2 +-
 src/Analyzer/TableFunctionNode.cpp        |  3 ++-
 src/Analyzer/TableFunctionNode.h          |  8 +++++++-
 4 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h
index ee321842ffa..be3a760d4e6 100644
--- a/src/Analyzer/InDepthQueryTreeVisitor.h
+++ b/src/Analyzer/InDepthQueryTreeVisitor.h
@@ -7,6 +7,7 @@
 #include
 #include
+#include <Analyzer/TableFunctionNode.h>
 #include
 #include
@@ -248,6 +249,16 @@ public:
         return getImpl().needApply(node);
     }
 
+    bool shouldSkipSubtree(QueryTreeNodePtr & parent, size_t subtree_index)
+    {
+        if (auto * table_function_node = parent->as<TableFunctionNode>())
+        {
+            const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes();
+            return std::find(unresolved_indexes.begin(), unresolved_indexes.end(), subtree_index) != unresolved_indexes.end();
+        }
+        return false;
+    }
+
     void visit(QueryTreeNodePtr & node)
     {
         auto current_scope_context_ptr = current_context;
@@ -288,10 +299,12 @@ private:
 
     void visitChildren(QueryTreeNodePtr & node)
     {
+        size_t index = 0;
         for (auto & child : node->getChildren())
         {
-            if (child)
+            if (child && !shouldSkipSubtree(node, index))
                 visit(child);
+            ++index;
         }
     }
 
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 7ab0261850b..aaea81dcada 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -6356,7 +6356,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
     table_function_ptr->parseArguments(table_function_ast, scope_context);
 
     auto table_function_storage = table_function_ptr->execute(table_function_ast, scope_context, table_function_ptr->getName());
-    table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context);
+    table_function_node_typed.resolve(std::move(table_function_ptr), std::move(table_function_storage), scope_context, std::move(skip_analysis_arguments_indexes));
 }
 
 /// Resolve array join node in scope
diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp
index c130503d660..30644ad4ec4 100644
--- a/src/Analyzer/TableFunctionNode.cpp
+++ b/src/Analyzer/TableFunctionNode.cpp
@@ -27,12 +27,13 @@ TableFunctionNode::TableFunctionNode(String table_function_name_)
     children[arguments_child_index] = std::make_shared<ListNode>();
 }
 
-void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context)
+void TableFunctionNode::resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector<size_t> unresolved_arguments_indexes_)
 {
     table_function = std::move(table_function_value);
     storage = std::move(storage_value);
     storage_id = storage->getStorageID();
     storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
+    unresolved_arguments_indexes = std::move(unresolved_arguments_indexes_);
 }
 
 const StorageID & TableFunctionNode::getStorageID() const
diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h
index 7786ba62205..69237ac8416 100644
--- a/src/Analyzer/TableFunctionNode.h
+++ b/src/Analyzer/TableFunctionNode.h
@@ -98,7 +98,7 @@ public:
     }
 
     /// Resolve table function with table function, storage and context
-    void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context);
+    void resolve(TableFunctionPtr table_function_value, StoragePtr storage_value, ContextPtr context, std::vector<size_t> unresolved_arguments_indexes_);
 
     /// Get storage id, throws exception if function node is not resolved
     const StorageID & getStorageID() const;
@@ -106,6 +106,11 @@ public:
     /// Get storage snapshot, throws exception if function node is not resolved
     const StorageSnapshotPtr & getStorageSnapshot() const;
 
+    const std::vector<size_t> & 
getUnresolvedArgumentIndexes() const + { + return unresolved_arguments_indexes; + } + /// Return true if table function node has table expression modifiers, false otherwise bool hasTableExpressionModifiers() const { @@ -164,6 +169,7 @@ private: StoragePtr storage; StorageID storage_id; StorageSnapshotPtr storage_snapshot; + std::vector unresolved_arguments_indexes; std::optional table_expression_modifiers; SettingsChanges settings_changes; From 6b0bd698d36014a5eac052857bac2185a1f45f41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 May 2023 04:17:53 +0200 Subject: [PATCH 027/478] Fix mistake --- .../02530_dictionaries_update_field.reference | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.reference b/tests/queries/0_stateless/02530_dictionaries_update_field.reference index 88c910e0313..40f2c0ee400 100644 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.reference +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.reference @@ -4,13 +4,13 @@ flat SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat ORDER BY key ASC; 1 First 2 SecondUpdated @@ -21,13 +21,13 @@ flat/custom SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_flat_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -38,13 +38,13 @@ hashed SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -55,13 +55,13 @@ hashed/custom SELECT key, value 
FROM dict_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated @@ -72,13 +72,13 @@ complex_key_hashed SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed ORDER BY key ASC; 1 First 2 SecondUpdated @@ -89,13 +89,13 @@ complex_key_hashed/custom SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First INSERT INTO table_for_update_field_dictionary VALUES (2, 'Second', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 Second INSERT INTO table_for_update_field_dictionary VALUES (2, 'SecondUpdated', now()); INSERT INTO table_for_update_field_dictionary VALUES (3, 'Third', now()); -SELECT sleepEachRow(1) FROM numbers(10) SETTINGS function_sleep_max_microseconds_per_block = 10000000 FORMAT Null; +SELECT sleepEachRow(1) FROM numbers(10) FORMAT Null; SELECT key, value FROM dict_complex_key_hashed_custom ORDER BY key ASC; 1 First 2 SecondUpdated From 65d28a959ff5b21199c2b20d8dcb7c7b399f314d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 11 May 2023 04:26:29 +0200 Subject: [PATCH 028/478] Update integration tests (1/2) --- .../configs/config.d/storage_conf.xml | 1 + .../test_merge_tree_hdfs/configs/config.d/storage_conf.xml | 1 + .../test_merge_tree_s3_failover/configs/config.xml | 4 ++++ .../test_s3_zero_copy_replication/configs/config.d/s3.xml | 1 + 4 files changed, 7 insertions(+) diff --git a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml index cb87abcc693..d69fe96a3e2 100644 --- a/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml +++ b/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml @@ -45,5 +45,6 @@ true + 1.0 diff --git a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml 
b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml index 890c396ed95..7d59081486b 100644 --- a/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_hdfs/configs/config.d/storage_conf.xml @@ -28,5 +28,6 @@ 0 + 1.0 diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml index feb537ebbce..743d75d9a21 100644 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.xml @@ -15,4 +15,8 @@ 500 ./clickhouse/ users.xml + + + 1.0 + diff --git a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml index f7d9efc2cae..55c35999703 100644 --- a/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml +++ b/tests/integration/test_s3_zero_copy_replication/configs/config.d/s3.xml @@ -70,6 +70,7 @@ 1024 1 true + 1.0 From 1eb939766bc78a59dd11b3534f4fd7b693d75e21 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 16 May 2023 17:48:49 +0000 Subject: [PATCH 029/478] add test --- src/Interpreters/AsynchronousInsertQueue.cpp | 4 + src/Interpreters/AsynchronousInsertQueue.h | 1 + .../02726_async_insert_flush_stress.reference | 1 + .../02726_async_insert_flush_stress.sh | 86 +++++++++++++++++++ 4 files changed, 92 insertions(+) create mode 100644 tests/queries/0_stateless/02726_async_insert_flush_stress.reference create mode 100755 tests/queries/0_stateless/02726_async_insert_flush_stress.sh diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 4592e92151e..e176c7afd76 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -318,6 +318,7 @@ void AsynchronousInsertQueue::flushAll() LOG_DEBUG(log, "Requested to flush asynchronous insert queue"); + /// Disable background flushes to avoid adding new elements to the queue. flush_stopped = true; std::vector queues_to_flush(pool_size); @@ -343,10 +344,13 @@ void AsynchronousInsertQueue::flushAll() } } + /// Note that jobs scheduled before the call of 'flushAll' are not counted here. LOG_DEBUG(log, "Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)", pool.active(), total_entries, total_bytes, total_queries); + /// Wait until all jobs are finished. That includes also jobs + /// that were scheduled before the call of 'flushAll'. pool.wait(); LOG_DEBUG(log, "Finished flushing of asynchronous insert queue"); diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 455e486c798..b22b0c73907 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -38,6 +38,7 @@ public: std::unique_ptr insert_data_buffer; }; + /// Force flush the whole queue. 
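+    /// Blocks until every entry that is already in the queue has been flushed;
+    /// the SYSTEM FLUSH ASYNC INSERT QUEUE statement exercised by the
+    /// 02726_async_insert_flush_* tests below relies on this behaviour.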
void flushAll(); PushResult push(ASTPtr query, ContextPtr query_context); size_t getPoolSize() const { return pool_size; } diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.reference b/tests/queries/0_stateless/02726_async_insert_flush_stress.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh new file mode 100755 index 00000000000..4685e49b96d --- /dev/null +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash +# Tags: long + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +function insert1() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + while true; do + ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV +1,"a" +2,"b" +' + done +} + +function insert2() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + while true; do + ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' + done +} + +function insert3() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + while true; do + ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" + done +} + +function select1() +{ + while true; do + ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null" + done +} + +function select2() +{ + while true; do + ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null" + done +} + +function flush1() +{ + while true; do + sleep 0.2 + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" + done +} + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" + +TIMEOUT=10 + +export -f insert1 +export -f insert2 +export -f insert3 +export -f select1 +export -f select2 +export -f flush1 + +for _ in {1..5}; do + timeout $TIMEOUT bash -c insert1 & + timeout $TIMEOUT bash -c insert2 & + timeout $TIMEOUT bash -c insert3 & +done + +timeout $TIMEOUT bash -c select1 & +timeout $TIMEOUT bash -c select2 & +timeout $TIMEOUT bash -c flush1 & + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.asynchronous_inserts" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts"; From 05a90a2e971ae7538ed72e1a3db02523c91b67d8 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 May 2023 12:19:00 +0000 Subject: [PATCH 030/478] fix tests --- tests/queries/0_stateless/01271_show_privileges.reference | 1 + .../0_stateless/02117_show_create_table_system.reference | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index ec245d8b9e0..eb8b912f03b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -133,6 +133,7 @@ SYSTEM SYNC TRANSACTION LOG ['SYNC TRANSACTION LOG'] GLOBAL SYSTEM SYSTEM SYNC FILE CACHE ['SYNC FILE CACHE'] GLOBAL SYSTEM SYSTEM FLUSH DISTRIBUTED ['FLUSH 
DISTRIBUTED'] TABLE SYSTEM FLUSH SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH +SYSTEM FLUSH ASYNC INSERT QUEUE ['FLUSH ASYNC INSERT QUEUE'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 09cc62dac00..85cdc278892 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' 
= 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW 
USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -581,10 +581,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE 
CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 
'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP 
SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH' = 135, 'SYSTEM THREAD FUZZER' = 136, 'SYSTEM UNFREEZE' = 137, 'SYSTEM FAILPOINT' = 138, 'SYSTEM' = 139, 'dictGet' = 140, 'displaySecretsInShowAndSelect' = 141, 'addressToLine' = 142, 'addressToLineWithInlines' = 143, 'addressToSymbol' = 144, 'demangle' = 145, 'INTROSPECTION' = 146, 'FILE' = 147, 'URL' = 148, 'REMOTE' = 149, 'MONGO' = 150, 'MEILISEARCH' = 151, 'MYSQL' = 152, 'POSTGRES' = 153, 'SQLITE' = 154, 'ODBC' = 155, 'JDBC' = 156, 'HDFS' = 157, 'S3' = 158, 'HIVE' = 159, 'SOURCES' = 160, 'CLUSTER' = 161, 'ALL' = 162, 'NONE' = 163)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP 
TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION CONTROL' = 96, 'SYSTEM SHUTDOWN' = 97, 'SYSTEM DROP DNS CACHE' = 98, 'SYSTEM DROP MARK CACHE' = 99, 'SYSTEM DROP UNCOMPRESSED CACHE' = 100, 'SYSTEM DROP MMAP CACHE' = 101, 'SYSTEM DROP QUERY CACHE' = 102, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 103, 'SYSTEM DROP FILESYSTEM CACHE' = 104, 'SYSTEM DROP SCHEMA CACHE' = 105, 'SYSTEM DROP S3 CLIENT CACHE' = 106, 'SYSTEM DROP CACHE' = 107, 'SYSTEM RELOAD CONFIG' = 108, 'SYSTEM RELOAD USERS' = 109, 'SYSTEM RELOAD SYMBOLS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'MEILISEARCH' = 152, 'MYSQL' = 153, 'POSTGRES' = 154, 'SQLITE' = 155, 'ODBC' = 156, 'JDBC' = 157, 'HDFS' = 158, 'S3' = 159, 'HIVE' = 160, 'SOURCES' = 161, 'CLUSTER' = 162, 'ALL' = 163, 'NONE' = 164)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
From 616904cd790473ca8075a8175a6334dd837b5bca Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Tue, 23 May 2023 15:50:52 +0000
Subject: [PATCH 031/478] Add encryptConfig()

---
 src/Common/Config/ConfigProcessor.cpp | 34 +++++++++++++++++++++++++++
 src/Common/Config/ConfigProcessor.h | 5 ++++
 src/Common/Config/ConfigReloader.cpp | 1 +
 src/Daemon/BaseDaemon.cpp | 1 +
 4 files changed, 41 insertions(+)

diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index 5bbc8eae0de..76e4ea1ebd1 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -171,6 +171,33 @@ static void mergeAttributes(Element & config_element, Element & with_element)
     with_element_attributes->release();
 }
 
+void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root)
+{
+    for (Node * node = config_root->firstChild(); node;)
+    {
+        if (node->nodeType() == Node::ELEMENT_NODE)
+        {
+            // NamedNodeMapPtr attributes = node->attributes();
+            Element & element = dynamic_cast<Element &>(*node);
+            if (element.hasAttribute("enc_codec"))
+            {
+                LOG_DEBUG(log, "Encrypted node {} value '{}'.", node->nodeName(), element.getNodeValue());
+                // for (Node * child_node = node->firstChild(); child_node;)
+                // {
+                //     LOG_DEBUG(log, " Child node {} value '{}'.", child_node->nodeName(), child_node->getNodeValue());
+                //     child_node = child_node->nextSibling();
+                // }
+                Node * child_node = node->firstChild();
+                child_node->setNodeValue("encrypted_" + child_node->getNodeValue() + "_encrypted");
+            }
+        }
+
+        encryptRecursive(node);
+
+        node = node->nextSibling();
+    }
+}
+
 void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
 {
     const NodeListPtr with_nodes = with_root->childNodes();
@@ -700,6 +727,13 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
     return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
 }
 
+void ConfigProcessor::encryptConfig(LoadedConfig & loaded_config)
+{
+    Node * config_root = getRootNode(loaded_config.preprocessed_xml.get());
+    encryptRecursive(config_root);
+    loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml);
+}
+
 void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir)
 {
     try
diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h
index 0ca3e46db88..2f0046bc39c 100644
--- a/src/Common/Config/ConfigProcessor.h
+++ b/src/Common/Config/ConfigProcessor.h
@@ -92,6 +92,9 @@ public:
         const zkutil::EventPtr & zk_changed_event,
         bool fallback_to_preprocessed = false);
 
+    /// Encrypt nodes in config with specified encryption attributes
+    void encryptConfig(LoadedConfig & loaded_config);
+
     /// Save preprocessed config to specified directory.
/// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/ void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir); @@ -124,6 +127,8 @@ private: using NodePtr = Poco::AutoPtr; + void encryptRecursive(Poco::XML::Node * config_root); + void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); void merge(XMLDocumentPtr config, XMLDocumentPtr with); diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index de7011b67bf..896bd5949d9 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -130,6 +130,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac return; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); + config_processor.encryptConfig(loaded_config); /** We should remember last modification time if and only if config was successfully loaded * Otherwise a race condition could occur during config files update: diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 4780dfed4b2..2634439ee14 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -663,6 +663,7 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); + DB::ConfigProcessor(config_path).encryptConfig(loaded_config); /// Write core dump on crash. { From dd78008c9ec586a213e0e541b70dfe5055f7df0e Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 25 May 2023 09:36:41 +0000 Subject: [PATCH 032/478] Rename encryptConfig() into decryptConfig() --- src/Common/Config/ConfigProcessor.cpp | 12 ++++++------ src/Common/Config/ConfigProcessor.h | 6 +++--- src/Common/Config/ConfigReloader.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 76e4ea1ebd1..3f9535205d8 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -171,7 +171,7 @@ static void mergeAttributes(Element & config_element, Element & with_element) with_element_attributes->release(); } -void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) +void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) { for (Node * node = config_root->firstChild(); node;) { @@ -179,7 +179,7 @@ void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) { // NamedNodeMapPtr attributes = node->attributes(); Element & element = dynamic_cast(*node); - if (element.hasAttribute("enc_codec")) + if (element.hasAttribute("encryption_codec")) { LOG_DEBUG(log, "Encrypted node {} value '{}'.", node->nodeName(), element.getNodeValue()); // for (Node * child_node = node->firstChild(); child_node;) @@ -188,11 +188,11 @@ void ConfigProcessor::encryptRecursive(Poco::XML::Node * config_root) // child_node = child_node->nextSibling(); // } Node * child_node = node->firstChild(); - child_node->setNodeValue("encrypted_" + child_node->getNodeValue() + "_encrypted"); + child_node->setNodeValue("decrypted_" + child_node->getNodeValue() + "_decrypted"); } } - encryptRecursive(node); + decryptRecursive(node); node = node->nextSibling(); } @@ -727,10 +727,10 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, 
path}; } -void ConfigProcessor::encryptConfig(LoadedConfig & loaded_config) +void ConfigProcessor::decryptConfig(LoadedConfig & loaded_config) { Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); - encryptRecursive(config_root); + decryptRecursive(config_root); loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); } diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 2f0046bc39c..bc2f923f705 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,8 +92,8 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); - /// Encrypt nodes in config with specified encryption attributes - void encryptConfig(LoadedConfig & loaded_config); + /// Decrypt nodes in config with specified encryption attributes + void decryptConfig(LoadedConfig & loaded_config); /// Save preprocessed config to specified directory. /// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/ @@ -127,7 +127,7 @@ private: using NodePtr = Poco::AutoPtr; - void encryptRecursive(Poco::XML::Node * config_root); + void decryptRecursive(Poco::XML::Node * config_root); void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index 896bd5949d9..a4d2cb3d305 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -130,7 +130,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac return; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); - config_processor.encryptConfig(loaded_config); + config_processor.decryptConfig(loaded_config); /** We should remember last modification time if and only if config was successfully loaded * Otherwise a race condition could occur during config files update: diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 2634439ee14..4b1cd4e036e 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -663,7 +663,7 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); - DB::ConfigProcessor(config_path).encryptConfig(loaded_config); + DB::ConfigProcessor(config_path).decryptConfig(loaded_config); /// Write core dump on crash. 
{

From 5f73681b00fb1a13873c9a8e6b07c7f57c335668 Mon Sep 17 00:00:00 2001
From: Roman Vasin
Date: Thu, 25 May 2023 15:51:20 +0000
Subject: [PATCH 033/478] Make working node decryption

---
 src/Common/Config/ConfigProcessor.cpp | 44 +++++++++++++++++++++++++--
 src/Common/Config/ConfigProcessor.h | 2 +-
 utils/config-processor/CMakeLists.txt | 3 +-
 3 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp
index 3f9535205d8..fdfc6343876 100644
--- a/src/Common/Config/ConfigProcessor.cpp
+++ b/src/Common/Config/ConfigProcessor.cpp
@@ -26,6 +26,10 @@
 #include
 #include
 #include
+#include
+#include
+#include
+#include
 
 #define PREPROCESSED_SUFFIX "-preprocessed"
 
@@ -181,14 +185,47 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
             Element & element = dynamic_cast<Element &>(*node);
             if (element.hasAttribute("encryption_codec"))
             {
-                LOG_DEBUG(log, "Encrypted node {} value '{}'.", node->nodeName(), element.getNodeValue());
+                LOG_DEBUG(log, "Encrypted node <{}>", node->nodeName());
                 // for (Node * child_node = node->firstChild(); child_node;)
                 // {
                 //     LOG_DEBUG(log, " Child node {} value '{}'.", child_node->nodeName(), child_node->getNodeValue());
                 //     child_node = child_node->nextSibling();
                 // }
-                Node * child_node = node->firstChild();
-                child_node->setNodeValue("decrypted_" + child_node->getNodeValue() + "_decrypted");
+
+                Node * text_node = node->firstChild();
+                auto codec_128 = DB::CompressionCodecEncrypted(DB::AES_128_GCM_SIV);
+                // DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "");
+
+                /*
+                DB::Memory<> memory1;
+                std::string password="abcd";
+                memory1.resize(password.size() + codec_128.getAdditionalSizeAtTheEndOfBuffer() + codec_128.getHeaderSize()+100);
+                auto bytes_written = codec_128.compress(password.data(), static_cast<UInt32>(password.size()), memory1.data());
+                // std::string encrypted_password = std::string(memory1.data(), memory1.size());
+                std::string encrypted_password = std::string(memory1.data(), bytes_written);
+                std::string password_hex;
+                boost::algorithm::hex(encrypted_password.begin(), encrypted_password.end(), std::back_inserter(password_hex));
+                LOG_DEBUG(log, "Encrypted password: '{}'.", password_hex);
+                */
+
+                DB::Memory<> memory;
+                std::string encrypted_value;
+
+                try
+                {
+                    boost::algorithm::unhex(text_node->getNodeValue(), std::back_inserter(encrypted_value));
+                    // boost::algorithm::unhex(password_hex, std::back_inserter(encrypted_value));
+                }
+                catch (const std::exception &)
+                {
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text for {}, check for valid characters [0-9a-fA-F] and length", node->nodeName());
+                }
+
+                memory.resize(codec_128.readDecompressedBlockSize(encrypted_value.data()) + codec_128.getAdditionalSizeAtTheEndOfBuffer());
+                codec_128.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
+                std::string decrypted_value = std::string(memory.data(), memory.size());
+                LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value);
+                text_node->setNodeValue(decrypted_value);
             }
         }
@@ -729,6 +766,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
 void ConfigProcessor::decryptConfig(LoadedConfig & loaded_config)
 {
+    DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
     Node * config_root = getRootNode(loaded_config.preprocessed_xml.get());
     decryptRecursive(config_root);
     loaded_config.configuration = new
Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index bc2f923f705..479a0053efa 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,7 +92,7 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); - /// Decrypt nodes in config with specified encryption attributes + /// crypt nodes in config with specified encryption attributes void decryptConfig(LoadedConfig & loaded_config); /// Save preprocessed config to specified directory. diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 53b6163ba87..00cbfbba659 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,2 +1,3 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) +target_link_libraries(config-processor PRIVATE dbms clickhouse_common_config_no_zookeeper_log) +target_link_libraries(config-processor PUBLIC clickhouse_parsers clickhouse_common_io common ch_contrib::lz4) From cd8eb44f0c54945f4777ed3e50e08b057ee41f43 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 31 May 2023 14:03:11 +0000 Subject: [PATCH 034/478] Add encryptValue(), decryptValue() and exceptions --- src/Common/Config/ConfigProcessor.cpp | 94 ++++++++++++++++----------- src/Common/Config/ConfigProcessor.h | 8 ++- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index fdfc6343876..b6db53018f4 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -46,6 +46,17 @@ namespace ErrorCodes extern const int CANNOT_LOAD_CONFIG; } +/// Get method for string name. Throw exception for wrong name +EncryptionMethod getEncryptionMethod(const std::string & name) +{ + if (name == "AES_128_GCM_SIV") + return AES_128_GCM_SIV; + else if (name == "AES_256_GCM_SIV") + return AES_256_GCM_SIV; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. 
Got {}", name);
+}
+
 /// For cutting preprocessed path to this base
 static std::string main_config_path;
 
@@ -175,57 +186,64 @@ static void mergeAttributes(Element & config_element, Element & with_element)
     with_element_attributes->release();
 }
 
+std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value)
+{
+    auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name));
+
+    DB::Memory<> memory1;
+    memory1.resize(value.size() + codec.getAdditionalSizeAtTheEndOfBuffer() + codec.getHeaderSize()+100);
+    auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory1.data());
+    std::string encrypted_value = std::string(memory1.data(), bytes_written);
+    std::string hex_value;
+    boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
+    LOG_DEBUG(log, "Encrypted value: '{}'.", hex_value);
+    return hex_value;
+}
+
+std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value)
+{
+    auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name));
+
+    DB::Memory<> memory;
+    std::string encrypted_value;
+
+    try
+    {
+        boost::algorithm::unhex(value, std::back_inserter(encrypted_value));
+    }
+    catch (const std::exception &)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
+    }
+
+    memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()) + codec.getAdditionalSizeAtTheEndOfBuffer());
+    codec.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
+    std::string decrypted_value = std::string(memory.data(), memory.size());
+    LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value);
+    return decrypted_value;
+}
+
 void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
 {
     for (Node * node = config_root->firstChild(); node;)
     {
         if (node->nodeType() == Node::ELEMENT_NODE)
         {
-            // NamedNodeMapPtr attributes = node->attributes();
             Element & element = dynamic_cast<Element &>(*node);
             if (element.hasAttribute("encryption_codec"))
             {
                 LOG_DEBUG(log, "Encrypted node <{}>", node->nodeName());
-                // for (Node * child_node = node->firstChild(); child_node;)
-                // {
-                //     LOG_DEBUG(log, " Child node {} value '{}'.", child_node->nodeName(), child_node->getNodeValue());
-                //     child_node = child_node->nextSibling();
-                // }
+
+                const NodeListPtr children = element.childNodes();
+                if (children->length() != 1)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have only one text node", node->nodeName());
 
                 Node * text_node = node->firstChild();
-                auto codec_128 = DB::CompressionCodecEncrypted(DB::AES_128_GCM_SIV);
-                // DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*config, "");
+                if (text_node->nodeType() != Node::TEXT_NODE)
+                    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName());
 
-                /*
-                DB::Memory<> memory1;
-                std::string password="abcd";
-                memory1.resize(password.size() + codec_128.getAdditionalSizeAtTheEndOfBuffer() + codec_128.getHeaderSize()+100);
-                auto bytes_written = codec_128.compress(password.data(), static_cast<UInt32>(password.size()), memory1.data());
-                // std::string encrypted_password = std::string(memory1.data(), memory1.size());
-                std::string encrypted_password = std::string(memory1.data(), bytes_written);
-                std::string password_hex;
-                boost::algorithm::hex(encrypted_password.begin(), encrypted_password.end(), std::back_inserter(password_hex));
-
LOG_DEBUG(log, "Encrypted password: '{}'.", password_hex); - */ - - DB::Memory<> memory; - std::string encrypted_value; - - try - { - boost::algorithm::unhex(text_node->getNodeValue(), std::back_inserter(encrypted_value)); - // boost::algorithm::unhex(password_hex, std::back_inserter(encrypted_value)); - } - catch (const std::exception &) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text for {}, check for valid characters [0-9a-fA-F] and length", node->nodeName()); - } - - memory.resize(codec_128.readDecompressedBlockSize(encrypted_value.data()) + codec_128.getAdditionalSizeAtTheEndOfBuffer()); - codec_128.decompress(encrypted_value.data(), static_cast(encrypted_value.size()), memory.data()); - std::string decrypted_value = std::string(memory.data(), memory.size()); - LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value); - text_node->setNodeValue(decrypted_value); + auto encryption_codec = element.getAttribute("encryption_codec"); + text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue())); } } diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 479a0053efa..c9b227863f0 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,7 +92,13 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); - /// crypt nodes in config with specified encryption attributes + /// Encrypt text value + std::string encryptValue(const std::string & codec_name, const std::string & value); + + /// Decrypt value + std::string decryptValue(const std::string & codec_name, const std::string & value); + + /// Decrypt nodes in config with specified encryption attributes void decryptConfig(LoadedConfig & loaded_config); /// Save preprocessed config to specified directory. 
From fd8c5992889728c76d231a4f96c577bc6578017d Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 31 May 2023 15:16:18 +0000 Subject: [PATCH 035/478] Add encrypt_decrypt example --- src/Common/examples/CMakeLists.txt | 3 ++ src/Common/examples/encrypt_decrypt.cpp | 50 +++++++++++++++++++++++++ utils/config-processor/CMakeLists.txt | 3 +- 3 files changed, 54 insertions(+), 2 deletions(-) create mode 100644 src/Common/examples/encrypt_decrypt.cpp diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index d095ab3a1be..12a2b59ff77 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -82,3 +82,6 @@ endif() clickhouse_add_executable (interval_tree interval_tree.cpp) target_link_libraries (interval_tree PRIVATE dbms) + +clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) +target_link_libraries (encrypt_decrypt PRIVATE dbms) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp new file mode 100644 index 00000000000..cd48963c47a --- /dev/null +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -0,0 +1,50 @@ +#include +#include +#include +#include + + +int main(int argc, char ** argv) +{ + try + { + if (argc != 5) + { + std::cerr << "usage: " << argv[0] << " path action codec value" << std::endl; + return 3; + } + + std::string action = argv[2]; + std::string codec_name = argv[3]; + std::string value = argv[4]; + DB::ConfigProcessor processor(argv[1], false, true); + + auto loaded_config = processor.loadConfig(); + + DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs"); + + if (action == "-e") + std::cout << processor.encryptValue(codec_name, value) << std::endl; + else if (action == "-d") + std::cout << processor.decryptValue(codec_name, value) << std::endl; + else + std::cerr << "Unknown action: " << action << std::endl; + } + catch (Poco::Exception & e) + { + std::cerr << "Exception: " << e.displayText() << std::endl; + return 1; + } + catch (std::exception & e) + { + std::cerr << "std::exception: " << e.what() << std::endl; + return 3; + } + catch (...) + { + std::cerr << "Some exception" << std::endl; + return 2; + } + + return 0; +} diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 00cbfbba659..80c3535ef4e 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,3 +1,2 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE dbms clickhouse_common_config_no_zookeeper_log) -target_link_libraries(config-processor PUBLIC clickhouse_parsers clickhouse_common_io common ch_contrib::lz4) +target_link_libraries(config-processor PRIVATE dbms) From 0708caeb770e88a4805e084eeb01465c85fa45e2 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 1 Jun 2023 09:01:01 +0000 Subject: [PATCH 036/478] Fix style --- src/Common/Config/ConfigProcessor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index b6db53018f4..055a497fb38 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -44,6 +44,7 @@ namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; extern const int CANNOT_LOAD_CONFIG; + extern const int BAD_ARGUMENTS; } /// Get method for string name. 
Throw exception for wrong name From 2ccec017717e57b0eb1bdfb573f6f09e5201446d Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 1 Jun 2023 09:53:31 +0000 Subject: [PATCH 037/478] Set correct memory size for encrypt/decrypt --- src/Common/Config/ConfigProcessor.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 055a497fb38..99bea019c3b 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -191,13 +191,12 @@ std::string ConfigProcessor::encryptValue(const std::string & codec_name, const { auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name)); - DB::Memory<> memory1; - memory1.resize(value.size() + codec.getAdditionalSizeAtTheEndOfBuffer() + codec.getHeaderSize()+100); - auto bytes_written = codec.compress(value.data(), static_cast(value.size()), memory1.data()); - std::string encrypted_value = std::string(memory1.data(), bytes_written); + DB::Memory<> memory; + memory.resize(codec.getCompressedReserveSize(static_cast(value.size()))); + auto bytes_written = codec.compress(value.data(), static_cast(value.size()), memory.data()); + std::string encrypted_value = std::string(memory.data(), bytes_written); std::string hex_value; boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value)); - LOG_DEBUG(log, "Encrypted value: '{}'.", hex_value); return hex_value; } @@ -217,10 +216,9 @@ std::string ConfigProcessor::decryptValue(const std::string & codec_name, const throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length"); } - memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()) + codec.getAdditionalSizeAtTheEndOfBuffer()); + memory.resize(codec.readDecompressedBlockSize(encrypted_value.data())); codec.decompress(encrypted_value.data(), static_cast(encrypted_value.size()), memory.data()); std::string decrypted_value = std::string(memory.data(), memory.size()); - LOG_DEBUG(log, "Decrypted value '{}'", decrypted_value); return decrypted_value; } From d5add614daa2e6f7f0a18eaada22f5c43a057934 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 1 Jun 2023 12:48:45 +0000 Subject: [PATCH 038/478] Add text memo for encrypt_decrypt --- src/Common/examples/encrypt_decrypt.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index cd48963c47a..542e173deb9 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -3,6 +3,12 @@ #include #include +/** This test program encrypts or decrypts text values using AES_128_GCM_SIV or AES_256_GCM_SIV codecs. + * Keys for codecs are loaded from section of configuration file. + * + * How to use: + * ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test + */ int main(int argc, char ** argv) { @@ -10,17 +16,22 @@ int main(int argc, char ** argv) { if (argc != 5) { - std::cerr << "usage: " << argv[0] << " path action codec value" << std::endl; + std::cerr << "Usage:" << std::endl + << " " << argv[0] << " path action codec value" << std::endl + << "path: path to configuration file." << std::endl + << "action: -e for encryption and -d for decryption." << std::endl + << "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." 
<< std::endl << std::endl + << "Example:" << std::endl + << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test"; return 3; } std::string action = argv[2]; std::string codec_name = argv[3]; std::string value = argv[4]; + DB::ConfigProcessor processor(argv[1], false, true); - auto loaded_config = processor.loadConfig(); - DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs"); if (action == "-e") From e269235dbcf32f7e507370e1bff74a202a33446c Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 1 Jun 2023 13:09:21 +0000 Subject: [PATCH 039/478] Make decryptRecursive() go through element nodes only --- src/Common/Config/ConfigProcessor.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 99bea019c3b..df25a9a3825 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -231,8 +231,6 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) Element & element = dynamic_cast(*node); if (element.hasAttribute("encryption_codec")) { - LOG_DEBUG(log, "Encrypted node <{}>", node->nodeName()); - const NodeListPtr children = element.childNodes(); if (children->length() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have only one text node", node->nodeName()); @@ -244,10 +242,8 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) auto encryption_codec = element.getAttribute("encryption_codec"); text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue())); } + decryptRecursive(node); } - - decryptRecursive(node); - node = node->nextSibling(); } } From 88bf4e49d4767b6a3a3ccfcc383a42ca90ae12f1 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 1 Jun 2023 17:40:40 -0400 Subject: [PATCH 040/478] update for min_chunk_bytes_for_parallel_parsing --- docs/en/sql-reference/transactions.md | 53 +++++++++++++++++++-------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 68fbfe0b22a..1ca2db44b13 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -3,23 +3,44 @@ slug: /en/guides/developer/transactional --- # Transactional (ACID) support -INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID): -- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed. +## Case 1: INSERT into one partition, of one table, of the MergeTree* family + +This is transactional (ACID) if the number of rows inserted is less than or equal to `max_insert_block_size rows`, and in the case of data in TSV, TKSV, CSV, or JSONEachRow format if the number of bytes is less than `min_chunk_bytes_for_parallel_parsing`: +- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted. - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted. 
-- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen; -- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). -* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own; -* INSERT into multiple tables with one statement is possible if materialized views are involved; -* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional; -* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable; -* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting; -* max_insert_block_size is 1 000 000 by default and can be adjusted as needed; -* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties; -* ClickHouse is using MVCC with snapshot isolation internally; -* all ACID properties are valid even in case of server kill / crash; -* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup; -* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) -* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. +- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen +- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). +- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associate materialized views). + +## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family + +Same as Case 1 above, with this detail: +- If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own + + +## Case 3: INSERT into one distributed table of the MergeTree* family + +Same as Case 1 above, with this detail: +- INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional + +## Case 4: Using a Buffer table + +- insert into Buffer tables is neither atomic nor isolated nor consistent nor durable + +## Case 5: Using async_insert + +Same as Case 1 above, with this detail: +- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured. 
+ +## Notes +- `max_insert_block_size` is 1 000 000 by default and can be adjusted as needed +- `min_chunk_bytes_for_parallel_parsing` is 1 000 000 by default and can be adjusted as needed +- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties +- ClickHouse is using MVCC with snapshot isolation internally +- all ACID properties are valid even in the case of server kill/crash +- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup +- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) +- this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback). ## Transactions, Commit, and Rollback From d868e35863c3a80c9924b347ac017e9e93c33ba2 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 1 Jun 2023 19:08:44 -0400 Subject: [PATCH 041/478] update spelling list --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ded7a4643a9..0787ead76cf 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -169,6 +169,7 @@ SelfManaged Stateful Submodules Subqueries +TKSV TSVRaw TSan TabItem From d316add2f1f6ffa9cf6f2a1107a4d7d69960c72a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 6 Jun 2023 15:24:29 +0000 Subject: [PATCH 042/478] Add integration test test_config_decryption --- .../test_config_decryption/__init__.py | 0 .../test_config_decryption/configs/config.xml | 12 +++++++ .../test_config_decryption/test.py | 31 +++++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/integration/test_config_decryption/__init__.py create mode 100644 tests/integration/test_config_decryption/configs/config.xml create mode 100644 tests/integration/test_config_decryption/test.py diff --git a/tests/integration/test_config_decryption/__init__.py b/tests/integration/test_config_decryption/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_config_decryption/configs/config.xml b/tests/integration/test_config_decryption/configs/config.xml new file mode 100644 index 00000000000..5c274128e39 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config.xml @@ -0,0 +1,12 @@ + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/test.py b/tests/integration/test_config_decryption/test.py new file mode 100644 index 00000000000..a3cb1bb57f3 --- /dev/null +++ b/tests/integration/test_config_decryption/test.py @@ -0,0 +1,31 @@ +import pytest +import os +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", main_configs=["configs/config.xml"]) + + +@pytest.fixture(scope="module") +def 
started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_successful_decryption(started_cluster): + assert ( + node.query( + "select value from system.server_settings where name ='max_table_size_to_drop'" + ) + == "60000000000\n" + ) + assert ( + node.query( + "select value from system.server_settings where name ='max_partition_size_to_drop'" + ) + == "40000000000\n" + ) From 9cd0d5e6db0ce68ffdc320e2f73d17531fb54ec0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Jun 2023 11:49:41 +0000 Subject: [PATCH 043/478] move settings to server_settings --- programs/server/Server.cpp | 9 +++------ src/Core/ServerSettings.h | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index a64676cfa01..5496720e5dc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1438,15 +1438,12 @@ try /// This is needed to load proper values of background_pool_size etc. global_context->initializeBackgroundExecutorsIfNeeded(); - size_t async_insert_threads = config().getUInt("async_insert_threads", 16); - bool async_insert_queue_flush_on_shutdown = config().getBool("async_insert_queue_flush_on_shutdown", false); - - if (async_insert_threads) + if (server_settings.async_insert_threads) { global_context->setAsynchronousInsertQueue(std::make_shared( global_context, - async_insert_threads, - async_insert_queue_flush_on_shutdown)); + server_settings.async_insert_threads, + server_settings.async_insert_queue_flush_on_shutdown)); } size_t mark_cache_size = server_settings.mark_cache_size; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1a9f226041b..ca27cbdbf19 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -48,6 +48,8 @@ namespace DB M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Limit on total memory usage for merges and mutations. Zero means Unlimited.", 0) \ M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but in to ram ratio. Allows to lower memory limit on low-memory systems.", 0) \ M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \ + M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ + M(Bool, async_insert_queue_flush_on_shutdown, true, "If true queue of asynchronous inserts is flushed on graceful shutdown", 0) \ \ M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \ M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. 
Zero means Unlimited.", 0) \ From ff1b069cdb9301f5c13bed69d70c484210693dea Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 7 Jun 2023 21:15:35 +0000 Subject: [PATCH 044/478] fix test --- tests/queries/0_stateless/02726_async_insert_flush_queue.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql index 33f40eef14e..98e78045b85 100644 --- a/tests/queries/0_stateless/02726_async_insert_flush_queue.sql +++ b/tests/queries/0_stateless/02726_async_insert_flush_queue.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP TABLE IF EXISTS t_async_inserts_flush; CREATE TABLE t_async_inserts_flush (a UInt64) ENGINE = Memory; From 60499164b3acc8663a10836233926c0f997ac381 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 8 Jun 2023 14:35:11 +0000 Subject: [PATCH 045/478] fix tests --- tests/queries/0_stateless/02726_async_insert_flush_stress.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 4685e49b96d..5fafb773d16 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-parallel set -e From 1bce32c1cc1d9e2b0aeea93c01947646e18c52b3 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 9 Jun 2023 15:18:46 +0000 Subject: [PATCH 046/478] Add tests for wrong settings --- .../configs/config_invalid_chars.xml | 12 +++++++ .../configs/config_no_encryption_codecs.xml | 4 +++ .../configs/config_subnodes.xml | 10 ++++++ .../configs/config_wrong_method.xml | 12 +++++++ .../test_wrong_settings.py | 34 +++++++++++++++++++ 5 files changed, 72 insertions(+) create mode 100644 tests/integration/test_config_decryption/configs/config_invalid_chars.xml create mode 100644 tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml create mode 100644 tests/integration/test_config_decryption/configs/config_subnodes.xml create mode 100644 tests/integration/test_config_decryption/configs/config_wrong_method.xml create mode 100644 tests/integration/test_config_decryption/test_wrong_settings.py diff --git a/tests/integration/test_config_decryption/configs/config_invalid_chars.xml b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml new file mode 100644 index 00000000000..49bf51b5bad --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml @@ -0,0 +1,12 @@ + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml new file mode 100644 index 00000000000..07bf69d17c8 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml @@ -0,0 +1,4 @@ + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config_subnodes.xml 
b/tests/integration/test_config_decryption/configs/config_subnodes.xml new file mode 100644 index 00000000000..b0e519ff546 --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_subnodes.xml @@ -0,0 +1,10 @@ + + + + 00112233445566778899aabbccddeeff + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + diff --git a/tests/integration/test_config_decryption/configs/config_wrong_method.xml b/tests/integration/test_config_decryption/configs/config_wrong_method.xml new file mode 100644 index 00000000000..b452ce6374c --- /dev/null +++ b/tests/integration/test_config_decryption/configs/config_wrong_method.xml @@ -0,0 +1,12 @@ + + + + 00112233445566778899aabbccddeeff + + + 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py new file mode 100644 index 00000000000..c01f5050b00 --- /dev/null +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -0,0 +1,34 @@ +import pytest +from helpers.cluster import ClickHouseCluster + + +def start_clickhouse(config, err_msg): + cluster = ClickHouseCluster(__file__) + node = cluster.add_instance("node", main_configs=[config]) + caught_exception = "" + try: + cluster.start() + except Exception as e: + caught_exception = str(e) + assert caught_exception.find(err_msg) != -1 + + +def test_wrong_method(): + start_clickhouse("configs/config_wrong_method.xml", "Wrong encryption Method") + + +def test_invalid_chars(): + start_clickhouse( + "configs/config_invalid_chars.xml", + "Cannot read encrypted text, check for valid characters", + ) + + +def test_no_encryption_codecs(): + start_clickhouse( + "configs/config_no_encryption_codecs.xml", "There is no key 0 in config" + ) + + +def test_subnodes(): + start_clickhouse("configs/config_subnodes.xml", "should have only one text node") From 4a7761c16210c7e2eccc0b26e172ec8dc7e6c183 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 10 Jun 2023 08:26:32 +0000 Subject: [PATCH 047/478] Add column is_obsolete for system.settings table and related system tables --- src/Interpreters/Context.cpp | 6 ++++-- src/Storages/System/StorageSystemMergeTreeSettings.cpp | 2 ++ src/Storages/System/StorageSystemServerSettings.cpp | 2 ++ src/Storages/System/StorageSystemSettings.cpp | 2 ++ tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- tests/queries/0_stateless/01945_system_warnings.reference | 2 +- tests/queries/0_stateless/01945_system_warnings.sh | 4 ++-- 7 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 995e78d8f0b..a12117b7677 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -781,8 +781,10 @@ Strings Context::getWarnings() const { if (setting.isValueChanged() && setting.isObsolete()) { - common_warnings.emplace_back("Some obsolete setting is changed. " - "Check 'select * from system.settings where changed' and read the changelog."); + common_warnings.emplace_back( + "Obsolete setting `" + setting.getName() + + "` is changed. 
" + "Check 'select * from system.settings where changed' and read the changelog."); break; } } diff --git a/src/Storages/System/StorageSystemMergeTreeSettings.cpp b/src/Storages/System/StorageSystemMergeTreeSettings.cpp index 6de3fb800f4..0ddd4546208 100644 --- a/src/Storages/System/StorageSystemMergeTreeSettings.cpp +++ b/src/Storages/System/StorageSystemMergeTreeSettings.cpp @@ -21,6 +21,7 @@ NamesAndTypesList SystemMergeTreeSettings::getNamesAndTypes() {"max", std::make_shared(std::make_shared())}, {"readonly", std::make_shared()}, {"type", std::make_shared()}, + {"is_obsolete", std::make_shared()}, }; } @@ -52,6 +53,7 @@ void SystemMergeTreeSettings::fillData(MutableColumns & res_columns, res_columns[5]->insert(max); res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); + res_columns[8]->insert(setting.isObsolete()); } } diff --git a/src/Storages/System/StorageSystemServerSettings.cpp b/src/Storages/System/StorageSystemServerSettings.cpp index ad52c6896ac..290b575465c 100644 --- a/src/Storages/System/StorageSystemServerSettings.cpp +++ b/src/Storages/System/StorageSystemServerSettings.cpp @@ -15,6 +15,7 @@ NamesAndTypesList StorageSystemServerSettings::getNamesAndTypes() {"changed", std::make_shared()}, {"description", std::make_shared()}, {"type", std::make_shared()}, + {"is_obsolete", std::make_shared()}, }; } @@ -33,6 +34,7 @@ void StorageSystemServerSettings::fillData(MutableColumns & res_columns, Context res_columns[3]->insert(setting.isValueChanged()); res_columns[4]->insert(setting.getDescription()); res_columns[5]->insert(setting.getTypeName()); + res_columns[6]->insert(setting.isObsolete()); } } diff --git a/src/Storages/System/StorageSystemSettings.cpp b/src/Storages/System/StorageSystemSettings.cpp index c54f7eef25f..dcb54eac0a0 100644 --- a/src/Storages/System/StorageSystemSettings.cpp +++ b/src/Storages/System/StorageSystemSettings.cpp @@ -21,6 +21,7 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes() {"type", std::make_shared()}, {"default", std::make_shared()}, {"alias_for", std::make_shared()}, + {"is_obsolete", std::make_shared()}, }; } @@ -51,6 +52,7 @@ void StorageSystemSettings::fillData(MutableColumns & res_columns, ContextPtr co res_columns[6]->insert(writability == SettingConstraintWritability::CONST); res_columns[7]->insert(setting.getTypeName()); res_columns[8]->insert(setting.getDefaultValueString()); + res_columns[10]->insert(setting.isObsolete()); }; const auto & settings_to_aliases = Settings::Traits::settingsToAliases(); diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 4e6dd3e1b0f..3d5b1ca99a5 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Some obsolete setting is changed." +expect " * Obsolete setting `max_memory_usage_for_all_queries` is changed." 
expect ":) " send -- "q\r" expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index 296a03447db..d6ae567289c 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Some obsolete setting is changed. Check \'select * from system.settings where changed\' and read the changelog. +Obsolete setting `multiple_joins_rewriter_version` is changed. Check \'select * from system.settings where changed\' and read the changelog. 1 1 diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh index bf11cee2911..112baab614e 100755 --- a/tests/queries/0_stateless/01945_system_warnings.sh +++ b/tests/queries/0_stateless/01945_system_warnings.sh @@ -14,8 +14,8 @@ else echo "Server was built in debug mode. It will work slowly." fi -${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings WHERE message LIKE '%obsolete setting%'" -${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FROM system.warnings WHERE message LIKE '%obsolete setting%'" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.warnings WHERE message LIKE '%Obsolete setting%'" +${CLICKHOUSE_CLIENT} --multiple_joins_rewriter_version=42 -q "SELECT message FROM system.warnings WHERE message LIKE '%Obsolete setting%'" # Avoid duplicated warnings ${CLICKHOUSE_CLIENT} -q "SELECT count() = countDistinct(message) FROM system.warnings" From 820673a5cf3f3f1c17b781496b3ab56f72f72c08 Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 10 Jun 2023 10:16:53 +0000 Subject: [PATCH 048/478] update test --- .../queries/0_stateless/01221_system_settings.reference | 4 ++-- .../0_stateless/02117_show_create_table_system.reference | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01221_system_settings.reference b/tests/queries/0_stateless/01221_system_settings.reference index 399b3778b66..e9c2f3fec32 100644 --- a/tests/queries/0_stateless/01221_system_settings.reference +++ b/tests/queries/0_stateless/01221_system_settings.reference @@ -1,4 +1,4 @@ -send_timeout 300 0 Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server. \N \N 0 Seconds 300 -storage_policy default 0 Name of storage disk policy \N \N 0 String +send_timeout 300 0 Timeout for sending data to network, in seconds. If client needs to sent some data, but it did not able to send any bytes in this interval, exception is thrown. If you set this setting on client, the \'receive_timeout\' for the socket will be also set on the corresponding connection end on the server. 
\N \N 0 Seconds 300 0 +storage_policy default 0 Name of storage disk policy \N \N 0 String 0 1 1 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index e864ba85018..38d00c15725 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -346,7 +346,8 @@ CREATE TABLE system.merge_tree_settings `min` Nullable(String), `max` Nullable(String), `readonly` UInt8, - `type` String + `type` String, + `is_obsolete` UInt8 ) ENGINE = SystemMergeTreeSettings COMMENT 'SYSTEM TABLE is built on the fly.' @@ -918,7 +919,8 @@ CREATE TABLE system.replicated_merge_tree_settings `min` Nullable(String), `max` Nullable(String), `readonly` UInt8, - `type` String + `type` String, + `is_obsolete` UInt8 ) ENGINE = SystemReplicatedMergeTreeSettings COMMENT 'SYSTEM TABLE is built on the fly.' @@ -993,7 +995,8 @@ CREATE TABLE system.settings `readonly` UInt8, `type` String, `default` String, - `alias_for` String + `alias_for` String, + `is_obsolete` UInt8 ) ENGINE = SystemSettings COMMENT 'SYSTEM TABLE is built on the fly.' From e9763caa0eb7078cd28e3765d0da1e0a9b4a204b Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 12 Jun 2023 14:21:58 +0000 Subject: [PATCH 049/478] fix --- src/Interpreters/Context.cpp | 18 +++++++++++++----- .../01945_system_warnings.reference | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index a12117b7677..823c3d678df 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -777,17 +777,25 @@ Strings Context::getWarnings() const auto lock = getLock(); common_warnings = shared->warnings; } + String res = "Obsolete settings ["; + size_t obsolete_settings_count = 0; for (const auto & setting : settings) { if (setting.isValueChanged() && setting.isObsolete()) { - common_warnings.emplace_back( - "Obsolete setting `" + setting.getName() - + "` is changed. " - "Check 'select * from system.settings where changed' and read the changelog."); - break; + res += (obsolete_settings_count ? ", `" : "`") + setting.getName() + "`"; + ++obsolete_settings_count; } } + + if (obsolete_settings_count) + { + res = res + "]" + (obsolete_settings_count == 1 ? " is" : " are") + + " changed. " + "Please check 'select * from system.settings where changed and is_obsolete' and read the changelog."; + common_warnings.emplace_back(res); + } + return common_warnings; } diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index d6ae567289c..3e7edacd275 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Obsolete setting `multiple_joins_rewriter_version` is changed. Check \'select * from system.settings where changed\' and read the changelog. +Obsolete settings [`multiple_joins_rewriter_version`] is changed. Check \'select * from system.settings where changed\' and read the changelog. 
1 1 From 18f4f1a5238c64f3b45e1d6781ef2c7104ab842d Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 12 Jun 2023 15:11:19 +0000 Subject: [PATCH 050/478] udpate test --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- tests/queries/0_stateless/01945_system_warnings.reference | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 3d5b1ca99a5..f0c97acb1f5 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete setting `max_memory_usage_for_all_queries` is changed." +expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed." expect ":) " send -- "q\r" expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index 3e7edacd275..0c05d5d7049 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Obsolete settings [`multiple_joins_rewriter_version`] is changed. Check \'select * from system.settings where changed\' and read the changelog. +Obsolete settings [`multiple_joins_rewriter_version`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog. 1 1 From b76ba13250ad5b0abe728875be0e41667450cd5f Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 12 Jun 2023 15:41:46 +0000 Subject: [PATCH 051/478] fix --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index f0c97acb1f5..617e54a375e 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed." +expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." 
expect ":) " send -- "q\r" expect eof From 2148f29a40f44f387b2cfbd9d3496bf9bc0b7e8d Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 8 Jun 2023 20:29:27 -0400 Subject: [PATCH 052/478] More accurate DNS resolve for the keeper connection --- src/Common/ZooKeeper/ZooKeeper.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a587ad6caf4..e078470476a 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -13,6 +13,7 @@ #include #include #include "Common/ZooKeeper/IKeeper.h" +#include #include #include #include @@ -80,8 +81,12 @@ void ZooKeeper::init(ZooKeeperArgs args_) if (secure) host_string.erase(0, strlen("secure://")); - LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString()); - nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); + /// We want to resolve all hosts without DNS cache for keeper connection. + Coordination::DNSResolver::instance().removeHostFromCache(host_string); + + auto address = Coordination::DNSResolver::instance().resolveAddress(host_string); + LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, address.toString()); + nodes.emplace_back(Coordination::ZooKeeper::Node{address, secure}); } catch (const Poco::Net::HostNotFoundException & e) { From bbf0548007432dc5482cd28fda4c31e57dd5c24f Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 13 Jun 2023 02:48:28 +0000 Subject: [PATCH 053/478] fix test --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 617e54a375e..9a8e22aa26f 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [`max_memory_usage_for_all_queries`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." +expect " * Obsolete settings [\`max_memory_usage_for_all_queries\`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." expect ":) " send -- "q\r" expect eof From 6ad6c6afa3bdf1cd95e1454bad9e7eb75db7b0ab Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 13 Jun 2023 04:13:16 +0000 Subject: [PATCH 054/478] fix --- src/Interpreters/Context.cpp | 2 +- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- tests/queries/0_stateless/01945_system_warnings.reference | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 823c3d678df..1b8c52ee06b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -783,7 +783,7 @@ Strings Context::getWarnings() const { if (setting.isValueChanged() && setting.isObsolete()) { - res += (obsolete_settings_count ? ", `" : "`") + setting.getName() + "`"; + res += (obsolete_settings_count ? 
", '" : "'") + setting.getName() + "'"; ++obsolete_settings_count; } } diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 9a8e22aa26f..5315c56bde8 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [\`max_memory_usage_for_all_queries\`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." +expect " * Obsolete settings [\'max_memory_usage_for_all_queries\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." expect ":) " send -- "q\r" expect eof diff --git a/tests/queries/0_stateless/01945_system_warnings.reference b/tests/queries/0_stateless/01945_system_warnings.reference index 0c05d5d7049..dcb296c61aa 100644 --- a/tests/queries/0_stateless/01945_system_warnings.reference +++ b/tests/queries/0_stateless/01945_system_warnings.reference @@ -1,5 +1,5 @@ Server was built in debug mode. It will work slowly. 0 -Obsolete settings [`multiple_joins_rewriter_version`] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog. +Obsolete settings [\'multiple_joins_rewriter_version\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog. 1 1 From 404bfe773ef726b63e944b70a8b4253907637b8c Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 13 Jun 2023 06:28:47 +0000 Subject: [PATCH 055/478] fix --- tests/queries/0_stateless/01945_show_debug_warning.expect | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index 5315c56bde8..9be0eb6e399 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -55,7 +55,7 @@ expect eof spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --max_memory_usage_for_all_queries=123 --history_file=$history_file" expect "Warnings:" -expect " * Obsolete settings [\'max_memory_usage_for_all_queries\'] is changed. Please check \'select * from system.settings where changed and is_obsolete\' and read the changelog." 
+expect " * Obsolete settings" expect ":) " send -- "q\r" expect eof From e7d1dfb704caa283174823ba8ff59b6c10ae0e1d Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 13 Jun 2023 08:30:07 +0000 Subject: [PATCH 056/478] fix --- tests/queries/0_stateless/01945_system_warnings.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01945_system_warnings.sh b/tests/queries/0_stateless/01945_system_warnings.sh index 112baab614e..e44fe0ad6b5 100755 --- a/tests/queries/0_stateless/01945_system_warnings.sh +++ b/tests/queries/0_stateless/01945_system_warnings.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-parallel CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 906db0318dee9d08a8af603ab0400143578e4f3d Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Jun 2023 19:45:43 +0000 Subject: [PATCH 057/478] Remove AST based optimization duplicate_order_by_and_distinct It was quite some time ago since it was replaced by plan level optimizations: - query_plan_remove_redundant_sorting - query_plan_remove_redundant_distinct --- src/Core/Settings.h | 2 +- src/Interpreters/TreeOptimizer.cpp | 53 -------- ..._duplicate_order_by_and_distinct.reference | 58 --------- .../01305_duplicate_order_by_and_distinct.sql | 123 ------------------ ...t_optimize_for_distributed_table.reference | 4 - ...istinct_optimize_for_distributed_table.sql | 46 ------- 6 files changed, 1 insertion(+), 285 deletions(-) delete mode 100644 tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference delete mode 100644 tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql delete mode 100644 tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference delete mode 100644 tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3a23127e2fd..c53bed2007a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -526,7 +526,6 @@ class IColumn; M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ - M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ @@ -818,6 +817,7 @@ class IColumn; MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \ MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \ MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \ + MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \ /** The section above is for obsolete settings. Do not add anything there. 
*/ diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c38b3c79026..b6b45c664f9 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -288,13 +288,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) elems = std::move(unique_elems); } -/// Optimize duplicate ORDER BY -void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context) -{ - DuplicateOrderByVisitor::Data order_by_data{context}; - DuplicateOrderByVisitor(order_by_data).visit(query); -} - /// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select) { @@ -378,41 +371,6 @@ std::unordered_set getDistinctNames(const ASTSelectQuery & select) return names; } -/// Remove DISTINCT from query if columns are known as DISTINCT from subquery -void optimizeDuplicateDistinct(ASTSelectQuery & select) -{ - if (!select.select() || select.select()->children.empty()) - return; - - const ASTSelectQuery * subselect = getSimpleSubselect(select); - if (!subselect) - return; - - std::unordered_set distinct_names = getDistinctNames(*subselect); - std::unordered_set selected_names; - - /// Check source column names from select list (ignore aliases and table names) - for (const auto & id : select.select()->children) - { - const auto * identifier = id->as(); - if (!identifier) - return; - - const String & name = identifier->shortName(); - if (!distinct_names.contains(name)) - return; /// Not a distinct column, keep DISTINCT for it. - - selected_names.emplace(name); - } - - /// select columns list != distinct columns list - /// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT - if (selected_names.size() != distinct_names.size()) - return; - - select.distinct = false; -} - /// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression, /// has a single argument and not an aggregate functions. void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context, @@ -811,17 +769,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, && !select_query->group_by_with_cube) optimizeAggregateFunctionsOfGroupByKeys(select_query, query); - /// Remove duplicate ORDER BY and DISTINCT from subqueries. 
- if (settings.optimize_duplicate_order_by_and_distinct) - { - optimizeDuplicateOrderBy(query, context); - - /// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge - /// TODO: disable Distributed/remote() tables only - if (!settings.distributed_group_by_no_merge) - optimizeDuplicateDistinct(*select_query); - } - /// Remove functions from ORDER BY if its argument is also in ORDER BY if (settings.optimize_redundant_functions_in_order_by) optimizeRedundantFunctionsInOrderBy(select_query, context); diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference deleted file mode 100644 index 10f8bbfd392..00000000000 --- a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.reference +++ /dev/null @@ -1,58 +0,0 @@ -SELECT number -FROM -( - SELECT number - FROM - ( - SELECT DISTINCT number - FROM numbers(3) - ) -) -ORDER BY number ASC -0 -1 -2 -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM - ( - SELECT DISTINCT number - FROM numbers(3) - ORDER BY number ASC - ) - ORDER BY number ASC -) -ORDER BY number ASC -0 -1 -2 -SELECT number -FROM -( - SELECT number - FROM - ( - SELECT DISTINCT number % 2 AS number - FROM numbers(3) - ) -) -ORDER BY number ASC -0 -1 -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM - ( - SELECT DISTINCT number % 2 AS number - FROM numbers(3) - ORDER BY number ASC - ) - ORDER BY number ASC -) -ORDER BY number ASC -0 -1 diff --git a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql b/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql deleted file mode 100644 index 3b13b208eb5..00000000000 --- a/tests/queries/0_stateless/01305_duplicate_order_by_and_distinct.sql +++ /dev/null @@ -1,123 +0,0 @@ -set optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 0; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT * - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -set optimize_duplicate_order_by_and_distinct = 0; - -EXPLAIN SYNTAX SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; - -SELECT DISTINCT * -FROM -( - SELECT DISTINCT * - FROM - ( - SELECT DISTINCT number % 2 - AS number - FROM numbers(3) - ORDER BY number - ) - ORDER BY number -) -ORDER BY number; diff --git 
a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference deleted file mode 100644 index 44e0be8e356..00000000000 --- a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.reference +++ /dev/null @@ -1,4 +0,0 @@ -0 -0 -0 -0 diff --git a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql b/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql deleted file mode 100644 index 8ef1273c855..00000000000 --- a/tests/queries/0_stateless/01306_disable_duplicate_order_by_and_distinct_optimize_for_distributed_table.sql +++ /dev/null @@ -1,46 +0,0 @@ --- Tags: distributed - -set query_plan_remove_redundant_distinct = 1; -set optimize_duplicate_order_by_and_distinct = 0; -SET distributed_group_by_no_merge = 0; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 - SETTINGS distributed_group_by_no_merge = 1 -); - -SET distributed_group_by_no_merge = 1; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 -); - -set optimize_duplicate_order_by_and_distinct = 0; -SET distributed_group_by_no_merge = 0; - -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 - SETTINGS distributed_group_by_no_merge = 1 -); - -SET distributed_group_by_no_merge = 1; -set optimize_duplicate_order_by_and_distinct = 0; -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM remote('127.0.0.{1,2}', system.numbers) - LIMIT 1 -); From a4e982442f4a3d6b3007b432f8e0b6211e9aa4e7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 11:13:59 +0000 Subject: [PATCH 058/478] Update documentation --- docs/en/operations/configuration-files.md | 36 +++++++++++++++++++++++ docs/ru/operations/configuration-files.md | 36 +++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index b3583e156ad..b5d52acca49 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -54,6 +54,42 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element. +## Decryption {#decryption} + +Elements with text nodes may be encrypted with [encryption codecs](../../sql-reference/statements/create/table.md#encryption-codecs). In this case `` section should be included in configuration file and each element node with encrypted text should have `encryption_codec` attribute with name of codec. + +Example: + +```xml + + + + 00112233445566778899aabbccddeeff + + + + admin + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + + +``` + +To get the encrypted value `encrypt_decrypt` example application may be used. 
+ +Example: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + +:::note +The decryption is executed after creation of preprocessed configuration file. It means that elements with `encryption_codec` attribute in the preprocessed configuration file are encrypted. But the values of corresponding parameters in server's memory are decrypted. +::: + ## User Settings {#user-settings} The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`. diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 2b824ce91bd..96512fbbe23 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -85,6 +85,42 @@ $ cat /etc/clickhouse-server/users.d/alice.xml Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера. +## Расшифровка {#decryption} + +Элементы с текстовыми узлами могут быть зашифрован с помощью [кодеков шифрования](../../sql-reference/statements/create/table.md#encryption-codecs). В этом случае секция `` должна быть включена в конфигурационный файл и каждый элемент с зашифрованным текстом должен иметь аттрибут `encryption_codec` с именем кодека. + +Пример: + +```xml + + + + 00112233445566778899aabbccddeeff + + + + admin + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + + +``` + +Чтобы получить зашифрованное значение может быть использовано приложение-пример `encrypt_decrypt` . + +Пример: + +``` bash +./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd +``` + +``` text +961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 +``` + +:::note +Расшифровка выполняется после создания конфигурационного файла предобработки. Это означает что элементы с аттрибутом `encryption_codec` в конфигурационном файле предобработки зашифрованы. Но значения соответствующих параметров в памяти сервера расшифрованы. +::: + ## Примеры записи конфигурации на YAML {#example} Здесь можно рассмотреть пример реальной конфигурации записанной на YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example). 
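The documentation hunks above describe one mechanism: an element carrying an `encryption_codec` attribute stores the ciphertext of the secret as hex text, so arbitrary bytes survive the XML/YAML round trip, and the server decrypts the value in memory only after the preprocessed file has been written. As a minimal, self-contained sketch of just that hex layer (assuming Boost headers are available; it deliberately does not perform the AES_128_GCM_SIV encryption itself, which the server's codec handles), the following standalone program round-trips a value with the same Boost helpers the config processor uses. The string literal is an arbitrary placeholder, not a real secret.

```cpp
#include <boost/algorithm/hex.hpp>
#include <iostream>
#include <iterator>
#include <string>

int main()
{
    /// Placeholder bytes; in the real flow this would be the ciphertext
    /// produced by the AES_128_GCM_SIV codec, not plaintext.
    std::string raw = "abcd";

    /// boost::algorithm::hex emits uppercase hex digits, which is why the
    /// documented example values look like 961F0000...
    std::string hex_value;
    boost::algorithm::hex(raw.begin(), raw.end(), std::back_inserter(hex_value));

    /// The reverse step, applied when the stored value is read back.
    std::string decoded;
    boost::algorithm::unhex(hex_value.begin(), hex_value.end(), std::back_inserter(decoded));

    std::cout << hex_value << '\n'
              << (decoded == raw ? "round-trip ok" : "mismatch") << '\n';
}
```

Keeping only hex text in the stored file means the preprocessed configuration can still be copied and diffed as plain text, while the decrypted value exists only in server memory, which is exactly the behaviour the note above calls out.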
From b5d4ad583f3741f87843f51c56ccc41b91833523 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 11:35:55 +0000 Subject: [PATCH 059/478] Small code style improvements --- src/Common/Config/ConfigProcessor.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index df25a9a3825..9548bf33b7b 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -26,9 +26,9 @@ #include #include #include +#include #include #include -#include #include #define PREPROCESSED_SUFFIX "-preprocessed" @@ -194,7 +194,7 @@ std::string ConfigProcessor::encryptValue(const std::string & codec_name, const DB::Memory<> memory; memory.resize(codec.getCompressedReserveSize(static_cast(value.size()))); auto bytes_written = codec.compress(value.data(), static_cast(value.size()), memory.data()); - std::string encrypted_value = std::string(memory.data(), bytes_written); + auto encrypted_value = std::string(memory.data(), bytes_written); std::string hex_value; boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value)); return hex_value; @@ -224,7 +224,7 @@ std::string ConfigProcessor::decryptValue(const std::string & codec_name, const void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) { - for (Node * node = config_root->firstChild(); node;) + for (Node * node = config_root->firstChild(); node; node = node->nextSibling()) { if (node->nodeType() == Node::ELEMENT_NODE) { @@ -244,7 +244,6 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) } decryptRecursive(node); } - node = node->nextSibling(); } } From f55623aa2d23fda63f2b19720f4035568a4595a4 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 11:46:43 +0000 Subject: [PATCH 060/478] Use anonymous namespace for getEncryptionMethod() --- src/Common/Config/ConfigProcessor.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 9548bf33b7b..17abc3d161d 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -47,6 +47,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + /// Get method for string name. Throw exception for wrong name EncryptionMethod getEncryptionMethod(const std::string & name) { @@ -58,6 +61,8 @@ EncryptionMethod getEncryptionMethod(const std::string & name) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", name); } +} + /// For cutting preprocessed path to this base static std::string main_config_path; From 14dfebba49543378b80716cffb5aaea7dcc7fbf7 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 13:35:11 +0000 Subject: [PATCH 061/478] Fix links in MD --- docs/en/operations/configuration-files.md | 2 +- docs/ru/operations/configuration-files.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index b5d52acca49..71d5885058a 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -56,7 +56,7 @@ Substitutions can also be performed from ZooKeeper. To do this, specify the attr ## Decryption {#decryption} -Elements with text nodes may be encrypted with [encryption codecs](../../sql-reference/statements/create/table.md#encryption-codecs). 
In this case `` section should be included in configuration file and each element node with encrypted text should have `encryption_codec` attribute with name of codec. +Elements with text nodes may be encrypted with [encryption codecs](../sql-reference/statements/create/table.md#encryption-codecs). In this case `` section should be included in configuration file and each element node with encrypted text should have `encryption_codec` attribute with name of codec. Example: diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 96512fbbe23..df50d900919 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -87,7 +87,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ## Расшифровка {#decryption} -Элементы с текстовыми узлами могут быть зашифрован с помощью [кодеков шифрования](../../sql-reference/statements/create/table.md#encryption-codecs). В этом случае секция `` должна быть включена в конфигурационный файл и каждый элемент с зашифрованным текстом должен иметь аттрибут `encryption_codec` с именем кодека. +Элементы с текстовыми узлами могут быть зашифрован с помощью [кодеков шифрования](../sql-reference/statements/create/table.md#create-query-encryption-codecs). В этом случае секция `` должна быть включена в конфигурационный файл и каждый элемент с зашифрованным текстом должен иметь аттрибут `encryption_codec` с именем кодека. Пример: From 3d64cf4423b9fb4b935786eca392875d3b66c17c Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Jun 2023 15:40:32 +0000 Subject: [PATCH 062/478] Add dbms in cmake --- src/Common/Config/CMakeLists.txt | 2 ++ utils/config-processor/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index ec7bdd10196..fdcba5d4a4b 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -15,6 +15,7 @@ target_link_libraries(clickhouse_common_config Poco::XML PRIVATE string_utils + dbms ) add_library(clickhouse_common_config_no_zookeeper_log ${SRCS}) @@ -25,6 +26,7 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log Poco::XML PRIVATE string_utils + dbms ) if (TARGET ch_contrib::yaml_cpp) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 80c3535ef4e..53b6163ba87 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE dbms) +target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) From f830f246627327f5ede014d08da7af8964f4cb7c Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 15 Jun 2023 14:31:21 +0000 Subject: [PATCH 063/478] Revert "Add dbms in cmake" This reverts commit 3d64cf4423b9fb4b935786eca392875d3b66c17c. 
--- src/Common/Config/CMakeLists.txt | 2 -- utils/config-processor/CMakeLists.txt | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Common/Config/CMakeLists.txt b/src/Common/Config/CMakeLists.txt index fdcba5d4a4b..ec7bdd10196 100644 --- a/src/Common/Config/CMakeLists.txt +++ b/src/Common/Config/CMakeLists.txt @@ -15,7 +15,6 @@ target_link_libraries(clickhouse_common_config Poco::XML PRIVATE string_utils - dbms ) add_library(clickhouse_common_config_no_zookeeper_log ${SRCS}) @@ -26,7 +25,6 @@ target_link_libraries(clickhouse_common_config_no_zookeeper_log Poco::XML PRIVATE string_utils - dbms ) if (TARGET ch_contrib::yaml_cpp) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 53b6163ba87..80c3535ef4e 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) +target_link_libraries(config-processor PRIVATE dbms) From 98597a3b422c4b145ed44ecfeb305840643dfb1b Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 15 Jun 2023 14:50:52 +0000 Subject: [PATCH 064/478] Add USE_SSL --- src/Common/Config/ConfigProcessor.cpp | 16 ++++++++++++++++ src/Common/Config/ConfigProcessor.h | 4 ++++ src/Common/Config/ConfigReloader.cpp | 3 +++ src/Common/examples/CMakeLists.txt | 2 ++ src/Daemon/BaseDaemon.cpp | 3 +++ 5 files changed, 28 insertions(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 17abc3d161d..11b45977322 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -26,10 +26,13 @@ #include #include #include + +#if USE_SSL #include #include #include #include +#endif #define PREPROCESSED_SUFFIX "-preprocessed" @@ -44,9 +47,12 @@ namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; extern const int CANNOT_LOAD_CONFIG; +#if USE_SSL extern const int BAD_ARGUMENTS; +#endif } +#if USE_SSL namespace { @@ -63,6 +69,8 @@ EncryptionMethod getEncryptionMethod(const std::string & name) } +#endif + /// For cutting preprocessed path to this base static std::string main_config_path; @@ -192,6 +200,8 @@ static void mergeAttributes(Element & config_element, Element & with_element) with_element_attributes->release(); } +#if USE_SSL + std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value) { auto codec = DB::CompressionCodecEncrypted(getEncryptionMethod(codec_name)); @@ -252,6 +262,8 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) } } +#endif + void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) { const NodeListPtr with_nodes = with_root->childNodes(); @@ -781,6 +793,8 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path}; } +#if USE_SSL + void ConfigProcessor::decryptConfig(LoadedConfig & loaded_config) { DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs"); @@ -789,6 +803,8 @@ void ConfigProcessor::decryptConfig(LoadedConfig & loaded_config) loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); } +#endif + void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string 
preprocessed_dir) { try diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index c9b227863f0..8d7caa9e9c8 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -92,6 +92,7 @@ public: const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed = false); +#if USE_SSL /// Encrypt text value std::string encryptValue(const std::string & codec_name, const std::string & value); @@ -100,6 +101,7 @@ public: /// Decrypt nodes in config with specified encryption attributes void decryptConfig(LoadedConfig & loaded_config); +#endif /// Save preprocessed config to specified directory. /// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/ @@ -133,7 +135,9 @@ private: using NodePtr = Poco::AutoPtr; +#if USE_SSL void decryptRecursive(Poco::XML::Node * config_root); +#endif void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); diff --git a/src/Common/Config/ConfigReloader.cpp b/src/Common/Config/ConfigReloader.cpp index a4d2cb3d305..45192d2d281 100644 --- a/src/Common/Config/ConfigReloader.cpp +++ b/src/Common/Config/ConfigReloader.cpp @@ -130,7 +130,10 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac return; } config_processor.savePreprocessedConfig(loaded_config, preprocessed_dir); + +#if USE_SSL config_processor.decryptConfig(loaded_config); +#endif /** We should remember last modification time if and only if config was successfully loaded * Otherwise a race condition could occur during config files update: diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 12a2b59ff77..0965a07761b 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -83,5 +83,7 @@ endif() clickhouse_add_executable (interval_tree interval_tree.cpp) target_link_libraries (interval_tree PRIVATE dbms) +if (ENABLE_SSL) clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) target_link_libraries (encrypt_decrypt PRIVATE dbms) +endif() diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 4b1cd4e036e..5747cfe158d 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -663,7 +663,10 @@ void BaseDaemon::initialize(Application & self) umask(umask_num); DB::ConfigProcessor(config_path).savePreprocessedConfig(loaded_config, ""); + +#if USE_SSL DB::ConfigProcessor(config_path).decryptConfig(loaded_config); +#endif /// Write core dump on crash. 
{ From 55013342d18ba3363695358d02e693b82872a93b Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 16 Jun 2023 13:26:45 +0000 Subject: [PATCH 065/478] Fix code align in cmake --- src/Common/examples/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 0965a07761b..90a238c9800 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -84,6 +84,6 @@ clickhouse_add_executable (interval_tree interval_tree.cpp) target_link_libraries (interval_tree PRIVATE dbms) if (ENABLE_SSL) -clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) -target_link_libraries (encrypt_decrypt PRIVATE dbms) + clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp) + target_link_libraries (encrypt_decrypt PRIVATE dbms) endif() From f026cf17a3c8e8a69d3837237e9c9dac08937644 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 16 Jun 2023 15:26:58 +0000 Subject: [PATCH 066/478] Fix building with BUILD_STANDALONE_KEEPER --- programs/keeper/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index e5d56023f7b..aa90ba78f44 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -84,6 +84,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp From 5bba0ff6984de2baf8e94efef11eafa036090ad5 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 16 Jun 2023 20:13:36 +0000 Subject: [PATCH 067/478] Fix build of keeper-bench --- utils/config-processor/CMakeLists.txt | 6 +++++- utils/keeper-bench/CMakeLists.txt | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 80c3535ef4e..4394083a1c3 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,2 +1,6 @@ clickhouse_add_executable (config-processor config-processor.cpp) -target_link_libraries(config-processor PRIVATE dbms) +if (ENABLE_SSL) + target_link_libraries(config-processor PRIVATE dbms) +else () + target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) +endif () diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 49ce2068246..e8daec9e164 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,4 +4,9 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log ch_contrib::rapidjson) +if (ENABLE_SSL) + target_link_libraries(keeper-bench PRIVATE dbms) +else () + target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log) +endif () +target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) From af2be06c42395d3f29d7325e28e89e7dc1aad12d Mon Sep 17 00:00:00 2001 
From: Igor Nikonov Date: Fri, 16 Jun 2023 20:56:26 +0000 Subject: [PATCH 068/478] Remove test --- ..._duplicate_distinct_optimization.reference | 136 ------------------ .../01455_duplicate_distinct_optimization.sql | 32 ----- 2 files changed, 168 deletions(-) delete mode 100644 tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference delete mode 100644 tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql diff --git a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference b/tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference deleted file mode 100644 index 82e887e1b92..00000000000 --- a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.reference +++ /dev/null @@ -1,136 +0,0 @@ -SELECT DISTINCT number -FROM numbers(1) -SELECT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) -) -SELECT DISTINCT number * 2 -FROM -( - SELECT DISTINCT - number * 2, - number - FROM numbers(1) -) -SELECT number -FROM -( - SELECT DISTINCT number * 2 AS number - FROM numbers(1) -) -SELECT - b, - a -FROM -( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) -) -SELECT DISTINCT a -FROM -( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) -) -SELECT a -FROM -( - SELECT DISTINCT a - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT DISTINCT a -FROM -( - SELECT - a, - b - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT - a, - b -FROM -( - SELECT - b, - a - FROM - ( - SELECT DISTINCT - number AS a, - number AS b - FROM numbers(1) - ) -) -SELECT - a, - b -FROM -( - SELECT - b, - a, - a + b - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT DISTINCT a -FROM -( - SELECT a - FROM - ( - SELECT DISTINCT - number % 2 AS a, - number % 3 AS b - FROM numbers(100) - ) -) -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) -) AS t1 -CROSS JOIN numbers(2) AS t2 -SELECT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) AS t1 - CROSS JOIN numbers(2) AS t2 -) -SELECT DISTINCT number -FROM -( - SELECT DISTINCT number - FROM numbers(1) - UNION ALL - SELECT DISTINCT number - FROM numbers(2) -) -0 -1 diff --git a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql b/tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql deleted file mode 100644 index 6fbf80a4dc3..00000000000 --- a/tests/queries/0_stateless/01455_duplicate_distinct_optimization.sql +++ /dev/null @@ -1,32 +0,0 @@ -SET optimize_duplicate_order_by_and_distinct = 1; - -EXPLAIN SYNTAX SELECT DISTINCT number FROM numbers(1); -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1)); -EXPLAIN SYNTAX SELECT DISTINCT number * 2 FROM (SELECT DISTINCT number * 2, number FROM numbers(1)); -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number * 2 AS number FROM numbers(1)); -EXPLAIN SYNTAX SELECT DISTINCT b, a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100)); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT a, b FROM 
(SELECT DISTINCT b, a FROM (SELECT DISTINCT number a, number b FROM numbers(1))); -EXPLAIN SYNTAX SELECT DISTINCT a, b FROM (SELECT b, a, a + b FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT a FROM (SELECT a FROM (SELECT DISTINCT number % 2 AS a, number % 3 AS b FROM numbers(100))); -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1)) t1 CROSS JOIN numbers(2) t2; -EXPLAIN SYNTAX SELECT DISTINCT number FROM (SELECT DISTINCT number FROM numbers(1) t1 CROSS JOIN numbers(2) t2); - -EXPLAIN SYNTAX SELECT DISTINCT number FROM -( - (SELECT DISTINCT number FROM numbers(1)) - UNION ALL - (SELECT DISTINCT number FROM numbers(2)) -); - --- - -SELECT DISTINCT number FROM -( - (SELECT DISTINCT number FROM numbers(1)) - UNION ALL - (SELECT DISTINCT number FROM numbers(2)) -) -ORDER BY number; From 85d86fec8337e347b519ebd7318012e83af109ec Mon Sep 17 00:00:00 2001 From: sanjam Date: Thu, 22 Jun 2023 13:38:50 +0000 Subject: [PATCH 069/478] external_aggregation_fix --- .../HashTable/TwoLevelStringHashTable.h | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index ea1914348b2..ee6dcd05d9a 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -113,14 +113,20 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - n[0] &= -1ULL >> s; - } + if constexpr (std::endian::native == std::endian::little) + n[0] &= -1ULL >> s; + else + n[0] &= -1ULL << s; + } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); - n[0] >>= s; - } + if constexpr (std::endian::native == std::endian::little) + n[0] >>= s; + else + n[0] <<= s; + } auto res = hash(k8); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -131,8 +137,11 @@ public: memcpy(&n[0], p, 8); const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); - n[1] >>= s; - auto res = hash(k16); + if constexpr (std::endian::native == std::endian::little) + n[1] >>= s; + else + n[1] <<= s; + auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); @@ -142,8 +151,11 @@ public: memcpy(&n[0], p, 16); const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); - n[2] >>= s; - auto res = hash(k24); + if constexpr (std::endian::native == std::endian::little) + n[2] >>= s; + else + n[2] <<= s; + auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); From cf082f2f9a68c21241c9b6667a8e4241da220601 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Jun 2023 17:24:43 +0000 Subject: [PATCH 070/478] Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... 
table functions --- .../IO/ReadBufferFromAzureBlobStorage.cpp | 8 +- src/Disks/IO/ReadBufferFromAzureBlobStorage.h | 5 +- .../AzureBlobStorage/AzureObjectStorage.cpp | 10 +- .../AzureBlobStorage/AzureObjectStorage.h | 5 + src/IO/Progress.h | 9 +- src/IO/ReadBufferFromFileBase.cpp | 2 +- src/IO/ReadBufferFromFileBase.h | 2 +- src/IO/ReadBufferFromS3.cpp | 15 +- src/IO/ReadBufferFromS3.h | 6 +- src/IO/ReadWriteBufferFromHTTP.cpp | 15 +- src/IO/ReadWriteBufferFromHTTP.h | 5 +- .../Executors/ExecutionThreadContext.cpp | 3 + src/Processors/IProcessor.h | 1 + src/Processors/ISource.h | 1 + src/Processors/Sources/RemoteSource.cpp | 2 + src/QueryPipeline/ReadProgressCallback.cpp | 12 ++ src/QueryPipeline/ReadProgressCallback.h | 3 + src/Storages/HDFS/ReadBufferFromHDFS.cpp | 29 +++- src/Storages/HDFS/ReadBufferFromHDFS.h | 6 +- src/Storages/HDFS/StorageHDFS.cpp | 74 ++++----- src/Storages/HDFS/StorageHDFS.h | 6 - src/Storages/StorageAzureBlob.cpp | 83 ++++------ src/Storages/StorageAzureBlob.h | 18 +-- src/Storages/StorageFile.cpp | 32 +--- src/Storages/StorageS3.cpp | 152 ++++++++---------- src/Storages/StorageS3.h | 29 +--- src/Storages/StorageS3Cluster.cpp | 2 +- src/Storages/StorageURL.cpp | 37 ++--- src/Storages/StorageURL.h | 8 +- 29 files changed, 268 insertions(+), 312 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 129bb97be09..6a328de0341 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace ProfileEvents @@ -36,7 +37,8 @@ ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( size_t max_single_download_retries_, bool use_external_buffer_, bool restricted_seek_, - size_t read_until_position_) + size_t read_until_position_, + std::function progress_callback_) : ReadBufferFromFileBase(use_external_buffer_ ? 
0 : read_settings_.remote_fs_buffer_size, nullptr, 0) , blob_container_client(blob_container_client_) , path(path_) @@ -47,6 +49,7 @@ ReadBufferFromAzureBlobStorage::ReadBufferFromAzureBlobStorage( , use_external_buffer(use_external_buffer_) , restricted_seek(restricted_seek_) , read_until_position(read_until_position_) + , progress_callback(progress_callback_) { if (!use_external_buffer) { @@ -127,6 +130,9 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() if (bytes_read == 0) return false; + if (progress_callback) + progress_callback(FileProgress(bytes_read)); + BufferBase::set(data_ptr, bytes_read, 0); offset += bytes_read; diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h index 4e21f543653..6f683dcf1ce 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.h @@ -25,7 +25,8 @@ public: size_t max_single_download_retries_, bool use_external_buffer_ = false, bool restricted_seek_ = false, - size_t read_until_position_ = 0); + size_t read_until_position_ = 0, + std::function progress_callback_ = {}); off_t seek(off_t off, int whence) override; @@ -74,6 +75,8 @@ private: size_t data_capacity; Poco::Logger * log = &Poco::Logger::get("ReadBufferFromAzureBlobStorage"); + + std::function progress_callback; }; } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index dbb41851053..982c376404a 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -191,7 +191,7 @@ std::unique_ptr AzureObjectStorage::readObject( /// NOLI return std::make_unique( client.get(), object.remote_path, patchSettings(read_settings), settings_ptr->max_single_read_retries, - settings_ptr->max_single_download_retries); + settings_ptr->max_single_download_retries, false, false, 0, progress_callback); } std::unique_ptr AzureObjectStorage::readObjects( /// NOLINT @@ -216,7 +216,8 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL settings_ptr->max_single_download_retries, /* use_external_buffer */true, /* restricted_seek */true, - read_until_position); + read_until_position, + progress_callback); }; switch (read_settings.remote_fs_method) @@ -390,6 +391,11 @@ std::unique_ptr AzureObjectStorage::cloneObjectStorage(const std ); } +void AzureObjectStorage::setProgressCallback(const ContextPtr & context) +{ + progress_callback = context->getFileProgressCallback(); +} + } #endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index b5f81cef235..ee144cdd56e 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -123,6 +123,9 @@ public: bool isRemote() const override { return true; } + /// Set progress callback to read buffer while reading from storage. 
+ void setProgressCallback(const ContextPtr & context); + private: const String name; /// client used to access the files in the Blob Storage cloud @@ -132,6 +135,8 @@ private: Poco::Logger * log; DataSourceDescription data_source_description; + + std::function progress_callback; }; } diff --git a/src/IO/Progress.h b/src/IO/Progress.h index c21b1b854b0..a68ff9bc5c2 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -40,9 +40,10 @@ struct ReadProgress UInt64 read_rows = 0; UInt64 read_bytes = 0; UInt64 total_rows_to_read = 0; + UInt64 total_bytes_to_read = 0; - ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0) - : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {} + ReadProgress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0) + : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {} }; struct WriteProgress @@ -98,8 +99,8 @@ struct Progress Progress() = default; - Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0) - : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_) {} + Progress(UInt64 read_rows_, UInt64 read_bytes_, UInt64 total_rows_to_read_ = 0, UInt64 total_bytes_to_read_ = 0) + : read_rows(read_rows_), read_bytes(read_bytes_), total_rows_to_read(total_rows_to_read_), total_bytes_to_read(total_bytes_to_read_) {} explicit Progress(ReadProgress read_progress) : read_rows(read_progress.read_rows), read_bytes(read_progress.read_bytes), total_rows_to_read(read_progress.total_rows_to_read) {} diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 4181615bc52..4ac3f984f78 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -42,7 +42,7 @@ void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) setProfileCallback([file_progress_callback](const ProfileInfo & progress) { - file_progress_callback(FileProgress(progress.bytes_read, 0)); + file_progress_callback(FileProgress(progress.bytes_read)); }); } diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index b77db29bc23..2abdf883ab0 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -52,7 +52,7 @@ public: size_t getFileSize() override; - void setProgressCallback(ContextPtr context); + virtual void setProgressCallback(ContextPtr context); protected: std::optional file_size; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index d1cb1ec9ab0..36ff81a85d4 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -162,12 +163,13 @@ bool ReadBufferFromS3::nextImpl() offset += working_buffer.size(); if (read_settings.remote_throttler) read_settings.remote_throttler->add(working_buffer.size(), ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); - + if (progress_callback) + progress_callback(FileProgress(working_buffer.size())); return true; } -size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) +size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & custom_progress_callback) { if (n == 0) return 0; @@ -184,7 +186,9 @@ size_t ReadBufferFromS3::readBigAt(char * to, size_t n, 
size_t range_begin, cons auto result = sendRequest(range_begin, range_begin + n - 1); std::istream & istr = result.GetBody(); - size_t bytes = copyFromIStreamWithProgressCallback(istr, to, n, progress_callback); + size_t bytes = copyFromIStreamWithProgressCallback(istr, to, n, custom_progress_callback); + if (progress_callback) + progress_callback(FileProgress(bytes, 0)); ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes); @@ -415,6 +419,11 @@ Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin } } +void ReadBufferFromS3::setProgressCallback(DB::ContextPtr context) +{ + progress_callback = context->getFileProgressCallback(); +} + } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 0f665861a1e..824038c7af0 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -77,10 +77,12 @@ public: String getFileName() const override { return bucket + "/" + key; } - size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) override; + size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & custom_progress_callback) override; bool supportsReadAt() override { return true; } + void setProgressCallback(ContextPtr context) override; + private: std::unique_ptr initialize(); @@ -100,6 +102,8 @@ private: /// There is different seek policy for disk seek and for non-disk seek /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). bool restricted_seek; + + std::function progress_callback; }; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index cf1159bfb4b..7bd7f4a9b8e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -587,11 +587,13 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() internal_buffer = impl->buffer(); working_buffer = internal_buffer; offset_from_begin_pos += working_buffer.size(); + if (progress_callback) + progress_callback(FileProgress(working_buffer.size())); return true; } template -size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) +size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & custom_progress_callback) { /// Caller must have checked supportsReadAt(). /// This ensures we've sent at least one HTTP request and populated saved_uri_redirect. 
@@ -633,8 +635,9 @@ size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, si toString(response.getStatus()), uri_.toString(), offset, offset + n); bool cancelled; - size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); - + size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, custom_progress_callback, &cancelled); + if (progress_callback) + progress_callback(FileProgress(r)); return r; } catch (const Poco::Exception & e) @@ -780,6 +783,12 @@ void ReadWriteBufferFromHTTPBase::setNextCallback(NextCallb next_callback(count()); } +template +void ReadWriteBufferFromHTTPBase::setProgressCallback(std::function file_progress_callback_) +{ + progress_callback = file_progress_callback_; +} + template const std::string & ReadWriteBufferFromHTTPBase::getCompressionMethod() const { return content_encoding; } diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 2d2ae5fe724..18bd31fcdce 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -92,6 +92,7 @@ namespace detail HTTPHeaderEntries http_header_entries; const RemoteHostFilter * remote_host_filter = nullptr; std::function next_callback; + std::function progress_callback; size_t buffer_size; bool use_external_buffer; @@ -176,7 +177,7 @@ namespace detail bool nextImpl() override; - size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function & custom_progress_callback) override; off_t getPosition() override; @@ -199,6 +200,8 @@ namespace detail /// passed through the buffer void setNextCallback(NextCallback next_callback_); + void setProgressCallback(std::function progress_callback_); + const std::string & getCompressionMethod() const; std::optional getLastModificationTime(); diff --git a/src/Processors/Executors/ExecutionThreadContext.cpp b/src/Processors/Executors/ExecutionThreadContext.cpp index 794f478b272..0fa7e0b552f 100644 --- a/src/Processors/Executors/ExecutionThreadContext.cpp +++ b/src/Processors/Executors/ExecutionThreadContext.cpp @@ -56,6 +56,9 @@ static void executeJob(ExecutingGraph::Node * node, ReadProgressCallback * read_ if (read_progress->counters.total_rows_approx) read_progress_callback->addTotalRowsApprox(read_progress->counters.total_rows_approx); + if (read_progress->counters.total_bytes) + read_progress_callback->addTotalBytes(read_progress->counters.total_bytes); + if (!read_progress_callback->onProgress(read_progress->counters.read_rows, read_progress->counters.read_bytes, read_progress->limits)) node->processor->cancel(); } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 34322acb2af..c6bef186877 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -343,6 +343,7 @@ public: uint64_t read_rows = 0; uint64_t read_bytes = 0; uint64_t total_rows_approx = 0; + uint64_t total_bytes = 0; }; struct ReadProgress diff --git a/src/Processors/ISource.h b/src/Processors/ISource.h index 292f79ba348..2593a241c63 100644 --- a/src/Processors/ISource.h +++ b/src/Processors/ISource.h @@ -43,6 +43,7 @@ public: std::optional getReadProgress() final; void addTotalRowsApprox(size_t value) { read_progress.total_rows_approx += value; } + void addTotalBytes(size_t value) { read_progress.total_bytes += value; } }; using SourcePtr = std::shared_ptr; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 
310a1d33e28..74ab3649068 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -77,6 +77,8 @@ std::optional RemoteSource::tryGenerate() { if (value.total_rows_to_read) addTotalRowsApprox(value.total_rows_to_read); + if (value.total_bytes_to_read) + addTotalBytes(value.total_bytes_to_read); progress(value.read_rows, value.read_bytes); }); diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp index 0f50d56f1a5..4d7c7aa0f2a 100644 --- a/src/QueryPipeline/ReadProgressCallback.cpp +++ b/src/QueryPipeline/ReadProgressCallback.cpp @@ -63,6 +63,18 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c process_list_elem->updateProgressIn(total_rows_progress); } + size_t bytes = 0; + if ((bytes = total_bytes.exchange(0)) != 0) + { + Progress total_bytes_progress = {0, 0, 0, bytes}; + + if (progress_callback) + progress_callback(total_bytes_progress); + + if (process_list_elem) + process_list_elem->updateProgressIn(total_bytes_progress); + } + Progress value {read_rows, read_bytes}; if (progress_callback) diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h index 08f2f9fc99b..5dbf3344bdf 100644 --- a/src/QueryPipeline/ReadProgressCallback.h +++ b/src/QueryPipeline/ReadProgressCallback.h @@ -23,6 +23,7 @@ public: void setProcessListElement(QueryStatusPtr elem); void setProgressCallback(const ProgressCallback & callback) { progress_callback = callback; } void addTotalRowsApprox(size_t value) { total_rows_approx += value; } + void addTotalBytes(size_t value) { total_bytes += value; } /// Skip updating profile events. /// For merges in mutations it may need special logic, it's done inside ProgressCallback. @@ -37,6 +38,8 @@ private: /// The approximate total number of rows to read. For progress bar. std::atomic_size_t total_rows_approx = 0; + /// The total number of bytes to read. For progress bar. + std::atomic_size_t total_bytes = 0; std::mutex limits_and_quotas_mutex; Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index ee8e0764db0..2c2c5047cb1 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -3,6 +3,7 @@ #if USE_HDFS #include #include +#include #include #include #include @@ -42,19 +43,23 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemory file_size; + explicit ReadBufferFromHDFSImpl( const std::string & hdfs_uri_, const std::string & hdfs_file_path_, const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_, - bool use_external_buffer_) + bool use_external_buffer_, + std::optional file_size_) : BufferWithOwnMemory(use_external_buffer_ ? 
0 : read_settings_.remote_fs_buffer_size) , hdfs_uri(hdfs_uri_) , hdfs_file_path(hdfs_file_path_) , builder(createHDFSBuilder(hdfs_uri_, config_)) , read_settings(read_settings_) , read_until_position(read_until_position_) + , file_size(file_size_) { fs = createHDFSFS(builder.get()); fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); @@ -70,12 +75,16 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemorymSize; + file_size = static_cast(file_info->mSize); + return *file_size; } bool nextImpl() override @@ -156,10 +165,11 @@ ReadBufferFromHDFS::ReadBufferFromHDFS( const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_, - bool use_external_buffer_) + bool use_external_buffer_, + std::optional file_size_) : ReadBufferFromFileBase(read_settings_.remote_fs_buffer_size, nullptr, 0) , impl(std::make_unique( - hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_)) + hdfs_uri_, hdfs_file_path_, config_, read_settings_, read_until_position_, use_external_buffer_, file_size_)) , use_external_buffer(use_external_buffer_) { } @@ -188,7 +198,11 @@ bool ReadBufferFromHDFS::nextImpl() auto result = impl->next(); if (result) + { BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl` + if (progress_callback) + progress_callback(FileProgress(working_buffer.size())); + } return result; } @@ -248,6 +262,11 @@ String ReadBufferFromHDFS::getFileName() const return impl->hdfs_file_path; } +void ReadBufferFromHDFS::setProgressCallback(DB::ContextPtr context) +{ + progress_callback = context->getFileProgressCallback(); +} + } #endif diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index 6aed3ddff26..3dce6a93cba 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -29,7 +29,8 @@ public: const Poco::Util::AbstractConfiguration & config_, const ReadSettings & read_settings_, size_t read_until_position_ = 0, - bool use_external_buffer = false); + bool use_external_buffer = false, + std::optional file_size = std::nullopt); ~ReadBufferFromHDFS() override; @@ -47,9 +48,12 @@ public: String getFileName() const override; + void setProgressCallback(ContextPtr context) override; + private: std::unique_ptr impl; bool use_external_buffer; + std::function progress_callback; }; } diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 583c45a0633..79cda3050d6 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -259,8 +259,13 @@ public: { const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(uri); uris = getPathsList(path_from_uri, uri_without_path, context_); + auto file_progress_callback = context_->getFileProgressCallback(); for (auto & elem : uris) + { elem.path = uri_without_path + elem.path; + if (file_progress_callback && elem.info) + file_progress_callback(FileProgress(0, elem.info->size)); + } uris_iter = uris.begin(); } @@ -281,37 +286,40 @@ private: std::vector::iterator uris_iter; }; -class HDFSSource::URISIterator::Impl +class HDFSSource::URISIterator::Impl : WithContext { public: - explicit Impl(const std::vector & uris_, ContextPtr context) + explicit Impl(const std::vector & uris_, ContextPtr context_) + : WithContext(context_), uris(uris_), file_progress_callback(context_->getFileProgressCallback()) { - auto 
path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]); - HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef()); - auto fs = createHDFSFS(builder.get()); - for (const auto & uri : uris_) - { - path_and_uri = getPathFromUriAndUriWithoutPath(uri); - if (!hdfsExists(fs.get(), path_and_uri.first.c_str())) - uris.push_back(uri); - } - uris_iter = uris.begin(); } StorageHDFS::PathWithInfo next() { - std::lock_guard lock(mutex); - if (uris_iter == uris.end()) + size_t current_index = index.fetch_add(1); + if (current_index >= uris.size()) return {"", {}}; - auto key = *uris_iter; - ++uris_iter; - return {key, {}}; + + auto uri = uris[current_index]; + auto path_and_uri = getPathFromUriAndUriWithoutPath(uri); + HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", getContext()->getGlobalContext()->getConfigRef()); + auto fs = createHDFSFS(builder.get()); + auto * hdfs_info = hdfsGetPathInfo(fs.get(), path_and_uri.first.c_str()); + std::optional info; + if (hdfs_info) + { + info = StorageHDFS::PathInfo{hdfs_info->mLastMod, static_cast(hdfs_info->mSize)}; + if (file_progress_callback && hdfs_info) + file_progress_callback(FileProgress(0, hdfs_info->mSize)); + } + + return {uri, info}; } private: - std::mutex mutex; + std::atomic_size_t index = 0; Strings uris; - Strings::iterator uris_iter; + std::function file_progress_callback; }; HDFSSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) @@ -348,7 +356,7 @@ HDFSSource::HDFSSource( UInt64 max_block_size_, std::shared_ptr file_iterator_, ColumnsDescription columns_description_) - : ISource(getHeader(block_for_format_, requested_virtual_columns_)) + : ISource(getHeader(block_for_format_, requested_virtual_columns_), false) , WithContext(context_) , storage(std::move(storage_)) , block_for_format(block_for_format_) @@ -374,13 +382,17 @@ bool HDFSSource::initialize() continue; current_path = path_with_info.path; + std::optional file_size; + if (path_with_info.info) + file_size = path_with_info.info->size; const auto [path_from_uri, uri_without_path] = getPathFromUriAndUriWithoutPath(current_path); auto compression = chooseCompressionMethod(path_from_uri, storage->compression_method); auto impl = std::make_unique( - uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings()); + uri_without_path, path_from_uri, getContext()->getGlobalContext()->getConfigRef(), getContext()->getReadSettings(), 0, false, file_size); if (!skip_empty_files || !impl->eof()) { + impl->setProgressCallback(getContext()); const Int64 zstd_window_log_max = getContext()->getSettingsRef().zstd_window_log_max; read_buf = wrapReadBufferWithCompressionMethod(std::move(impl), compression, static_cast(zstd_window_log_max)); break; @@ -389,15 +401,7 @@ bool HDFSSource::initialize() current_path = path_with_info.path; - if (path_with_info.info && path_with_info.info->size) - { - /// Adjust total_rows_approx_accumulated with new total size. 
- if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + path_with_info.info->size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += path_with_info.info->size; - } - - input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); + auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -434,14 +438,7 @@ Chunk HDFSSource::generate() { Columns columns = chunk.getColumns(); UInt64 num_rows = chunk.getNumRows(); - - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } + progress(num_rows, 0); for (const auto & virtual_column : requested_virtual_columns) { @@ -465,7 +462,6 @@ Chunk HDFSSource::generate() reader.reset(); pipeline.reset(); - input_format.reset(); read_buf.reset(); if (!initialize()) diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 74801b68f73..5a3b97a0e3c 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -164,16 +164,10 @@ private: ColumnsDescription columns_description; std::unique_ptr read_buf; - std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; String current_path; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_files_size = 0; - /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
bool initialize(); }; diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 336c4eaed9b..8e06ceda885 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -617,13 +617,13 @@ Pipe StorageAzureBlob::read( /// Iterate through disclosed globs and make a source for each file iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, std::nullopt, - configuration.blob_path, query_info.query, virtual_block, local_context, nullptr); + configuration.blob_path, query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } else { iterator_wrapper = std::make_shared( object_storage.get(), configuration.container, configuration.blobs_paths, - std::nullopt, query_info.query, virtual_block, local_context, nullptr); + std::nullopt, query_info.query, virtual_block, local_context, nullptr, local_context->getFileProgressCallback()); } ColumnsDescription columns_description; @@ -794,15 +794,16 @@ StorageAzureBlobSource::Iterator::Iterator( ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_) + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_) : WithContext(context_) , object_storage(object_storage_) , container(container_) - , keys(keys_) , blob_path_with_globs(blob_path_with_globs_) , query(query_) , virtual_header(virtual_header_) , outer_blobs(outer_blobs_) + , file_progress_callback(file_progress_callback_) { if (keys.has_value() && blob_path_with_globs.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot specify keys and glob simultaneously it's a bug"); @@ -810,11 +811,10 @@ StorageAzureBlobSource::Iterator::Iterator( if (!keys.has_value() && !blob_path_with_globs.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Both keys and glob mask are not specified"); - if (keys) + if (keys_) { Strings all_keys = *keys; - blobs_with_metadata.emplace(); /// Create a virtual block with one row to construct filter if (query && virtual_header && !all_keys.empty()) { @@ -843,29 +843,12 @@ StorageAzureBlobSource::Iterator::Iterator( all_keys = std::move(filtered_keys); } } - - for (auto && key : all_keys) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); - total_size += object_metadata.size_bytes; - blobs_with_metadata->emplace_back(RelativePathWithMetadata{key, object_metadata}); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata->back()); - } + keys = std::move(all_keys); } else { const String key_prefix = blob_path_with_globs->substr(0, blob_path_with_globs->find_first_of("*?{")); - - /// We don't have to list bucket, because there is no asterisks. 
- if (key_prefix.size() == blob_path_with_globs->size()) - { - ObjectMetadata object_metadata = object_storage->getObjectMetadata(*blob_path_with_globs); - blobs_with_metadata->emplace_back(*blob_path_with_globs, object_metadata); - if (outer_blobs) - outer_blobs->emplace_back(blobs_with_metadata->back()); - return; - } + assert(key_prefix.size() != blob_path_with_globs->size()); object_storage_iterator = object_storage->iterate(key_prefix); @@ -888,13 +871,17 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() if (keys) { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); - if (current_index >= blobs_with_metadata->size()) + if (current_index >= keys->size()) { is_finished = true; return {}; } - return (*blobs_with_metadata)[current_index]; + auto key = (*keys)[current_index]; + ObjectMetadata object_metadata = object_storage->getObjectMetadata(key); + if (file_progress_callback) + file_progress_callback(FileProgress(0, object_metadata.size_bytes)); + return {key, object_metadata}; } else { @@ -946,11 +933,12 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() const auto & idxs = typeid_cast(*block.getByName("_idx").column); std::lock_guard lock(next_mutex); - blob_path_with_globs.reset(); - blob_path_with_globs.emplace(); + blobs_with_metadata.reset(); + blobs_with_metadata.emplace(); for (UInt64 idx : idxs.getData()) { - total_size.fetch_add(new_batch[idx].metadata.size_bytes, std::memory_order_relaxed); + if (file_progress_callback) + file_progress_callback(FileProgress(0, new_batch[idx].metadata.size_bytes)); blobs_with_metadata->emplace_back(std::move(new_batch[idx])); if (outer_blobs) outer_blobs->emplace_back(blobs_with_metadata->back()); @@ -963,8 +951,11 @@ RelativePathWithMetadata StorageAzureBlobSource::Iterator::next() std::lock_guard lock(next_mutex); blobs_with_metadata = std::move(new_batch); - for (const auto & [_, info] : *blobs_with_metadata) - total_size.fetch_add(info.size_bytes, std::memory_order_relaxed); + if (file_progress_callback) + { + for (const auto & [_, info] : *blobs_with_metadata) + file_progress_callback(FileProgress(0, info.size_bytes)); + } } } @@ -1011,17 +1002,9 @@ Chunk StorageAzureBlobSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + progress(num_rows, 0); const auto & file_path = reader.getPath(); - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -1046,13 +1029,6 @@ Chunk StorageAzureBlobSource::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
create_reader_pool.wait(); @@ -1083,7 +1059,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( AzureObjectStorage * object_storage_, const String & container_, std::shared_ptr file_iterator_) - :ISource(getHeader(sample_block_, requested_virtual_columns_)) + :ISource(getHeader(sample_block_, requested_virtual_columns_), false) , WithContext(context_) , requested_virtual_columns(requested_virtual_columns_) , format(format_) @@ -1101,13 +1077,7 @@ StorageAzureBlobSource::StorageAzureBlobSource( { reader = createReader(); if (reader) - { - const auto & read_buf = reader.getReadBuffer(); - if (read_buf) - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - reader_future = createReaderAsync(); - } } @@ -1149,7 +1119,7 @@ StorageAzureBlobSource::ReaderHolder StorageAzureBlobSource::createReader() auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{fs::path(container) / current_key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } std::future StorageAzureBlobSource::createReaderAsync() @@ -1163,6 +1133,7 @@ std::unique_ptr StorageAzureBlobSource::createAzureReadBuffer(const read_settings.enable_filesystem_cache = false; auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; const bool object_too_small = object_size <= 2 * download_buffer_size; + object_storage->setProgressCallback(getContext()); // Create a read buffer that will prefetch the first ~1 MB of the file. // When reading lots of tiny files, this prefetching almost doubles the throughput. diff --git a/src/Storages/StorageAzureBlob.h b/src/Storages/StorageAzureBlob.h index 25c791f1700..a78ba691b57 100644 --- a/src/Storages/StorageAzureBlob.h +++ b/src/Storages/StorageAzureBlob.h @@ -153,7 +153,8 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - RelativePathsWithMetadata * outer_blobs_); + RelativePathsWithMetadata * outer_blobs_, + std::function file_progress_callback_ = {}); RelativePathWithMetadata next(); size_t getTotalSize() const; @@ -182,6 +183,8 @@ public: std::atomic is_finished = false; std::atomic is_initialized = false; std::mutex next_mutex; + + std::function file_progress_callback; }; StorageAzureBlobSource( @@ -225,12 +228,10 @@ private: ReaderHolder( String path_, std::unique_ptr read_buf_, - std::shared_ptr input_format_, std::unique_ptr pipeline_, std::unique_ptr reader_) : path(std::move(path_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -251,7 +252,6 @@ private: /// reader uses pipeline, pipeline uses read_buf. 
reader = std::move(other.reader); pipeline = std::move(other.pipeline); - input_format = std::move(other.input_format); read_buf = std::move(other.read_buf); path = std::move(other.path); return *this; @@ -262,14 +262,9 @@ private: const PullingPipelineExecutor * operator->() const { return reader.get(); } const String & getPath() const { return path; } - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } - private: String path; std::unique_ptr read_buf; - std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; }; @@ -282,11 +277,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(); std::future createReaderAsync(); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7c04de1a28a..f196415e2dc 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -295,13 +295,8 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); - /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. - if (context->getApplicationType() == Context::ApplicationType::LOCAL - || context->getApplicationType() == Context::ApplicationType::CLIENT) - { - auto & in = static_cast(*nested_buffer); - in.setProgressCallback(context); - } + auto & in = static_cast(*nested_buffer); + in.setProgressCallback(context); int zstd_window_log_max = static_cast(context->getSettingsRef().zstd_window_log_max); return wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method, zstd_window_log_max); @@ -607,7 +602,7 @@ public: ColumnsDescription columns_description_, const Block & block_for_format_, std::unique_ptr read_buf_) - : ISource(getBlockForSource(block_for_format_, files_info_)) + : ISource(getBlockForSource(block_for_format_, files_info_), false) , storage(std::move(storage_)) , storage_snapshot(storage_snapshot_) , files_info(std::move(files_info_)) @@ -722,12 +717,6 @@ public: read_buf = createReadBuffer(current_path, file_stat, storage->use_table_fd, storage->table_fd, storage->compression_method, context); } - size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_files_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_files_size + file_size) / total_files_size * total_rows_approx_accumulated)); - total_files_size += file_size; - const Settings & settings = context->getSettingsRef(); chassert(!storage->paths.empty()); const auto max_parsing_threads = std::max(settings.max_threads/ storage->paths.size(), 1UL); @@ -753,6 +742,7 @@ public: if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + progress(num_rows, 0); /// Enrich with virtual columns. 
if (files_info->need_path_column) @@ -770,14 +760,6 @@ public: chunk.addColumn(column->convertToFullColumnIfConst()); } - if (num_rows && total_files_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_files_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } return chunk; } @@ -816,12 +798,6 @@ private: bool finished_generate = false; std::shared_lock shared_lock; - - UInt64 total_rows_approx_accumulated = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_max = 0; - - size_t total_files_size = 0; }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f4791e45e2b..d933ffe8041 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -148,7 +148,8 @@ public: const Block & virtual_header_, ContextPtr context_, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) + const S3Settings::RequestSettings & request_settings_, + std::function progress_callback_) : WithContext(context_) , client(S3::Client::create(client_)) , globbed_uri(globbed_uri_) @@ -158,6 +159,7 @@ public: , request_settings(request_settings_) , list_objects_pool(CurrentMetrics::StorageS3Threads, CurrentMetrics::StorageS3ThreadsActive, 1) , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) + , progress_callback(progress_callback_) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); @@ -194,11 +196,6 @@ public: return nextAssumeLocked(); } - size_t getTotalSize() const - { - return total_size.load(std::memory_order_relaxed); - } - ~Impl() { list_objects_pool.wait(); @@ -312,15 +309,19 @@ private: buffer.reserve(block.rows()); for (UInt64 idx : idxs.getData()) { - total_size.fetch_add(temp_buffer[idx].info->size, std::memory_order_relaxed); + if (progress_callback) + progress_callback(FileProgress(0, temp_buffer[idx].info->size)); buffer.emplace_back(std::move(temp_buffer[idx])); } } else { buffer = std::move(temp_buffer); - for (const auto & [_, info] : buffer) - total_size.fetch_add(info->size, std::memory_order_relaxed); + if (progress_callback) + { + for (const auto & [_, info] : buffer) + progress_callback(FileProgress(0, info->size)); + } } /// Set iterator only after the whole batch is processed @@ -381,7 +382,7 @@ private: ThreadPool list_objects_pool; ThreadPoolCallbackRunner list_objects_scheduler; std::future outcome_future; - std::atomic total_size = 0; + std::function progress_callback; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -391,8 +392,9 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( const Block & virtual_header, ContextPtr context, KeysWithInfo * read_keys_, - const S3Settings::RequestSettings & request_settings_) - : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_)) + const S3Settings::RequestSettings & request_settings_, + std::function progress_callback_) + : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, read_keys_, request_settings_, progress_callback_)) { } @@ -401,11 +403,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next() return pimpl->next(); } -size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const -{ - 
return pimpl->getTotalSize(); -} - class StorageS3Source::KeysIterator::Impl : WithContext { public: @@ -418,23 +415,26 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, - bool need_total_size, - KeysWithInfo * read_keys_) + KeysWithInfo * read_keys_, + std::function progress_callback_) : WithContext(context_) + , keys(keys_) + , client(S3::Client::create(client_)) + , version_id(version_id_) , bucket(bucket_) + , request_settings(request_settings_) , query(query_) , virtual_header(virtual_header_) + , progress_callback(progress_callback_) { - Strings all_keys = keys_; - /// Create a virtual block with one row to construct filter - if (query && virtual_header && !all_keys.empty()) + if (query && virtual_header && !keys.empty()) { /// Append "idx" column as the filter result virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); auto block = virtual_header.cloneEmpty(); - addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0); + addPathToVirtualColumns(block, fs::path(bucket) / keys.front(), 0); ASTPtr filter_ast; VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); @@ -442,8 +442,8 @@ public: if (filter_ast) { block = virtual_header.cloneEmpty(); - for (size_t i = 0; i < all_keys.size(); ++i) - addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i); + for (size_t i = 0; i < keys.size(); ++i) + addPathToVirtualColumns(block, fs::path(bucket) / keys[i], i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); const auto & idxs = typeid_cast(*block.getByName("_idx").column); @@ -451,29 +451,17 @@ public: Strings filtered_keys; filtered_keys.reserve(block.rows()); for (UInt64 idx : idxs.getData()) - filtered_keys.emplace_back(std::move(all_keys[idx])); + filtered_keys.emplace_back(std::move(keys[idx])); - all_keys = std::move(filtered_keys); + keys = std::move(filtered_keys); } } - for (auto && key : all_keys) - { - std::optional info; - /// In case all_keys.size() > 1, avoid getting object info now - /// (it will be done anyway eventually, but with delay and in parallel). - /// But progress bar will not work in this case. 
- if (need_total_size && all_keys.size() == 1) - { - info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); - total_size += info->size; - } - - keys.emplace_back(std::move(key), std::move(info)); - } - if (read_keys_) - *read_keys_ = keys; + { + for (const auto & key : keys) + read_keys_->push_back({key, {}}); + } } KeyWithInfo next() @@ -481,24 +469,27 @@ public: size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) return {}; + auto key = keys[current_index]; + std::optional info; + if (progress_callback) + { + info = S3::getObjectInfo(*client, bucket, key, version_id, request_settings); + progress_callback(FileProgress(0, info->size)); + } - return keys[current_index]; - } - - size_t getTotalSize() const - { - return total_size; + return {key, info}; } private: - KeysWithInfo keys; + Strings keys; std::atomic_size_t index = 0; - + std::unique_ptr client; + String version_id; String bucket; + S3Settings::RequestSettings request_settings; ASTPtr query; Block virtual_header; - - size_t total_size = 0; + std::function progress_callback; }; StorageS3Source::KeysIterator::KeysIterator( @@ -510,11 +501,11 @@ StorageS3Source::KeysIterator::KeysIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function progress_callback_) : pimpl(std::make_shared( client_, version_id_, keys_, bucket_, request_settings_, - query, virtual_header, context, need_total_size, read_keys)) + query, virtual_header, context, read_keys, progress_callback_)) { } @@ -523,11 +514,6 @@ StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() return pimpl->next(); } -size_t StorageS3Source::KeysIterator::getTotalSize() const -{ - return pimpl->getTotalSize(); -} - Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) { for (const auto & virtual_column : requested_virtual_columns) @@ -552,7 +538,7 @@ StorageS3Source::StorageS3Source( const String & version_id_, std::shared_ptr file_iterator_, const size_t download_thread_num_) - : ISource(getHeader(sample_block_, requested_virtual_columns_)) + : ISource(getHeader(sample_block_, requested_virtual_columns_), false) , WithContext(context_) , name(std::move(name_)) , bucket(bucket_) @@ -573,10 +559,7 @@ StorageS3Source::StorageS3Source( { reader = createReader(); if (reader) - { - total_objects_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); reader_future = createReaderAsync(); - } } StorageS3Source::ReaderHolder StorageS3Source::createReader() @@ -614,7 +597,7 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), input_format, std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{fs::path(bucket) / key_with_info.key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } std::future StorageS3Source::createReaderAsync() @@ -638,10 +621,13 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k return createAsyncS3ReadBuffer(key, read_settings, object_size); } - return std::make_unique( + auto buf = std::make_unique( client, bucket, key, version_id, request_settings, read_settings, /*use_external_buffer*/ false, /*offset_*/ 0, 
/*read_until_position_*/ 0, /*restricted_seek_*/ false, object_size); + + buf->setProgressCallback(getContext()); + return buf; } std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( @@ -652,7 +638,7 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( [this, read_settings, object_size] (const std::string & path, size_t read_until_position) -> std::unique_ptr { - return std::make_unique( + auto buf = std::make_unique( client, bucket, path, @@ -664,6 +650,8 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( read_until_position, /* restricted_seek */true, object_size); + buf->setProgressCallback(getContext()); + return buf; }; auto s3_impl = std::make_unique( @@ -713,17 +701,10 @@ Chunk StorageS3Source::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); + progress(num_rows, 0); const auto & file_path = reader.getPath(); - if (num_rows && total_objects_size) - { - size_t chunk_size = reader.getFormat()->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox(*this, num_rows, chunk_size, total_objects_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } - for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -748,13 +729,6 @@ Chunk StorageS3Source::generate() if (!reader) break; - size_t object_size = tryGetFileSizeFromReadBuffer(*reader.getReadBuffer()).value_or(0); - /// Adjust total_rows_approx_accumulated with new total size. - if (total_objects_size) - total_rows_approx_accumulated = static_cast( - std::ceil(static_cast(total_objects_size + object_size) / total_objects_size * total_rows_approx_accumulated)); - total_objects_size += object_size; - /// Even if task is finished the thread may be not freed in pool. /// So wait until it will be freed before scheduling a new task. 
create_reader_pool.wait(); @@ -990,8 +964,8 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, - bool need_total_size, - KeysWithInfo * read_keys) + KeysWithInfo * read_keys, + std::function progress_callback) { if (distributed_processing) { @@ -1002,14 +976,14 @@ std::shared_ptr StorageS3::createFileIterator( /// Iterate through disclosed globs and make a source for each file return std::make_shared( *configuration.client, configuration.url, query, virtual_block, - local_context, read_keys, configuration.request_settings); + local_context, read_keys, configuration.request_settings, progress_callback); } else { return std::make_shared( *configuration.client, configuration.url.version_id, configuration.keys, configuration.url.bucket, configuration.request_settings, query, - virtual_block, local_context, need_total_size, read_keys); + virtual_block, local_context, read_keys, progress_callback); } } @@ -1059,7 +1033,7 @@ Pipe StorageS3::read( } std::shared_ptr iterator_wrapper = createFileIterator( - query_configuration, distributed_processing, local_context, query_info.query, virtual_block); + query_configuration, distributed_processing, local_context, query_info.query, virtual_block, nullptr, local_context->getFileProgressCallback()); ColumnsDescription columns_description; Block block_for_format; @@ -1459,7 +1433,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( { KeysWithInfo read_keys; - auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys); + auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys); std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 13053833623..16d075a67d2 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -56,7 +56,6 @@ public: public: virtual ~IIterator() = default; virtual KeyWithInfo next() = 0; - virtual size_t getTotalSize() const = 0; KeyWithInfo operator ()() { return next(); } }; @@ -71,10 +70,10 @@ public: const Block & virtual_header, ContextPtr context, KeysWithInfo * read_keys_ = nullptr, - const S3Settings::RequestSettings & request_settings_ = {}); + const S3Settings::RequestSettings & request_settings_ = {}, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -94,11 +93,10 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback_ = {}); KeyWithInfo next() override; - size_t getTotalSize() const override; private: class Impl; @@ -113,8 +111,6 @@ public: KeyWithInfo next() override { return {callback(), {}}; } - size_t getTotalSize() const override { return 0; } - private: ReadTaskCallback callback; }; @@ -163,12 +159,10 @@ private: ReaderHolder( String path_, std::unique_ptr read_buf_, - std::shared_ptr input_format_, std::unique_ptr pipeline_, std::unique_ptr reader_) : path(std::move(path_)) , read_buf(std::move(read_buf_)) - , input_format(input_format_) , pipeline(std::move(pipeline_)) , reader(std::move(reader_)) { @@ -189,16 +183,11 @@ private: /// reader uses pipeline, pipeline uses read_buf. 
reader = std::move(other.reader); pipeline = std::move(other.pipeline); - input_format = std::move(other.input_format); read_buf = std::move(other.read_buf); path = std::move(other.path); return *this; } - const std::unique_ptr & getReadBuffer() const { return read_buf; } - - const std::shared_ptr & getFormat() const { return input_format; } - explicit operator bool() const { return reader != nullptr; } PullingPipelineExecutor * operator->() { return reader.get(); } const PullingPipelineExecutor * operator->() const { return reader.get(); } @@ -207,7 +196,6 @@ private: private: String path; std::unique_ptr read_buf; - std::shared_ptr input_format; std::unique_ptr pipeline; std::unique_ptr reader; }; @@ -224,11 +212,6 @@ private: ThreadPoolCallbackRunner create_reader_scheduler; std::future reader_future; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - size_t total_objects_size = 0; - /// Recreate ReadBuffer and Pipeline for each file. ReaderHolder createReader(); std::future createReaderAsync(); @@ -352,8 +335,8 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, - bool need_total_size = true, - KeysWithInfo * read_keys = nullptr); + KeysWithInfo * read_keys = nullptr, + std::function progress_callback = {}); static ColumnsDescription getTableStructureFromDataImpl( const Configuration & configuration, diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 153a3b7f11b..18ae44bc1ad 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -86,7 +86,7 @@ void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { auto iterator = std::make_shared( - *s3_configuration.client, s3_configuration.url, query, virtual_block, context); + *s3_configuration.client, s3_configuration.url, query, virtual_block, context, nullptr, s3_configuration.request_settings, context->getFileProgressCallback()); auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next().key; }); return RemoteQueryExecutor::Extension{ .task_iterator = std::move(callback) }; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1d6aed204cb..1ea0eb5a88e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -234,7 +234,7 @@ StorageURLSource::StorageURLSource( const HTTPHeaderEntries & headers_, const URIParams & params, bool glob_url) - : ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) + : ISource(getHeader(sample_block, requested_virtual_columns_), false), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) { auto headers = getHeaders(headers_); @@ -261,7 +261,8 @@ StorageURLSource::StorageURLSource( credentials, headers, glob_url, - current_uri_options.size() == 1); + current_uri_options.size() == 1, + context->getFileProgressCallback()); /// If file is empty and engine_url_skip_empty_files=1, skip it and go to the next file. 
} @@ -270,22 +271,11 @@ StorageURLSource::StorageURLSource( curr_uri = uri_and_buf.first; read_buf = std::move(uri_and_buf.second); - size_t file_size = 0; - try + if (auto progress_callback = context->getFileProgressCallback()) { - file_size = getFileSizeFromReadBuffer(*read_buf); - } - catch (...) - { - // we simply continue without updating total_size - } - - if (file_size) - { - /// Adjust total_rows_approx_accumulated with new total size. - if (total_size) - total_rows_approx_accumulated = static_cast(std::ceil(static_cast(total_size + file_size) / total_size * total_rows_approx_accumulated)); - total_size += file_size; + size_t file_size = tryGetFileSizeFromReadBuffer(*read_buf).value_or(0); + LOG_DEBUG(&Poco::Logger::get("URL"), "Send file size {}", file_size); + progress_callback(FileProgress(0, file_size)); } // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. @@ -331,14 +321,7 @@ Chunk StorageURLSource::generate() if (reader->pull(chunk)) { UInt64 num_rows = chunk.getNumRows(); - if (num_rows && total_size) - { - size_t chunk_size = input_format->getApproxBytesReadForChunk(); - if (!chunk_size) - chunk_size = chunk.bytes(); - updateRowsProgressApprox( - *this, num_rows, chunk_size, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - } + progress(num_rows, 0); const String & path{curr_uri.getPath()}; @@ -376,7 +359,8 @@ std::pair> StorageURLSource: Poco::Net::HTTPBasicCredentials & credentials, const HTTPHeaderEntries & headers, bool glob_url, - bool delay_initialization) + bool delay_initialization, + std::function file_progress_callback) { String first_exception_message; ReadSettings read_settings = context->getReadSettings(); @@ -418,6 +402,7 @@ std::pair> StorageURLSource: continue; } + res->setProgressCallback(file_progress_callback); return std::make_tuple(request_uri, std::move(res)); } catch (...) 
diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index e3305cda89e..315a5f9897b 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -195,7 +195,8 @@ public: Poco::Net::HTTPBasicCredentials & credentials, const HTTPHeaderEntries & headers, bool glob_url, - bool delay_initialization); + bool delay_initialization, + std::function file_progress_callback = {}); private: using InitializeFunc = std::function; @@ -212,11 +213,6 @@ private: std::unique_ptr reader; Poco::Net::HTTPBasicCredentials credentials; - - size_t total_size = 0; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; }; class StorageURLSink : public SinkToStorage From f48cd0f926338d2420b123aaed3bc22fbc2969e9 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Jun 2023 17:30:24 +0000 Subject: [PATCH 071/478] Delete updateRowsProgressApprox implementation --- src/Storages/ReadFromStorageProgress.cpp | 52 ------------------------ src/Storages/ReadFromStorageProgress.h | 18 -------- 2 files changed, 70 deletions(-) delete mode 100644 src/Storages/ReadFromStorageProgress.cpp delete mode 100644 src/Storages/ReadFromStorageProgress.h diff --git a/src/Storages/ReadFromStorageProgress.cpp b/src/Storages/ReadFromStorageProgress.cpp deleted file mode 100644 index 8ad1cf92209..00000000000 --- a/src/Storages/ReadFromStorageProgress.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max) -{ - if (!total_result_size) - return; - - if (!num_rows) - return; - - const auto progress = source.getReadProgress(); - if (progress && !progress->limits.empty()) - { - for (const auto & limit : progress->limits) - { - if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes - || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes) - return; - } - } - - const auto bytes_per_row = std::ceil(static_cast(chunk_bytes_size) / num_rows); - size_t total_rows_approx = static_cast(std::ceil(static_cast(total_result_size) / bytes_per_row)); - total_rows_approx_accumulated += total_rows_approx; - ++total_rows_count_times; - total_rows_approx = total_rows_approx_accumulated / total_rows_count_times; - - /// We need to add diff, because total_rows_approx is incremental value. - /// It would be more correct to send total_rows_approx as is (not a diff), - /// but incrementation of total_rows_to_read does not allow that. - /// A new counter can be introduced for that to be sent to client, but it does not worth it. 
- if (total_rows_approx > total_rows_approx_max) - { - size_t diff = total_rows_approx - total_rows_approx_max; - source.addTotalRowsApprox(diff); - total_rows_approx_max = total_rows_approx; - } -} - -} diff --git a/src/Storages/ReadFromStorageProgress.h b/src/Storages/ReadFromStorageProgress.h deleted file mode 100644 index 2be37d26fee..00000000000 --- a/src/Storages/ReadFromStorageProgress.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include - -namespace DB -{ - -class ISource; - -void updateRowsProgressApprox( - ISource & source, - size_t num_rows, - UInt64 chunk_bytes_size, - UInt64 total_result_size, - UInt64 & total_rows_approx_accumulated, - size_t & total_rows_count_times, - UInt64 & total_rows_approx_max); - -} From 24fab7bfde4557303335609949548632dbafc218 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 22 Jun 2023 18:48:15 +0000 Subject: [PATCH 072/478] Remove old includes --- src/Storages/HDFS/StorageHDFS.cpp | 1 - src/Storages/StorageAzureBlob.cpp | 1 - src/Storages/StorageFile.cpp | 1 - src/Storages/StorageS3.cpp | 1 - src/Storages/StorageURL.cpp | 1 - 5 files changed, 5 deletions(-) diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 79cda3050d6..c6f0bd3f18d 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 8e06ceda885..1af7afc952f 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f196415e2dc..914fc432907 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index d933ffe8041..5a75da7a188 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 1ea0eb5a88e..5a8f94d07fd 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include From 8b1cd9fcec4408933d537bd1b74a382884d7b52f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 23 Jun 2023 02:27:10 +0200 Subject: [PATCH 073/478] Remove metadata_cache from config --- programs/server/config.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index d18b4cb2ac9..f52241ff44d 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1550,12 +1550,6 @@ - - - '.*' || toString(number % 10) || '.' 
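For reference, the deleted updateRowsProgressApprox in ReadFromStorageProgress.cpp estimated the total row count from the bytes-per-row of each produced chunk, averaged that estimate over all chunks seen so far, and pushed only the positive delta to addTotalRowsApprox. A standalone re-implementation of that arithmetic with a worked example (the limit-checking early return of the original is omitted here):

#include <cmath>
#include <cstdint>
#include <iostream>

/// Re-implementation of the removed heuristic: estimate total rows from the
/// observed bytes-per-row and the known total input size, keep a running average,
/// and report only the increment over what was already reported.
struct RowsApprox
{
    uint64_t accumulated = 0;
    uint64_t times = 0;
    uint64_t reported_max = 0;

    /// Returns the increment that would have been passed to addTotalRowsApprox().
    uint64_t update(uint64_t num_rows, uint64_t chunk_bytes, uint64_t total_bytes)
    {
        if (!num_rows || !total_bytes)
            return 0;
        const double bytes_per_row = std::ceil(static_cast<double>(chunk_bytes) / num_rows);
        uint64_t approx = static_cast<uint64_t>(std::ceil(static_cast<double>(total_bytes) / bytes_per_row));
        accumulated += approx;
        ++times;
        approx = accumulated / times;
        if (approx <= reported_max)
            return 0;
        const uint64_t diff = approx - reported_max;
        reported_max = approx;
        return diff;
    }
};

int main()
{
    RowsApprox approx;
    /// 1 MiB of input; first chunk: 1000 rows in 10 KiB -> 11 bytes/row -> ~95326 rows.
    std::cout << approx.update(1000, 10 * 1024, 1024 * 1024) << "\n"; // 95326
    /// Second chunk is denser, the running average drops, so no new increment is reported.
    std::cout << approx.update(1000, 20 * 1024, 1024 * 1024) << "\n"; // 0
}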
- - '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number) + + '([a-zA-Z][a-zA-Z0-9]*)://([^ /]+)(/[^ ]*)?([^ @]+)@([^ @]+)([0-9][0-9]?)/([0-9][0-9]?)/([0-9][0-9]([0-9][0-9])?)(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9])' || toString(number % 10) 0 + 0 From 6fd27b6cd882b31f73ecd27ca7ae0bb2f0d25854 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jul 2023 22:19:35 +0200 Subject: [PATCH 228/478] Fix build --- src/Storages/StorageMergeTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 085d532b09c..32e100edc4d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -45,6 +45,7 @@ #include #include + namespace DB { @@ -940,7 +941,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - auto is_background_memory_usage_ok = [](String * disable_reason) -> bool + auto is_background_memory_usage_ok = [](String & disable_reason) -> bool { if (canEnqueueBackgroundTask()) return true; From ff6e5ff1c547494ed7c6320c5d62bf789d433ae2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 18 Jul 2023 20:23:55 +0000 Subject: [PATCH 229/478] Automatic style fix --- tests/integration/test_concurrent_ttl_merges/test.py | 10 ++++++++-- .../test_shutdown_wait_unfinished_queries/test.py | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_concurrent_ttl_merges/test.py b/tests/integration/test_concurrent_ttl_merges/test.py index f6ba3834c92..96264e53522 100644 --- a/tests/integration/test_concurrent_ttl_merges/test.py +++ b/tests/integration/test_concurrent_ttl_merges/test.py @@ -7,10 +7,16 @@ from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/fast_background_pool.xml"], user_configs=["configs/users.xml"], with_zookeeper=True + "node1", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) node2 = cluster.add_instance( - "node2", main_configs=["configs/fast_background_pool.xml"], user_configs=["configs/users.xml"], with_zookeeper=True + "node2", + main_configs=["configs/fast_background_pool.xml"], + user_configs=["configs/users.xml"], + with_zookeeper=True, ) diff --git a/tests/integration/test_shutdown_wait_unfinished_queries/test.py b/tests/integration/test_shutdown_wait_unfinished_queries/test.py index 71f8b9a759d..074667fc92f 100644 --- a/tests/integration/test_shutdown_wait_unfinished_queries/test.py +++ b/tests/integration/test_shutdown_wait_unfinished_queries/test.py @@ -6,10 +6,16 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node_wait_queries = cluster.add_instance( - "node_wait_queries", main_configs=["configs/config_wait.xml"], user_configs=["configs/users.xml"], stay_alive=True + "node_wait_queries", + main_configs=["configs/config_wait.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) node_kill_queries = cluster.add_instance( - "node_kill_queries", main_configs=["configs/config_kill.xml"], user_configs=["configs/users.xml"], stay_alive=True + "node_kill_queries", + 
main_configs=["configs/config_kill.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, ) global result From 3715c7f461dc9a0c48ea3cfac52ef52c47a53c64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 01:08:14 +0200 Subject: [PATCH 230/478] Fix error in a test --- tests/queries/0_stateless/02293_selected_rows_and_merges.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh index 76c562c9744..2f281d27814 100755 --- a/tests/queries/0_stateless/02293_selected_rows_and_merges.sh +++ b/tests/queries/0_stateless/02293_selected_rows_and_merges.sh @@ -24,4 +24,4 @@ ${CLICKHOUSE_CLIENT} -q "system flush logs" # Here for mutation all values are 0, cause mutation is executed async. # It's pretty hard to write a test with total counter. -${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'], ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" +${CLICKHOUSE_CLIENT} -q "select ProfileEvents['SelectedRows'] > 10, ProfileEvents['SelectedBytes'] > 1000, ProfileEvents['MergedRows'], ProfileEvents['MergedUncompressedBytes'] from system.query_log where query_id = '$query_id' and type = 'QueryFinish' and query like 'alter%' and current_database = currentDatabase()" From c724816cb8403c07d2d4c4601e0c4c9dcfc16e5f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 01:15:16 +0200 Subject: [PATCH 231/478] Fix test --- .../configs/config.d/merge_tree.xml | 5 +++++ .../configs/config.d/users.xml | 5 ----- .../configs/config.xml | 22 ------------------- .../test_merge_tree_s3_failover/test.py | 1 + 4 files changed, 6 insertions(+), 27 deletions(-) create mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml delete mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml delete mode 100644 tests/integration/test_merge_tree_s3_failover/configs/config.xml diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml new file mode 100644 index 00000000000..c58c957b596 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_failover/configs/config.d/merge_tree.xml @@ -0,0 +1,5 @@ + + + 1.0 + + diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml deleted file mode 100644 index 0011583a68c..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.d/users.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/tests/integration/test_merge_tree_s3_failover/configs/config.xml b/tests/integration/test_merge_tree_s3_failover/configs/config.xml deleted file mode 100644 index 743d75d9a21..00000000000 --- a/tests/integration/test_merge_tree_s3_failover/configs/config.xml +++ /dev/null @@ -1,22 +0,0 @@ - - 9000 - 127.0.0.1 - - - - true - none - - AcceptCertificateHandler - - - - - 500 - ./clickhouse/ - users.xml - - - 1.0 - - diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 90dda631924..57ca5ed5ffd 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ 
b/tests/integration/test_merge_tree_s3_failover/test.py @@ -67,6 +67,7 @@ def cluster(): "configs/config.d/storage_conf.xml", "configs/config.d/instant_moves.xml", "configs/config.d/part_log.xml", + "configs/config.d/merge_tree.xml" ], with_minio=True, ) From 3c8141529f0f8d4d7c48c077e91af77ee9885ad8 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 18 Jul 2023 23:25:21 +0000 Subject: [PATCH 232/478] Automatic style fix --- tests/integration/test_merge_tree_s3_failover/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3_failover/test.py b/tests/integration/test_merge_tree_s3_failover/test.py index 57ca5ed5ffd..b47d741e78e 100644 --- a/tests/integration/test_merge_tree_s3_failover/test.py +++ b/tests/integration/test_merge_tree_s3_failover/test.py @@ -67,7 +67,7 @@ def cluster(): "configs/config.d/storage_conf.xml", "configs/config.d/instant_moves.xml", "configs/config.d/part_log.xml", - "configs/config.d/merge_tree.xml" + "configs/config.d/merge_tree.xml", ], with_minio=True, ) From d666272b7666967cf1d1bed3804673e3beb1ca64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jul 2023 05:29:12 +0200 Subject: [PATCH 233/478] Enable `allow_vertical_merges_from_compact_to_wide_parts` by default --- src/Storages/MergeTree/MergeTreeSettings.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index dc24327712c..783fde088dc 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -160,7 +160,7 @@ struct Settings; M(UInt64, min_marks_to_honor_max_concurrent_queries, 0, "Minimal number of marks to honor the MergeTree-level's max_concurrent_queries (0 - disabled). Queries will still be limited by other max_concurrent_queries settings.", 0) \ M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ - M(Bool, allow_vertical_merges_from_compact_to_wide_parts, false, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ + M(Bool, allow_vertical_merges_from_compact_to_wide_parts, true, "Allows vertical merges from compact to wide parts. 
This settings must have the same value on all replicas", 0) \ M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ M(UInt64, zero_copy_merge_mutation_min_parts_size_sleep_before_lock, 1ULL * 1024 * 1024 * 1024, "If zero copy replication is enabled sleep random amount of time before trying to lock depending on parts size for merge or mutation", 0) \ \ @@ -169,8 +169,9 @@ struct Settings; M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \ M(Bool, use_metadata_cache, false, "Experimental feature to speed up parts loading process by using MergeTree metadata cache", 0) \ M(Bool, allow_remote_fs_zero_copy_replication, false, "Don't use this setting in production, because it is not ready.", 0) \ - M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for Zero-copy table-independet info.", 0) \ + M(String, remote_fs_zero_copy_zookeeper_path, "/clickhouse/zero_copy", "ZooKeeper path for zero-copy table-independent info.", 0) \ M(Bool, remote_fs_zero_copy_path_compatible_mode, false, "Run zero-copy in compatible mode during conversion process.", 0) \ + \ /** Compress marks and primary key. */ \ M(Bool, compress_marks, true, "Marks support compression, reduce mark file size and speed up network transmission.", 0) \ M(Bool, compress_primary_key, true, "Primary key support compression, reduce primary key file size and speed up network transmission.", 0) \ From c3b8978023fae8adaa98a111f6253be50ee72a35 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 11:53:03 +0800 Subject: [PATCH 234/478] Don't use minmax_count projections when counting nullable columns --- .../optimizeUseAggregateProjection.cpp | 32 ++++--------------- ..._count_projection_count_nullable.reference | 1 + ...minmax_count_projection_count_nullable.sql | 9 ++++++ 3 files changed, 17 insertions(+), 25 deletions(-) create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference create mode 100644 tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index f183bdca7a9..4f25118958f 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -92,18 +92,6 @@ static AggregateProjectionInfo getAggregatingProjectionInfo( return info; } -static bool hasNullableOrMissingColumn(const DAGIndex & index, const Names & names) -{ - for (const auto & query_name : names) - { - auto jt = index.find(query_name); - if (jt == index.end() || jt->second->result_type->isNullable()) - return true; - } - - return false; -} - struct AggregateFunctionMatch { const AggregateDescription * description = nullptr; @@ -170,20 +158,14 @@ std::optional matchAggregateFunctions( } /// This is a special case for the function count(). - /// We can assume that 'count(expr) == count()' if expr is not nullable. - if (typeid_cast(candidate.function.get())) + /// We can assume that 'count(expr) == count()' if expr is not nullable, + /// which can be verified by simply casting to `AggregateFunctionCount *`. 
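The reason the rewrite is only valid for a genuine count(): over a Nullable column, count(expr) skips NULLs while the projection's count() counts every row. A standalone illustration of the difference, using std::optional as a stand-in for a Nullable column and mirroring the data of the test added below (one non-NULL value out of five rows):

#include <algorithm>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

int main()
{
    const std::vector<std::optional<std::string>> val =
        {std::nullopt, std::nullopt, std::nullopt, "some value", std::nullopt};

    const auto count_all = val.size();                       // what count() returns
    const auto count_not_null = std::count_if(val.begin(), val.end(),
        [](const auto & v) { return v.has_value(); });       // what count(val) returns

    std::cout << "count()=" << count_all << " count(val)=" << count_not_null << "\n"; // 5 vs 1
}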
+ if (typeid_cast(aggregate.function.get())) { - bool has_nullable_or_missing_arg = false; - has_nullable_or_missing_arg |= hasNullableOrMissingColumn(query_index, aggregate.argument_names); - has_nullable_or_missing_arg |= hasNullableOrMissingColumn(proj_index, candidate.argument_names); - - if (!has_nullable_or_missing_arg) - { - /// we can ignore arguments for count() - found_match = true; - res.push_back({&candidate, DataTypes()}); - break; - } + /// we can ignore arguments for count() + found_match = true; + res.push_back({&candidate, DataTypes()}); + break; } /// Now, function names and types matched. diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql new file mode 100644 index 00000000000..048d725e0a0 --- /dev/null +++ b/tests/queries/0_stateless/01710_minmax_count_projection_count_nullable.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (`val` LowCardinality(Nullable(String))) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192; + +insert into test select number == 3 ? 'some value' : null from numbers(5); + +SELECT count(val) FROM test SETTINGS optimize_use_implicit_projections = 1; + +DROP TABLE test; From 2d46052d62bf8b7efd16aeb769e278b9df54971b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 19 Jul 2023 08:35:46 +0000 Subject: [PATCH 235/478] Update description of events "QueryCacheHits/Misses" --- src/Common/ProfileEvents.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 0838e0366df..75d1e493873 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -57,8 +57,8 @@ M(TableFunctionExecute, "Number of table function calls.") \ M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.") \ M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \ - M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided).") \ - M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation).") \ + M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.") \ + M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). 
Only updated for SELECT queries with SETTING use_query_cache = 1.") \ M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \ M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \ From 08409059cc198873ffbf11060bfdabaa0c74f07f Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Wed, 19 Jul 2023 18:46:20 +0800 Subject: [PATCH 236/478] support alias for new analyzer --- src/Analyzer/Passes/UniqToCountPass.cpp | 108 +++++++++++++----- .../test_rewrite_uniq_to_count/test.py | 16 +-- 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index ae7952051e7..7533a99107b 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -21,36 +21,82 @@ bool matchFnUniq(String func_name) || name == "uniqCombined64"; } -bool nodeEquals(const QueryTreeNodePtr & lhs, const QueryTreeNodePtr & rhs) +/// Extract the corresponding projection columns for group by node list. +/// For example: +/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode) +NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) { - auto * lhs_node = lhs->as(); - auto * rhs_node = rhs->as(); + if (!query_node->hasGroupBy()) + return {}; - if (lhs_node && rhs_node && lhs_node->getColumn() == rhs_node->getColumn()) - return true; - return false; + NamesAndTypes result; + for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) + { + const auto & projection_columns = query_node->getProjectionColumns(); + const auto & projection_nodes = query_node->getProjection().getNodes(); + + assert(projection_columns.size() == projection_nodes.size()); + + for (size_t i = 0; i < projection_columns.size(); i++) + { + if (projection_nodes[i]->isEqual(*group_by_ele)) + result.push_back(projection_columns[i]); + } + } + return result; } -bool nodeListEquals(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +/// Whether query_columns equals subquery_columns. +/// query_columns: query columns from query +/// subquery_columns: projection columns from subquery +bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) { - if (lhs.size() != rhs.size()) + if (query_columns.size() != subquery_columns.size()) return false; - for (size_t i = 0; i < lhs.size(); i++) + + for (const auto & query_column : query_columns) { - if (!nodeEquals(lhs[i], rhs[i])) + auto find = std::find_if( + subquery_columns.begin(), + subquery_columns.end(), + [&](const auto & subquery_column) -> bool + { + if (auto * column_node = query_column->as()) + { + return subquery_column == column_node->getColumn(); + } + return false; + }); + + if (find == subquery_columns.end()) return false; } return true; } -bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) +/// Whether subquery_columns contains all columns in subquery_columns. 
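The two list checks the pass relies on, reduced to plain column names (the real code compares the uniq arguments' ColumnNodes against the subquery's projection NamesAndTypes, which is what makes aliases such as alias_of_a match): the DISTINCT branch needs the arguments to cover the projection exactly, while the GROUP BY branch only needs them to be a subset of the projection in addition to matching the GROUP BY keys.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

/// Reduced versions of the two checks; plain strings stand in for the
/// ColumnNode-vs-projection-column comparison done in the real pass.
static bool listEquals(const std::vector<std::string> & query_cols, const std::vector<std::string> & subquery_cols)
{
    if (query_cols.size() != subquery_cols.size())
        return false;
    return std::all_of(query_cols.begin(), query_cols.end(), [&](const std::string & name)
    {
        return std::find(subquery_cols.begin(), subquery_cols.end(), name) != subquery_cols.end();
    });
}

static bool listContainsAll(const std::vector<std::string> & query_cols, const std::vector<std::string> & subquery_cols)
{
    if (query_cols.size() > subquery_cols.size())
        return false;
    return std::all_of(query_cols.begin(), query_cols.end(), [&](const std::string & name)
    {
        return std::find(subquery_cols.begin(), subquery_cols.end(), name) != subquery_cols.end();
    });
}

int main()
{
    /// uniq(alias_of_a) over (SELECT DISTINCT a AS alias_of_a ...): the projection
    /// column carries the alias, so the DISTINCT branch matches and uniq becomes count().
    std::cout << listEquals({"alias_of_a"}, {"alias_of_a"}) << "\n"; // 1

    /// uniq(a) over (SELECT a, sum(b) ... GROUP BY a): the argument is a subset of the
    /// projection, which is what the GROUP BY branch additionally requires.
    std::cout << listContainsAll({"a"}, {"a", "sum(b)"}) << "\n";    // 1
}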
+/// query_columns: query columns from query +/// subquery_columns: projection columns from subquery +bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) { - if (lhs.size() < rhs.size()) + if (query_columns.size() > subquery_columns.size()) return false; - for (const auto & re : rhs) + + for (const auto & query_column : query_columns) { - auto predicate = [&](const QueryTreeNodePtr & le) { return nodeEquals(le, re); }; - if (std::find_if(lhs.begin(), lhs.end(), predicate) == lhs.end()) + auto find = std::find_if( + subquery_columns.begin(), + subquery_columns.end(), + [&](const auto & subquery_column) -> bool + { + if (auto * column_node = query_column->as()) + { + return subquery_column == column_node->getColumn(); + } + return false; + }); + + if (find == subquery_columns.end()) return false; } return true; @@ -58,17 +104,14 @@ bool nodeListContainsAll(const QueryTreeNodes & lhs, const QueryTreeNodes & rhs) } -class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext +class UniqToCountVisitor : public InDepthQueryTreeVisitor { public: - using Base = InDepthQueryTreeVisitorWithContext; + using Base = InDepthQueryTreeVisitor; using Base::Base; void visitImpl(QueryTreeNodePtr & node) { - if (!getSettings().optimize_uniq_to_count) - return; - auto * query_node = node->as(); if (!query_node) return; @@ -100,9 +143,11 @@ public: { if (!subquery_node->isDistinct()) return false; - /// uniq expression list == subquery group by expression list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjection().getNodes())) + + /// uniq expression list == subquery projection columns + if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns())) return false; + return true; }; @@ -111,12 +156,17 @@ public: { if (!subquery_node->hasGroupBy()) return false; + /// uniq argument node list == subquery group by node list - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getGroupByNode()->getChildren())) + auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node); + + if (!nodeListEquals(uniq_arguments_nodes, group_by_columns)) return false; - /// subquery select node list must contain all columns in uniq argument node list - if (!nodeListContainsAll(subquery_node->getProjection().getNodes(), uniq_arguments_nodes)) + + /// subquery projection columns must contain all columns in uniq argument node list + if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns())) return false; + return true; }; @@ -125,8 +175,11 @@ public: { AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); + function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->getArguments().getNodes().clear(); + + /// Update projection columns query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); } } @@ -135,7 +188,10 @@ public: void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) { - UniqToCountVisitor visitor(std::move(context)); + if (!context->getSettings().optimize_uniq_to_count) + return; + + UniqToCountVisitor visitor; visitor.visit(query_tree_node); } diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py index d7fa9f39441..e38e57f5cee 100644 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ b/tests/integration/test_rewrite_uniq_to_count/test.py @@ 
-83,13 +83,13 @@ def test_rewrite_distinct(started_cluster): ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", + check( + "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3, ) # test select expression alias - check_by_old_analyzer( + check( "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", 3, ) @@ -109,19 +109,19 @@ def test_rewrite_group_by(started_cluster): ) # test select expression alias - check_by_old_analyzer( + check( "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3, ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(t.a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + check( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", 3, ) # test select expression alias - check_by_old_analyzer( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", + check( + "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", 3, ) From 53500be941bc1d63ef85c3b5afb6bcc01103fb85 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 19 Jul 2023 17:03:00 +0200 Subject: [PATCH 237/478] Updated fix of multiple usage in parameterized view to support cte by not adding column which is previously added --- src/Interpreters/ActionsVisitor.cpp | 22 +++++++------------ ...zed_view_with_cte_multiple_usage.reference | 2 ++ ...meterized_view_with_cte_multiple_usage.sql | 16 ++++++++++++++ 3 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference create mode 100755 tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index efab11003f5..8b10df516dc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1202,22 +1202,16 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & else if (data.is_create_parameterized_view && query_parameter) { const auto data_type = DataTypeFactory::instance().get(query_parameter->type); - /// Use getUniqueName() to allow multiple use of query parameter in the query: - /// - /// CREATE VIEW view AS - /// SELECT * - /// FROM system.one - /// WHERE dummy = {k1:Int}+1 OR dummy = {k1:Int}+2 - /// ^^ ^^ - /// - /// NOTE: query in the VIEW will not be modified this is needed - /// only during analysis for CREATE VIEW to avoid duplicated - /// column names. 
- ColumnWithTypeAndName column(data_type, data.getUniqueName("__" + query_parameter->getColumnName())); - data.addColumn(column); + /// During analysis for CREATE VIEW of a parameterized view, if parameter is + /// used multiple times, column is only added once + if (!data.hasColumn(query_parameter->name)) + { + ColumnWithTypeAndName column(data_type, query_parameter->name); + data.addColumn(column); + } argument_types.push_back(data_type); - argument_names.push_back(column.name); + argument_names.push_back(query_parameter->name); } else { diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference new file mode 100644 index 00000000000..004d27bacad --- /dev/null +++ b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.reference @@ -0,0 +1,2 @@ +3 2 +3 2 3 diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql new file mode 100755 index 00000000000..d56d9c4e181 --- /dev/null +++ b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql @@ -0,0 +1,16 @@ +create view test_param_view as +with {param_test_val:UInt8} as param_test_val +select param_test_val, + arrayCount((a)->(a < param_test_val), t.arr) as cnt1 +from (select [1,2,3,4,5] as arr) t; + +select * from test_param_view(param_test_val = 3); + +create view test_param_view2 as +with {param_test_val:UInt8} as param_test_val +select param_test_val, + arrayCount((a)->(a < param_test_val), t.arr) as cnt1, + arrayCount((a)->(a < param_test_val+1), t.arr) as cnt2 +from (select [1,2,3,4,5] as arr) t; + +select * from test_param_view2(param_test_val = 3); \ No newline at end of file From b34655e74310dba07c25b06cac817168b6012907 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 19 Jul 2023 17:05:03 +0200 Subject: [PATCH 238/478] Update src/Storages/StorageReplicatedMergeTree.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 53fac578fca..04799a08e37 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4114,7 +4114,7 @@ std::set StorageReplicatedMergeTree::findReplicaUniqueParts(c } if (!our_parts.empty() && our_unique_parts.empty()) - LOG_TRACE(log_, "All parts found on replica"); + LOG_TRACE(log_, "All parts found on replicas"); return our_unique_parts; } From 544081163d751a62dcdfc21e5841c9cb53877cb0 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 19 Jul 2023 17:21:01 +0200 Subject: [PATCH 239/478] Remove redundant deactivate --- src/Storages/StorageReplicatedMergeTree.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 04799a08e37..06f5330f6d9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4903,7 +4903,6 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() fetcher.blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever(); - mutations_finalizing_task->deactivate(); stopBeingLeader(); if (attach_thread) From 13d1e21da820dd97ddb624eb7671ca2fee86d530 Mon Sep 17 00:00:00 2001 
From: Smita Kulkarni Date: Wed, 19 Jul 2023 18:26:26 +0200 Subject: [PATCH 240/478] Fixed test file permissions --- .../02818_parameterized_view_with_cte_multiple_usage.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql diff --git a/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql b/tests/queries/0_stateless/02818_parameterized_view_with_cte_multiple_usage.sql old mode 100755 new mode 100644 From dbdac5d823d431fb34405649f7125e76c88f1f05 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 19 Jul 2023 19:34:49 +0000 Subject: [PATCH 241/478] Add query with UNION --- .../02500_remove_redundant_distinct.reference | 29 +++++++++++++++++++ .../02500_remove_redundant_distinct.sh | 12 ++++++++ ...move_redundant_distinct_analyzer.reference | 29 +++++++++++++++++++ 3 files changed, 70 insertions(+) diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference index 2e049dbc936..763a7cc4286 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.reference @@ -477,3 +477,32 @@ Expression (Projection) ReadFromStorage (SystemNumbers) -- execute 1 +-- UNION ALL with DISTINCT => do _not_ remove DISTINCT +-- query +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +) +-- explain +Expression (Projection) + Distinct + Distinct (Preliminary DISTINCT) + Union + Expression ((Before ORDER BY + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemNumbers) + Expression (( + Projection)) + Distinct + Distinct (Preliminary DISTINCT) + Expression (Before ORDER BY) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh index 41744cc59f9..f07cdca4b5a 100755 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct.sh +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct.sh @@ -264,3 +264,15 @@ run_query "$query" echo "-- DISTINCT COUNT() with GROUP BY => do _not_ remove DISTINCT" query="select distinct count() from numbers(10) group by number" run_query "$query" + +echo "-- UNION ALL with DISTINCT => do _not_ remove DISTINCT" +query="SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +)" +run_query "$query" diff --git a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference index c9301c1f0a3..50ca5981cf1 100644 --- a/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference +++ b/tests/queries/0_stateless/02500_remove_redundant_distinct_analyzer.reference @@ -479,3 +479,32 @@ Expression (Project names) ReadFromStorage (SystemNumbers) -- execute 1 +-- UNION ALL with DISTINCT => do _not_ remove DISTINCT +-- query +SELECT DISTINCT number +FROM +( + SELECT DISTINCT number + FROM numbers(1) + UNION ALL + SELECT DISTINCT number + FROM numbers(2) +) +-- explain +Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Union + Expression ((Projection + (Change column names to column identifiers + 
Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) + Expression (( + ( + Project names))) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + Change column names to column identifiers)) + ReadFromStorage (SystemNumbers) +-- execute +0 +1 From 2b8e4ebd4c3df56c2d3e445321cedb157c7956f7 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 19 Jul 2023 19:48:39 +0000 Subject: [PATCH 242/478] Allow to disable decoding/encoding path in uri in URL engine --- base/poco/Foundation/include/Poco/URI.h | 6 +++- base/poco/Foundation/src/URI.cpp | 39 ++++++++++++++++++------- docs/en/operations/settings/settings.md | 6 ++++ src/Core/Settings.h | 1 + src/IO/ReadWriteBufferFromHTTP.cpp | 6 ++-- src/Storages/StorageURL.cpp | 2 +- 6 files changed, 45 insertions(+), 15 deletions(-) diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index 1880af4ccd2..5e6e7efd938 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri); + explicit URI(const std::string & uri, bool decode_and_encode_path = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -350,6 +350,8 @@ protected: static const std::string ILLEGAL; private: + void encodePath(std::string & encodedStr) const; + std::string _scheme; std::string _userInfo; std::string _host; @@ -357,6 +359,8 @@ private: std::string _path; std::string _query; std::string _fragment; + + bool _decode_and_encode_path = true; }; diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 5543e02b279..91a82868dcf 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -36,8 +36,8 @@ URI::URI(): } -URI::URI(const std::string& uri): - _port(0) +URI::URI(const std::string& uri, bool decode_and_encode_path): + _port(0), _decode_and_encode_path(decode_and_encode_path) { parse(uri); } @@ -107,7 +107,8 @@ URI::URI(const URI& uri): _port(uri._port), _path(uri._path), _query(uri._query), - _fragment(uri._fragment) + _fragment(uri._fragment), + _decode_and_encode_path(uri._decode_and_encode_path) { } @@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _port(baseURI._port), _path(baseURI._path), _query(baseURI._query), - _fragment(baseURI._fragment) + _fragment(baseURI._fragment), + _decode_and_encode_path(baseURI._decode_and_encode_path) { resolve(relativeURI); } @@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; + _decode_and_encode_path = uri._decode_and_encode_path; } return *this; } @@ -181,6 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); + std::swap(_decode_and_encode_path, uri._decode_and_encode_path); } @@ -201,7 +205,7 @@ std::string URI::toString() const std::string uri; if (isRelative()) { - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else { @@ -217,7 +221,7 @@ std::string URI::toString() const { if (!auth.empty() && _path[0] != '/') uri += '/'; - encode(_path, RESERVED_PATH, uri); + encodePath(uri); } else if (!_query.empty() || !_fragment.empty()) { @@ -313,7 +317,10 @@ void URI::setAuthority(const std::string& authority) void 
URI::setPath(const std::string& path) { _path.clear(); - decode(path, _path); + if (_decode_and_encode_path) + decode(path, _path); + else + _path = path; } @@ -418,7 +425,7 @@ void URI::setPathEtc(const std::string& pathEtc) std::string URI::getPathEtc() const { std::string pathEtc; - encode(_path, RESERVED_PATH, pathEtc); + encodePath(pathEtc); if (!_query.empty()) { pathEtc += '?'; @@ -436,7 +443,7 @@ std::string URI::getPathEtc() const std::string URI::getPathAndQuery() const { std::string pathAndQuery; - encode(_path, RESERVED_PATH, pathAndQuery); + encodePath(pathAndQuery); if (!_query.empty()) { pathAndQuery += '?'; @@ -626,6 +633,8 @@ void URI::encode(const std::string& str, const std::string& reserved, std::strin for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { char c = *it; + if (c == '%') + throw std::runtime_error("WTF"); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || @@ -681,6 +690,13 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa } } +void URI::encodePath(std::string & encodedStr) const +{ + if (_decode_and_encode_path) + encode(_path, RESERVED_PATH, encodedStr); + else + encodedStr = _path; +} bool URI::isWellKnownPort() const { @@ -820,7 +836,10 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it { std::string path; while (it != end && *it != '?' && *it != '#') path += *it++; - decode(path, _path); + if (_decode_and_encode_path) + decode(path, _path); + else + _path = path; } diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 8b969f87a4d..db5d1a2f5d9 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3466,6 +3466,12 @@ Possible values: Default value: `0`. +## decode_and_encode_path_in_url {#decode_and_encode_path_in_url} + +Enables or disables decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. + +Enabled by default. + ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} Adds a modifier `SYNC` to all `DROP` and `DETACH` queries. diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8f304f0aab6..ffa72d841be 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -621,6 +621,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ + M(Bool, decode_and_encode_path_in_url, true, "Enables or disables decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). 
Not recommended to enable these settings.", 0) \ diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 6d1c0f7aafa..eea801ce65e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -305,12 +305,12 @@ void ReadWriteBufferFromHTTPBase::callWithRedirects(Poco::N current_session = session; call(current_session, response, method_, throw_on_all_errors, for_object_info); - Poco::URI prev_uri = uri; + saved_uri_redirect = uri; while (isRedirect(response.getStatus())) { - Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response); - prev_uri = uri_redirect; + Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response); + saved_uri_redirect = uri_redirect; if (remote_host_filter) remote_host_filter->checkURL(uri_redirect); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e6953afe68e..4cfefbc5527 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -389,7 +389,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option); + auto request_uri = Poco::URI(*option, context->getSettingsRef().decode_and_encode_path_in_url); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); From 483ddb53ebfa01c02deda76a39bc44cc08df4f00 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 19 Jul 2023 19:51:58 +0000 Subject: [PATCH 243/478] Fixes --- base/poco/Foundation/src/URI.cpp | 2 -- docs/en/engines/table-engines/special/url.md | 1 + docs/en/sql-reference/table-functions/url.md | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 91a82868dcf..9bad1b39a87 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -633,8 +633,6 @@ void URI::encode(const std::string& str, const std::string& reserved, std::strin for (std::string::const_iterator it = str.begin(); it != str.end(); ++it) { char c = *it; - if (c == '%') - throw std::runtime_error("WTF"); if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 26d4975954f..9f2bf177c96 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 2ab43f1b895..96f36f03949 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,7 +56,8 @@ Character `|` inside patterns is used to specify failover addresses. 
They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. -**See Also** +- **See Also** - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) From 6a21995b2097e747a28a23333e651208c25f0224 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 20 Jul 2023 10:42:19 +0200 Subject: [PATCH 244/478] Added test to analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index e0f259306aa..9a9412e55db 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -130,3 +130,4 @@ 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long 00992_system_parts_race_condition_zookeeper_long +02818_parameterized_view_with_cte_multiple_usage From 16cc00784217574bfa4b434936b25f24c531e542 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 20 Jul 2023 09:21:18 +0000 Subject: [PATCH 245/478] Fix table ad variable name --- tests/integration/test_storage_kafka/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 27a315b53bb..7013f0198f3 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -865,7 +865,7 @@ def test_kafka_formats(kafka_cluster): expected_rows_count = raw_expected.count("\n") instance.query_with_retry( - f"SELECT * FROM test.kafka_data_{list(all_formats.keys())[-1]}_mv;", + f"SELECT * FROM test.kafka_{list(all_formats.keys())[-1]}_mv;", retry_count=30, sleep_time=1, check_callback=lambda res: res.count("\n") == expected_rows_count, @@ -3798,7 +3798,7 @@ def test_kafka_formats_with_broken_message(kafka_cluster): format_name=format_name ) ) - expected = pre_formatted_expected.format( + expected = raw_expected.format( topic_name=topic_name, offset_0=offsets[0], offset_1=offsets[1], From cbcd48979cab1a3a4a0f0d5baaf8077164887cf5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 20 Jul 2023 13:04:43 +0200 Subject: [PATCH 246/478] Fix race one more time --- programs/server/Server.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d1c1a1d200f..774c3f223a6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -739,9 +739,10 @@ try [&]() -> std::vector { std::vector metrics; - metrics.reserve(servers_to_start_before_tables.size() + servers.size()); std::lock_guard lock(servers_lock); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); + for (const auto & server : servers_to_start_before_tables) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); From d16d4449432999cdee3393b1f47b4a7d7c5314a6 Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Thu, 20 Jul 2023 12:24:52 +0200 Subject: [PATCH 247/478] MaterializedMySQL: Add support of double quoted comments --- src/Parsers/ExpressionElementParsers.cpp | 33 +++++++++++++++++ src/Parsers/ExpressionElementParsers.h | 15 ++++++++ 
src/Parsers/MySQL/ASTDeclareColumn.cpp | 2 +- .../materialized_with_ddl.py | 35 +++++++++++++++++++ .../test_materialized_mysql_database/test.py | 6 ++++ 5 files changed, 90 insertions(+), 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 3a7e8790bb4..0149526da79 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1900,6 +1900,39 @@ bool ParserSubstitution::parseImpl(Pos & pos, ASTPtr & node, Expected & expected } +bool ParserMySQLComment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + if (pos->type != TokenType::QuotedIdentifier && pos->type != TokenType::StringLiteral) + return false; + String s; + ReadBufferFromMemory in(pos->begin, pos->size()); + try + { + if (pos->type == TokenType::StringLiteral) + readQuotedStringWithSQLStyle(s, in); + else + readDoubleQuotedStringWithSQLStyle(s, in); + } + catch (const Exception &) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + if (in.count() != pos->size()) + { + expected.add(pos, "string literal or double quoted string"); + return false; + } + + auto literal = std::make_shared(s); + literal->begin = pos; + literal->end = ++pos; + node = literal; + return true; +} + + bool ParserMySQLGlobalVariable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { if (pos->type != TokenType::DoubleAt) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index cc88faf2653..f33f2d99f71 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -367,6 +367,21 @@ protected: }; +/** MySQL comment: + * CREATE TABLE t ( + * i INT PRIMARY KEY, + * first_name VARCHAR(255) COMMENT 'FIRST_NAME', + * last_name VARCHAR(255) COMMENT "LAST_NAME" + * ) + */ +class ParserMySQLComment : public IParserBase +{ +protected: + const char * getName() const override { return "MySQL comment parser"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + + /** MySQL-style global variable: @@var */ class ParserMySQLGlobalVariable : public IParserBase diff --git a/src/Parsers/MySQL/ASTDeclareColumn.cpp b/src/Parsers/MySQL/ASTDeclareColumn.cpp index e585dcb670c..e5f2b7870e2 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.cpp +++ b/src/Parsers/MySQL/ASTDeclareColumn.cpp @@ -50,7 +50,7 @@ static inline bool parseColumnDeclareOptions(IParser::Pos & pos, ASTPtr & node, OptionDescribe("PRIMARY KEY", "primary_key", std::make_unique()), OptionDescribe("UNIQUE", "unique_key", std::make_unique()), OptionDescribe("KEY", "primary_key", std::make_unique()), - OptionDescribe("COMMENT", "comment", std::make_unique()), + OptionDescribe("COMMENT", "comment", std::make_unique()), OptionDescribe("CHARACTER SET", "charset_name", std::make_unique()), OptionDescribe("CHARSET", "charset", std::make_unique()), OptionDescribe("COLLATE", "collate", std::make_unique()), diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8cf9e67bf63..f7a930ec00b 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -1617,6 +1617,41 @@ def materialized_with_column_comments_test(clickhouse_node, mysql_node, service_ mysql_node.query("DROP DATABASE materialized_with_column_comments_test") +def 
double_quoted_comment(clickhouse_node, mysql_node, service_name): + db = "comment_db" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + mysql_node.query( + f'CREATE TABLE {db}.t1 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t2 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"SHOW TABLES FROM {db} FORMAT TSV", + "t1\nt2\n", + ) + + # incremental + mysql_node.query( + f'CREATE TABLE {db}.t3 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT "ID")' + ) + mysql_node.query( + f"CREATE TABLE {db}.t4 (i INT PRIMARY KEY, id VARCHAR(255) COMMENT 'ID')" + ) + check_query( + clickhouse_node, f"SHOW TABLES FROM {db} FORMAT TSV", "t1\nt2\nt3\nt4\n" + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + + def materialized_with_enum8_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") clickhouse_node.query("DROP DATABASE IF EXISTS materialized_with_enum8_test") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 21316d1a474..0166f7d1d33 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -416,6 +416,12 @@ def test_materialized_with_column_comments( ) +def test_double_quoted_comment(started_cluster, started_mysql_8_0, clickhouse_node): + materialized_with_ddl.double_quoted_comment( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + + def test_materialized_with_enum( started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node ): From fe934d3059936cd203952cfe5881ff7243001ae9 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jul 2023 12:38:41 +0000 Subject: [PATCH 248/478] Make better --- docs/en/engines/table-engines/special/url.md | 2 +- docs/en/operations/settings/settings.md | 6 +++--- docs/en/sql-reference/table-functions/url.md | 4 ++-- src/Core/Settings.h | 2 +- src/Storages/StorageURL.cpp | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 9f2bf177c96..f556df0a088 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -106,4 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. +- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) -allows to disable decoding/encoding path in uri. Disabled by default. 
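A minimal usage sketch of the setting documented in the hunk above. The endpoint URL, file name, and column structure below are hypothetical and only illustrate the intent; the setting name disable_url_encoding and its default (disabled) are taken from this patch, everything else is an assumption.

-- With disable_url_encoding = 1 the '%20' in the path is passed to the HTTP server
-- as written, instead of being decoded and re-encoded by ClickHouse.
SELECT *
FROM url('http://127.0.0.1:8123/data%20export.csv', 'CSVWithNames', 'id UInt64, name String')
SETTINGS disable_url_encoding = 1;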
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index db5d1a2f5d9..d138b07d3ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3466,11 +3466,11 @@ Possible values: Default value: `0`. -## decode_and_encode_path_in_url {#decode_and_encode_path_in_url} +## disable_url_encoding {#disable_url_encoding} -Enables or disables decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. +Allows to disable decoding/encoding path in uri in [URL](../../engines/table-engines/special/url.md) engine tables. -Enabled by default. +Disabled by default. ## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 96f36f03949..677ed011960 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,8 +56,8 @@ Character `|` inside patterns is used to specify failover addresses. They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. -- [decode_and_encode_path_in_url](/docs/en/operations/settings/settings.md#decode_and_encode_path_in_url) - enables or disables decoding/encoding path in uri. Enabled by default. +- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows to disable decoding/encoding path in uri. Disabled by default. -- **See Also** +**See Also** - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ffa72d841be..5dc40494115 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -621,7 +621,7 @@ class IColumn; M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \ M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \ M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \ - M(Bool, decode_and_encode_path_in_url, true, "Enables or disables decoding/encoding path in uri in URL table engine", 0) \ + M(Bool, disable_url_encoding, false, " Allows to disable decoding/encoding path in uri in URL table engine", 0) \ M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \ M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \ M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). 
Not recommended to enable these settings.", 0) \ diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 4cfefbc5527..0c915f54cff 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -389,7 +389,7 @@ std::pair> StorageURLSource: for (; option != end; ++option) { bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option, context->getSettingsRef().decode_and_encode_path_in_url); + auto request_uri = Poco::URI(*option, context->getSettingsRef().disable_url_encoding); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); From f6a44f8eedce98bd50ceee72e5fdc4da1a82a43a Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 20 Jul 2023 12:40:41 +0000 Subject: [PATCH 249/478] Better --- base/poco/Foundation/include/Poco/URI.h | 6 +++-- base/poco/Foundation/src/URI.cpp | 34 +++++++++++++------------ 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index 5e6e7efd938..f4505147ced 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri, bool decode_and_encode_path = true); + explicit URI(const std::string & uri, bool disable_url_encoding = true); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. @@ -351,6 +351,8 @@ protected: private: void encodePath(std::string & encodedStr) const; + void decodePath(const std::string & encodedStr); + std::string _scheme; std::string _userInfo; @@ -360,7 +362,7 @@ private: std::string _query; std::string _fragment; - bool _decode_and_encode_path = true; + bool _disable_url_encoding = true; }; diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp index 9bad1b39a87..3354c69d188 100644 --- a/base/poco/Foundation/src/URI.cpp +++ b/base/poco/Foundation/src/URI.cpp @@ -37,7 +37,7 @@ URI::URI(): URI::URI(const std::string& uri, bool decode_and_encode_path): - _port(0), _decode_and_encode_path(decode_and_encode_path) + _port(0), _disable_url_encoding(decode_and_encode_path) { parse(uri); } @@ -108,7 +108,7 @@ URI::URI(const URI& uri): _path(uri._path), _query(uri._query), _fragment(uri._fragment), - _decode_and_encode_path(uri._decode_and_encode_path) + _disable_url_encoding(uri._disable_url_encoding) { } @@ -121,7 +121,7 @@ URI::URI(const URI& baseURI, const std::string& relativeURI): _path(baseURI._path), _query(baseURI._query), _fragment(baseURI._fragment), - _decode_and_encode_path(baseURI._decode_and_encode_path) + _disable_url_encoding(baseURI._disable_url_encoding) { resolve(relativeURI); } @@ -153,7 +153,7 @@ URI& URI::operator = (const URI& uri) _path = uri._path; _query = uri._query; _fragment = uri._fragment; - _decode_and_encode_path = uri._decode_and_encode_path; + _disable_url_encoding = uri._disable_url_encoding; } return *this; } @@ -184,7 +184,7 @@ void URI::swap(URI& uri) std::swap(_path, uri._path); std::swap(_query, uri._query); std::swap(_fragment, uri._fragment); - std::swap(_decode_and_encode_path, uri._decode_and_encode_path); + std::swap(_disable_url_encoding, uri._disable_url_encoding); } @@ -317,10 +317,7 @@ void URI::setAuthority(const std::string& authority) void URI::setPath(const std::string& path) { _path.clear(); - if (_decode_and_encode_path) - decode(path, _path); - 
else - _path = path; + decodePath(path); } @@ -690,10 +687,18 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa void URI::encodePath(std::string & encodedStr) const { - if (_decode_and_encode_path) - encode(_path, RESERVED_PATH, encodedStr); - else + if (_disable_url_encoding) encodedStr = _path; + else + encode(_path, RESERVED_PATH, encodedStr); +} + +void URI::decodePath(const std::string & encodedStr) +{ + if (_disable_url_encoding) + _path = encodedStr; + else + decode(encodedStr, _path); } bool URI::isWellKnownPort() const @@ -834,10 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it { std::string path; while (it != end && *it != '?' && *it != '#') path += *it++; - if (_decode_and_encode_path) - decode(path, _path); - else - _path = path; + decodePath(path); } From 8649c84461f3c27bdf9fcab4db1884b21603dc2e Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Jul 2023 13:28:37 +0000 Subject: [PATCH 250/478] Remove conditional linking --- utils/config-processor/CMakeLists.txt | 6 +----- utils/keeper-bench/CMakeLists.txt | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/utils/config-processor/CMakeLists.txt b/utils/config-processor/CMakeLists.txt index 4394083a1c3..80c3535ef4e 100644 --- a/utils/config-processor/CMakeLists.txt +++ b/utils/config-processor/CMakeLists.txt @@ -1,6 +1,2 @@ clickhouse_add_executable (config-processor config-processor.cpp) -if (ENABLE_SSL) - target_link_libraries(config-processor PRIVATE dbms) -else () - target_link_libraries(config-processor PRIVATE clickhouse_common_config_no_zookeeper_log) -endif () +target_link_libraries(config-processor PRIVATE dbms) diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index e8daec9e164..5514c34f4ef 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -4,9 +4,5 @@ if (NOT TARGET ch_contrib::rapidjson) endif () clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -if (ENABLE_SSL) - target_link_libraries(keeper-bench PRIVATE dbms) -else () - target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log) -endif () +target_link_libraries(keeper-bench PRIVATE dbms) target_link_libraries(keeper-bench PRIVATE ch_contrib::rapidjson) From 8adf57a6981610936acc84f3c69342682952ff0a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 20 Jul 2023 14:18:32 +0000 Subject: [PATCH 251/478] Fix text in comments and improve exception handling --- src/Common/examples/encrypt_decrypt.cpp | 2 +- tests/integration/test_config_decryption/test_wrong_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index 542e173deb9..2d8c5a5f61f 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -3,7 +3,7 @@ #include #include -/** This test program encrypts or decrypts text values using AES_128_GCM_SIV or AES_256_GCM_SIV codecs. +/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV. * Keys for codecs are loaded from section of configuration file. 
* * How to use: diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index e86f7fa9b39..e0fbd4b2948 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -10,7 +10,7 @@ def start_clickhouse(config, err_msg): cluster.start() except Exception as e: caught_exception = str(e) - assert caught_exception.find(err_msg) != -1 + assert err_msg in caught_exception def test_wrong_method(): From ed59870f92fa2893c9c105eaaeff82b1efaede22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Thu, 20 Jul 2023 18:04:58 +0200 Subject: [PATCH 252/478] Update LRUFileCachePriority.cpp --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 18862e154da..33e567b7a76 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -7,6 +7,7 @@ namespace CurrentMetrics { extern const Metric FilesystemCacheSize; + extern const Metric FilesystemCacheSizeLimit; extern const Metric FilesystemCacheElements; } @@ -101,6 +102,7 @@ void LRUFileCachePriority::updateSize(int64_t size) { current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); + CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, getSizeLimit()); } void LRUFileCachePriority::updateElementsCount(int64_t num) From b3c42a1171e3f631e8985b80fc3c822c7ac87dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Thu, 20 Jul 2023 18:06:54 +0200 Subject: [PATCH 253/478] Update CurrentMetrics.cpp with FilesystemCacheSizeLimit metric --- src/Common/CurrentMetrics.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 626b43aea2c..583b13cf79d 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -187,6 +187,7 @@ M(CacheFileSegments, "Number of existing cache file segments") \ M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \ M(FilesystemCacheSize, "Filesystem cache size in bytes") \ + M(FilesystemCacheSizeLimit, "Filesystem cache size limit in bytes") \ M(FilesystemCacheElements, "Filesystem cache elements (file segments)") \ M(FilesystemCacheDownloadQueueElements, "Filesystem cache elements in download queue") \ M(AsyncInsertCacheSize, "Number of async insert hash id in cache") \ From f0e277f94a642647cfd3eb5ebc722b486d9203b0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Jul 2023 06:45:35 +0200 Subject: [PATCH 254/478] Rename TaskStatsInfoGetter into NetlinkMetricsProvider There is ProcfsMetricsProvider, so by analogy to it. 
Signed-off-by: Azat Khuzhin --- src/Common/CurrentThread.cpp | 1 - ...oGetter.cpp => NetlinkMetricsProvider.cpp} | 22 +++++++++---------- ...sInfoGetter.h => NetlinkMetricsProvider.h} | 6 ++--- src/Common/ThreadProfileEvents.cpp | 6 ++--- src/Disks/IO/ThreadPoolReader.cpp | 2 +- src/IO/ReadBufferFromFileDescriptor.cpp | 2 +- src/IO/SynchronousReader.cpp | 2 +- 7 files changed, 20 insertions(+), 21 deletions(-) rename src/Common/{TaskStatsInfoGetter.cpp => NetlinkMetricsProvider.cpp} (93%) rename src/Common/{TaskStatsInfoGetter.h => NetlinkMetricsProvider.h} (85%) diff --git a/src/Common/CurrentThread.cpp b/src/Common/CurrentThread.cpp index 057b1eeda12..ac5b712279e 100644 --- a/src/Common/CurrentThread.cpp +++ b/src/Common/CurrentThread.cpp @@ -3,7 +3,6 @@ #include "CurrentThread.h" #include #include -#include #include #include #include diff --git a/src/Common/TaskStatsInfoGetter.cpp b/src/Common/NetlinkMetricsProvider.cpp similarity index 93% rename from src/Common/TaskStatsInfoGetter.cpp rename to src/Common/NetlinkMetricsProvider.cpp index 867a50c8cce..4c228bcc6fc 100644 --- a/src/Common/TaskStatsInfoGetter.cpp +++ b/src/Common/NetlinkMetricsProvider.cpp @@ -1,4 +1,4 @@ -#include "TaskStatsInfoGetter.h" +#include "NetlinkMetricsProvider.h" #include #include #include @@ -200,7 +200,7 @@ bool checkPermissionsImpl() if (!res) return false; - /// Check that we can successfully initialize TaskStatsInfoGetter. + /// Check that we can successfully initialize NetlinkMetricsProvider. /// It will ask about family id through Netlink. /// On some LXC containers we have capability but we still cannot use Netlink. /// There is an evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result. @@ -208,7 +208,7 @@ bool checkPermissionsImpl() try { ::taskstats stats{}; - TaskStatsInfoGetter().getStat(stats, static_cast(getThreadId())); + NetlinkMetricsProvider().getStat(stats, static_cast(getThreadId())); } catch (const Exception & e) { @@ -244,14 +244,14 @@ UInt16 getFamilyId(int fd) } -bool TaskStatsInfoGetter::checkPermissions() +bool NetlinkMetricsProvider::checkPermissions() { static bool res = checkPermissionsImpl(); return res; } -TaskStatsInfoGetter::TaskStatsInfoGetter() +NetlinkMetricsProvider::NetlinkMetricsProvider() { netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) @@ -293,7 +293,7 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() } -void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const +void NetlinkMetricsProvider::getStat(::taskstats & out_stats, pid_t tid) const { NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid)); @@ -318,7 +318,7 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) const } -TaskStatsInfoGetter::~TaskStatsInfoGetter() +NetlinkMetricsProvider::~NetlinkMetricsProvider() { if (netlink_socket_fd >= 0) { @@ -335,15 +335,15 @@ TaskStatsInfoGetter::~TaskStatsInfoGetter() namespace DB { -bool TaskStatsInfoGetter::checkPermissions() +bool NetlinkMetricsProvider::checkPermissions() { return false; } -TaskStatsInfoGetter::TaskStatsInfoGetter() = default; -TaskStatsInfoGetter::~TaskStatsInfoGetter() = default; +NetlinkMetricsProvider::NetlinkMetricsProvider() = default; +NetlinkMetricsProvider::~NetlinkMetricsProvider() = default; -void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) const +void NetlinkMetricsProvider::getStat(::taskstats &, pid_t) const { } diff --git 
a/src/Common/TaskStatsInfoGetter.h b/src/Common/NetlinkMetricsProvider.h similarity index 85% rename from src/Common/TaskStatsInfoGetter.h rename to src/Common/NetlinkMetricsProvider.h index 66655d7ad0d..8a54f33be80 100644 --- a/src/Common/TaskStatsInfoGetter.h +++ b/src/Common/NetlinkMetricsProvider.h @@ -15,11 +15,11 @@ namespace DB /// /// [1]: https://elixir.bootlin.com/linux/v5.18-rc4/source/kernel/tsacct.c#L101 /// -class TaskStatsInfoGetter : private boost::noncopyable +class NetlinkMetricsProvider : private boost::noncopyable { public: - TaskStatsInfoGetter(); - ~TaskStatsInfoGetter(); + NetlinkMetricsProvider(); + ~NetlinkMetricsProvider(); void getStat(::taskstats & out_stats, pid_t tid) const; diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index a94fd81559a..256f53df011 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -2,7 +2,7 @@ #if defined(OS_LINUX) -#include "TaskStatsInfoGetter.h" +#include "NetlinkMetricsProvider.h" #include "ProcfsMetricsProvider.h" #include "hasLinuxCapability.h" @@ -99,7 +99,7 @@ TasksStatsCounters::MetricsProvider TasksStatsCounters::findBestAvailableProvide static std::optional provider = []() -> MetricsProvider { - if (TaskStatsInfoGetter::checkPermissions()) + if (NetlinkMetricsProvider::checkPermissions()) { return MetricsProvider::Netlink; } @@ -119,7 +119,7 @@ TasksStatsCounters::TasksStatsCounters(const UInt64 tid, const MetricsProvider p switch (provider) { case MetricsProvider::Netlink: - stats_getter = [metrics_provider = std::make_shared(), tid]() + stats_getter = [metrics_provider = std::make_shared(), tid]() { ::taskstats result{}; metrics_provider->getStat(result, static_cast(tid)); diff --git a/src/Disks/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp index effa19bc1af..cd3f2d8dea0 100644 --- a/src/Disks/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -114,7 +114,7 @@ std::future ThreadPoolReader::submit(Request reques /// It reports real time spent including the time spent while thread was preempted doing nothing. /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). + /// (NetlinkMetricsProvider has about 500K RPS). Stopwatch watch(CLOCK_MONOTONIC); SCOPE_EXIT({ diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index 67bc01279c3..6c0c1681a4c 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -95,7 +95,7 @@ size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_ /// It reports real time spent including the time spent while thread was preempted doing nothing. /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). + /// (NetlinkMetricsProvider has about 500K RPS). 
watch.stop(); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); diff --git a/src/IO/SynchronousReader.cpp b/src/IO/SynchronousReader.cpp index 7cef3bd8963..e1c654e48a3 100644 --- a/src/IO/SynchronousReader.cpp +++ b/src/IO/SynchronousReader.cpp @@ -78,7 +78,7 @@ std::future SynchronousReader::submit(Request reque /// It reports real time spent including the time spent while thread was preempted doing nothing. /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it - /// (TaskStatsInfoGetter has about 500K RPS). + /// (NetlinkMetricsProvider has about 500K RPS). watch.stop(); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); From d2dba496bf0c703178758b1c534c0914044d2094 Mon Sep 17 00:00:00 2001 From: StianBerger <111980234+StianBerger@users.noreply.github.com> Date: Fri, 21 Jul 2023 10:26:01 +0200 Subject: [PATCH 255/478] Update date-time-functions.md formatDateTime %r for 12-hour time, mentioned %H in equivalent, which is 24H. Replaced with %h. --- docs/en/sql-reference/functions/date-time-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index c6b978506a1..87d84425029 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1449,7 +1449,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | | %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | From 53d77e6b1397e3621a81fc88da76aa9bac72ad75 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 19 Jul 2023 21:28:17 +0800 Subject: [PATCH 256/478] Add back missing projection QueryAccessInfo. 
--- src/Interpreters/Context.cpp | 13 ++-- src/Interpreters/Context.h | 1 + .../optimizeUseAggregateProjection.cpp | 18 +++++- .../optimizeUseNormalProjection.cpp | 9 ++- .../QueryPlan/ReadFromMergeTree.cpp | 4 ++ .../QueryPlan/ReadFromPreparedSource.cpp | 8 ++- .../QueryPlan/ReadFromPreparedSource.h | 3 +- ...0_query_log_with_projection_info.reference | 3 + .../01710_query_log_with_projection_info.sql | 64 +++++++++++++++++++ 9 files changed, 114 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/01710_query_log_with_projection_info.reference create mode 100644 tests/queries/0_stateless/01710_query_log_with_projection_info.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9e4d1e8d1e2..434fc1adb40 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1461,15 +1461,20 @@ void Context::addQueryAccessInfo( void Context::addQueryAccessInfo(const Names & partition_names) { if (isGlobalContext()) - { throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); - } std::lock_guard lock(query_access_info.mutex); for (const auto & partition_name : partition_names) - { query_access_info.partitions.emplace(partition_name); - } +} + +void Context::addQueryAccessInfo(const String & qualified_projection_name) +{ + if (isGlobalContext()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); + + std::lock_guard lock(query_access_info.mutex); + query_access_info.projections.emplace(qualified_projection_name); } void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3a8d41bf130..3ce899bfb77 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -657,6 +657,7 @@ public: const String & projection_name = {}, const String & view_name = {}); void addQueryAccessInfo(const Names & partition_names); + void addQueryAccessInfo(const String & qualified_projection_name); /// Supported factories for records in query_log diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index f183bdca7a9..8c85435138c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -625,7 +625,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & // candidates.minmax_projection->block.dumpStructure()); Pipe pipe(std::make_shared(std::move(candidates.minmax_projection->block))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal ? 
"" + : fmt::format( + "{}.{}", + reading->getMergeTreeData().getStorageID().getFullTableName(), + backQuoteIfNeed(candidates.minmax_projection->candidate.projection->name))); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) @@ -658,7 +665,14 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & { auto header = proj_snapshot->getSampleBlockForColumns(best_candidate->dag->getRequiredColumnsNames()); Pipe pipe(std::make_shared(std::move(header))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal ? "" + : fmt::format( + "{}.{}", + reading->getMergeTreeData().getStorageID().getFullTableName(), + backQuoteIfNeed(best_candidate->projection->name))); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index dd7a5d449bc..71db561e1c9 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -183,7 +183,14 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) if (!projection_reading) { Pipe pipe(std::make_shared(proj_snapshot->getSampleBlockForColumns(required_columns))); - projection_reading = std::make_unique(std::move(pipe)); + projection_reading = std::make_unique( + std::move(pipe), + context, + query_info.is_internal ? "" + : fmt::format( + "{}.{}", + reading->getMergeTreeData().getStorageID().getFullTableName(), + backQuoteIfNeed(best_candidate->projection->name))); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 13de5d1d140..82f47cc61d5 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1761,6 +1761,10 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); } context->getQueryContext()->addQueryAccessInfo(partition_names); + + if (storage_snapshot->projection) + context->getQueryContext()->addQueryAccessInfo( + fmt::format("{}.{}", data.getStorageID().getFullTableName(), backQuoteIfNeed(storage_snapshot->projection->name))); } ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index 7446203ec35..d50eec47ca8 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,17 +1,23 @@ +#include #include #include namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, const String & qualified_projection_name_) : ISourceStep(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) + , context(context_) + , qualified_projection_name(qualified_projection_name_) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { + if (context && context->hasQueryContext() && 
!qualified_projection_name.empty()) + context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); + for (const auto & processor : pipe.getProcessors()) processors.emplace_back(processor); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 05e3ebd5102..5e64dcb7a4f 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -9,7 +9,7 @@ namespace DB class ReadFromPreparedSource : public ISourceStep { public: - explicit ReadFromPreparedSource(Pipe pipe_); + explicit ReadFromPreparedSource(Pipe pipe_, ContextPtr context_ = nullptr, const String & qualified_projection_name_ = ""); String getName() const override { return "ReadFromPreparedSource"; } @@ -18,6 +18,7 @@ public: protected: Pipe pipe; ContextPtr context; + String qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.reference b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference new file mode 100644 index 00000000000..9c2e9df6662 --- /dev/null +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.reference @@ -0,0 +1,3 @@ +t.t_normal +t.t_agg +t._minmax_count_projection diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql new file mode 100644 index 00000000000..25e7e8fed60 --- /dev/null +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql @@ -0,0 +1,64 @@ +set log_queries=1; +set log_queries_min_type='QUERY_FINISH'; +set optimize_use_implicit_projections=1; + +DROP TABLE IF EXISTS t; + +CREATE TABLE t +( + `id` UInt64, + `id2` UInt64, + `id3` UInt64, + PROJECTION t_normal + ( + SELECT + id, + id2, + id3 + ORDER BY + id2, + id, + id3 + ), + PROJECTION t_agg + ( + SELECT + sum(id3) + GROUP BY id2 + ) +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 8; + +insert into t SELECT number, -number, number FROM numbers(10000); + +SELECT * FROM t WHERE id2 = 3 FORMAT Null; +SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null; +SELECT min(id) FROM t FORMAT Null; + +SYSTEM FLUSH LOGS; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT * FROM t WHERE id2 = 3 FORMAT Null;'; + +SELECT + --Remove the prefix string which is a mutable database name. + arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT sum(id3) FROM t GROUP BY id2 FORMAT Null;'; + +SELECT + --Remove the prefix string which is a mutable database name. 
+ arrayStringConcat(arrayPopFront(splitByString('.', projections[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and query = 'SELECT min(id) FROM t FORMAT Null;'; From 8187118232371630fb10ee4062b8a52285003fa0 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 20 Jul 2023 11:12:22 +0800 Subject: [PATCH 257/478] Better code --- src/Interpreters/Context.cpp | 8 ++++++-- src/Interpreters/Context.h | 9 ++++++++- .../optimizeUseAggregateProjection.cpp | 18 ++++++++---------- .../optimizeUseNormalProjection.cpp | 9 ++++----- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 2 +- .../QueryPlan/ReadFromPreparedSource.cpp | 9 ++++----- .../QueryPlan/ReadFromPreparedSource.h | 7 +++++-- 7 files changed, 36 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 434fc1adb40..cc1277e08b9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1468,13 +1468,17 @@ void Context::addQueryAccessInfo(const Names & partition_names) query_access_info.partitions.emplace(partition_name); } -void Context::addQueryAccessInfo(const String & qualified_projection_name) +void Context::addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name) { + if (!qualified_projection_name) + return; + if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); std::lock_guard lock(query_access_info.mutex); - query_access_info.projections.emplace(qualified_projection_name); + query_access_info.projections.emplace(fmt::format( + "{}.{}", qualified_projection_name.storage_id.getFullTableName(), backQuoteIfNeed(qualified_projection_name.projection_name))); } void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3ce899bfb77..fa210f04451 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -657,7 +657,14 @@ public: const String & projection_name = {}, const String & view_name = {}); void addQueryAccessInfo(const Names & partition_names); - void addQueryAccessInfo(const String & qualified_projection_name); + + struct QualifiedProjectionName + { + StorageID storage_id = StorageID::createEmpty(); + String projection_name; + explicit operator bool() const { return !projection_name.empty(); } + }; + void addQueryAccessInfo(const QualifiedProjectionName & qualified_projection_name); /// Supported factories for records in query_log diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 8c85435138c..fa6a7f5b8ea 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -628,11 +628,10 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? "" - : fmt::format( - "{}.{}", - reading->getMergeTreeData().getStorageID().getFullTableName(), - backQuoteIfNeed(candidates.minmax_projection->candidate.projection->name))); + query_info.is_internal ? 
Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName{ + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name}); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) @@ -668,11 +667,10 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? "" - : fmt::format( - "{}.{}", - reading->getMergeTreeData().getStorageID().getFullTableName(), - backQuoteIfNeed(best_candidate->projection->name))); + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName{ + .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 71db561e1c9..93d1be20e81 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -186,11 +186,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? "" - : fmt::format( - "{}.{}", - reading->getMergeTreeData().getStorageID().getFullTableName(), - backQuoteIfNeed(best_candidate->projection->name))); + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName{ + .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 82f47cc61d5..2d2412f7e36 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1764,7 +1764,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons if (storage_snapshot->projection) context->getQueryContext()->addQueryAccessInfo( - fmt::format("{}.{}", data.getStorageID().getFullTableName(), backQuoteIfNeed(storage_snapshot->projection->name))); + Context::QualifiedProjectionName{.storage_id = data.getStorageID(), .projection_name = storage_snapshot->projection->name}); } ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp index d50eec47ca8..a24c4dbe4d0 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.cpp +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.cpp @@ -1,21 +1,20 @@ -#include #include #include namespace DB { -ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, const String & qualified_projection_name_) +ReadFromPreparedSource::ReadFromPreparedSource(Pipe pipe_, ContextPtr context_, Context::QualifiedProjectionName qualified_projection_name_) : ISourceStep(DataStream{.header = pipe_.getHeader()}) , pipe(std::move(pipe_)) - , context(context_) - , qualified_projection_name(qualified_projection_name_) + , context(std::move(context_)) + , 
qualified_projection_name(std::move(qualified_projection_name_)) { } void ReadFromPreparedSource::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { - if (context && context->hasQueryContext() && !qualified_projection_name.empty()) + if (context && context->hasQueryContext()) context->getQueryContext()->addQueryAccessInfo(qualified_projection_name); for (const auto & processor : pipe.getProcessors()) diff --git a/src/Processors/QueryPlan/ReadFromPreparedSource.h b/src/Processors/QueryPlan/ReadFromPreparedSource.h index 5e64dcb7a4f..2606f501009 100644 --- a/src/Processors/QueryPlan/ReadFromPreparedSource.h +++ b/src/Processors/QueryPlan/ReadFromPreparedSource.h @@ -1,4 +1,6 @@ #pragma once + +#include #include #include @@ -9,7 +11,8 @@ namespace DB class ReadFromPreparedSource : public ISourceStep { public: - explicit ReadFromPreparedSource(Pipe pipe_, ContextPtr context_ = nullptr, const String & qualified_projection_name_ = ""); + explicit ReadFromPreparedSource( + Pipe pipe_, ContextPtr context_ = nullptr, Context::QualifiedProjectionName qualified_projection_name_ = {}); String getName() const override { return "ReadFromPreparedSource"; } @@ -18,7 +21,7 @@ public: protected: Pipe pipe; ContextPtr context; - String qualified_projection_name; + Context::QualifiedProjectionName qualified_projection_name; }; class ReadFromStorageStep : public ReadFromPreparedSource From 2cad81731be0443b50e66e43fb68b2b064d67a77 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 21 Jul 2023 16:46:56 +0800 Subject: [PATCH 258/478] Try to fix style issues --- .../optimizeUseAggregateProjection.cpp | 19 ++++++++++++------- .../optimizeUseNormalProjection.cpp | 7 +++++-- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index fa6a7f5b8ea..53f47bcdf95 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -628,11 +628,13 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & projection_reading = std::make_unique( std::move(pipe), context, - query_info.is_internal ? Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName{ - .storage_id = reading->getMergeTreeData().getStorageID(), - .projection_name = candidates.minmax_projection->candidate.projection->name}); - + query_info.is_internal + ? Context::QualifiedProjectionName{} + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = candidates.minmax_projection->candidate.projection->name, + }); has_ordinary_parts = !candidates.minmax_projection->normal_parts.empty(); if (has_ordinary_parts) reading->resetParts(std::move(candidates.minmax_projection->normal_parts)); @@ -669,8 +671,11 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & context, query_info.is_internal ? 
Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName{ - .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index 93d1be20e81..f6ace6f8025 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -188,8 +188,11 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) context, query_info.is_internal ? Context::QualifiedProjectionName{} - : Context::QualifiedProjectionName{ - .storage_id = reading->getMergeTreeData().getStorageID(), .projection_name = best_candidate->projection->name}); + : Context::QualifiedProjectionName + { + .storage_id = reading->getMergeTreeData().getStorageID(), + .projection_name = best_candidate->projection->name, + }); } bool has_ordinary_parts = best_candidate->merge_tree_ordinary_select_result_ptr != nullptr; From 5fa45bdbeaef99ba6a7db894d89dc749b7ac3f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Fri, 21 Jul 2023 12:12:34 +0200 Subject: [PATCH 259/478] Setting the metric FilesystemCacheSizeLimit in LRUFileCachePriority.h --- src/Interpreters/Cache/LRUFileCachePriority.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index e0d7d45062a..662a76968bc 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -5,6 +5,12 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric FilesystemCacheSizeLimit; +} + + namespace DB { @@ -18,7 +24,9 @@ private: using LRUQueueIterator = typename LRUQueue::iterator; public: - LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) {} + LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) { + CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); + } size_t getSize(const CacheGuard::Lock &) const override { return current_size; } From 930d45303c5b96b7553d611e82e0c94215ef5705 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Fri, 21 Jul 2023 12:13:38 +0200 Subject: [PATCH 260/478] removing the metric set from LRUFileCachePriority.cpp --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 33e567b7a76..18862e154da 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -7,7 +7,6 @@ namespace CurrentMetrics { extern const Metric FilesystemCacheSize; - extern const Metric FilesystemCacheSizeLimit; extern const Metric FilesystemCacheElements; } @@ -102,7 +101,6 @@ void LRUFileCachePriority::updateSize(int64_t size) { current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); - 
CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, getSizeLimit()); } void LRUFileCachePriority::updateElementsCount(int64_t num) From 3412dd225919f3850dfb4c0f8647e74e6630e31f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20G=C3=B3ralski?= Date: Fri, 21 Jul 2023 12:14:30 +0200 Subject: [PATCH 261/478] removed unnecessary whitespace --- src/Interpreters/Cache/LRUFileCachePriority.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 662a76968bc..9396070b792 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -10,7 +10,6 @@ namespace CurrentMetrics extern const Metric FilesystemCacheSizeLimit; } - namespace DB { From 714a3a8d121326e2d908648bfc0e76e09f6e0815 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 12:23:22 +0200 Subject: [PATCH 262/478] Don't do it on drop --- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 06f5330f6d9..841b646a126 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3994,6 +3994,13 @@ void StorageReplicatedMergeTree::addLastSentPart(const MergeTreePartInfo & info) void StorageReplicatedMergeTree::waitForUniquePartsToBeFetchedByOtherReplicas(StorageReplicatedMergeTree::ShutdownDeadline shutdown_deadline_) { + /// Will be true in case in case of query + if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr) + { + LOG_TRACE(log, "Will not wait for unique parts to be fetched by other replicas because shutdown called from DROP/DETACH query"); + return; + } + if (!shutdown_called.load()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Called waitForUniquePartsToBeFetchedByOtherReplicas before shutdown, it's a bug"); @@ -4951,7 +4958,6 @@ void StorageReplicatedMergeTree::shutdown() flushAndPrepareForShutdown(); - auto settings_ptr = getSettings(); if (!shutdown_deadline.has_value()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Shutdown deadline is not set in shutdown"); @@ -6311,7 +6317,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( const auto & stop_waiting = [&]() { - bool stop_waiting_itself = waiting_itself && partial_shutdown_called; + bool stop_waiting_itself = waiting_itself && (partial_shutdown_called || shutdown_prepared_called || shutdown_called); bool timeout_exceeded = check_timeout && wait_for_inactive_timeout < time_waiting.elapsedSeconds(); bool stop_waiting_inactive = (!wait_for_inactive || timeout_exceeded) && !getZooKeeper()->exists(fs::path(table_zookeeper_path) / "replicas" / replica / "is_active"); From e638a9ecd3cebe83c0c3997b19e0e73d1fb14639 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 21 Jul 2023 12:24:36 +0200 Subject: [PATCH 263/478] Fix style check --- src/Interpreters/Cache/LRUFileCachePriority.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 9396070b792..e041e59a91a 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -23,7 +23,8 @@ private: using LRUQueueIterator = typename LRUQueue::iterator; public: - LRUFileCachePriority(size_t 
max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) { + LRUFileCachePriority(size_t max_size_, size_t max_elements_) : IFileCachePriority(max_size_, max_elements_) + { CurrentMetrics::set(CurrentMetrics::FilesystemCacheSizeLimit, max_size_); } From 0f969923229375d72faac15257fc70bd7ece9095 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 16 Jul 2023 08:07:50 +0200 Subject: [PATCH 264/478] Fix possible EADDRINUSE ("Address already in use") in integration tests Here is one example [1]: minio1_1 | WARNING: Console endpoint is listening on a dynamic port (32911), please use --console-address ":PORT" to choose a static port. minio1_1 | ERROR Unable to initialize console server: Specified port is already in use minio1_1 | > Please ensure no other program uses the same address/port [1]: https://s3.amazonaws.com/clickhouse-test-reports/52103/7d510eac7c5f0dfb3361e269be30972e6022fada/integration_tests__tsan__[1_6].html And here is second [2]: java.net.BindException: Problem binding to [0.0.0.0:50020] java.net.BindException: Address already in use; For more details see: http://wiki.apache.org/hadoop/BindException [2]: https://s3.amazonaws.com/clickhouse-test-reports/51493/63e88b725d3d255a6534adce4d434ce5f95d2874/integration_tests__asan__[1_6].html v2: increase the limit from 5K to 10K Signed-off-by: Azat Khuzhin --- tests/integration/conftest.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 5933883f7b0..968571bfdde 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -12,6 +12,22 @@ from helpers.network import _NetworkManager logging.raiseExceptions = False +@pytest.fixture(autouse=True, scope="session") +def tune_local_port_range(): + # Lots of services uses non privileged ports: + # - hdfs -- 50020/50070/... + # - minio + # - mysql + # - psql + # + # So instead of tuning all these thirdparty services, let's simply + # prohibit using such ports for outgoing connections, this should fix + # possible "Address already in use" errors. + # + # NOTE: 5K is not enough, and sometimes leads to EADDRNOTAVAIL error. + run_and_check(["sysctl net.ipv4.ip_local_port_range='55000 65535'"], shell=True) + + @pytest.fixture(autouse=True, scope="session") def cleanup_environment(): try: From 3798bd6f509b7fc0591201c66c2e0d1b254835dd Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 12:52:07 +0000 Subject: [PATCH 265/478] Replace test by text_to_encrypt --- src/Common/examples/encrypt_decrypt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index 2d8c5a5f61f..503802016cb 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -7,7 +7,7 @@ * Keys for codecs are loaded from section of configuration file. * * How to use: - * ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test + * ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt */ int main(int argc, char ** argv) @@ -22,7 +22,7 @@ int main(int argc, char ** argv) << "action: -e for encryption and -d for decryption." << std::endl << "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." 
<< std::endl << std::endl << "Example:" << std::endl - << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV test"; + << " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt"; return 3; } From 8ec8388a9ef063beb02b430ae4b89dfe5bab9ddd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 21 Jul 2023 14:53:02 +0200 Subject: [PATCH 266/478] Update gtest_lru_file_cache.cpp --- src/Interpreters/tests/gtest_lru_file_cache.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index b9d12c8ed42..12e7d9372f7 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -470,6 +470,7 @@ TEST_F(FileCacheTest, get) auto & file_segment2 = get(holder2, 2); ASSERT_TRUE(file_segment2.getOrSetDownloader() != FileSegment::getCallerId()); + ASSERT_EQ(file_segment2.state(), State::DOWNLOADING); { std::lock_guard lock(mutex); @@ -478,8 +479,7 @@ TEST_F(FileCacheTest, get) cv.notify_one(); file_segment2.wait(file_segment2.range().right); - file_segment2.complete(); - ASSERT_TRUE(file_segment2.state() == State::DOWNLOADED); + ASSERT_EQ(file_segment2.getDownloadedSize(false), file_segment2.range().size()); }); { @@ -488,7 +488,8 @@ TEST_F(FileCacheTest, get) } download(file_segment); - ASSERT_TRUE(file_segment.state() == State::DOWNLOADED); + ASSERT_EQ(file_segment.state(), State::DOWNLOADED); + file_segment.completePartAndResetDownloader(); other_1.join(); From 4695ec6802c80d25e93a7b523821840c10a3b200 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 14:56:29 +0200 Subject: [PATCH 267/478] Add an ability to specify allocations size for sampling memory profiler --- programs/server/Server.cpp | 21 ++++++++++----- src/Common/MemoryTracker.cpp | 11 ++++++-- src/Common/MemoryTracker.h | 18 +++++++++++++ src/Core/ServerSettings.h | 8 ++++-- src/Core/Settings.h | 4 ++- src/Interpreters/ProcessList.cpp | 3 +++ src/Interpreters/ThreadStatusExt.cpp | 2 ++ .../__init__.py | 1 + .../configs/max_untracked_memory.xml | 7 +++++ .../configs/memory_profiler.xml | 5 ++++ .../test.py | 27 +++++++++++++++++++ ...r_sample_min_max_allocation_size.reference | 1 + ...profiler_sample_min_max_allocation_size.sh | 18 +++++++++++++ 13 files changed, 115 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml create mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml create mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py create mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference create mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 948824242fb..71bf8cc9e89 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1643,17 +1643,26 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). 
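For reference, a usage sketch of the query-level counterparts introduced further down in this commit (memory_profiler_sample_min_allocation_size and memory_profiler_sample_max_allocation_size). This is not part of the patch; it simply mirrors the stateless test added at the end of the commit, and the concrete byte values are only illustrative:

```sql
-- Sample every allocation, but only those between 4 KiB and 8 KiB.
SET memory_profiler_sample_probability = 1;
SET max_untracked_memory = 0;
SET memory_profiler_sample_min_allocation_size = 4096;
SET memory_profiler_sample_max_allocation_size = 8192;

SELECT randomPrintableASCII(number) FROM numbers(1000) FORMAT Null;

SYSTEM FLUSH LOGS;

-- Expect no MemorySample rows outside the configured [4096, 8192] byte range.
SELECT count()
FROM system.trace_log
WHERE trace_type = 'MemorySample'
  AND (abs(size) > 8192 OR abs(size) < 4096);
```

The server-wide knobs wired up in this hunk (total_memory_profiler_sample_min_allocation_size and total_memory_profiler_sample_max_allocation_size) play the same role for the total memory tracker and are set at the server configuration level, as in the integration test config added later in this commit.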
- UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); - if (total_memory_profiler_step) + if (server_settings.total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(total_memory_profiler_step); + total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); } - double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); - if (total_memory_tracker_sample_probability > 0.0) + if (server_settings.total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); } + + if (server_settings.total_memory_profiler_sample_min_allocation_size) + { + total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); + } + + if (server_settings.total_memory_profiler_sample_max_allocation_size) + { + total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); + } + } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 81cac2617c5..52cd9cc8073 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); @@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size) } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); @@ -534,6 +534,13 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) ; } +bool MemoryTracker::isSizeOkForSampling(UInt64 size) const +{ + //LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "CHECKING SIZE {} IN BORDERS [{}; {}]", size, min_allocation_size_bytes, max_allocation_size_bytes); + /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation + return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); +} + bool canEnqueueBackgroundTask() { auto limit = background_memory_tracker.getSoftLimit(); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 4e29d40c953..768dc8a7404 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -67,6 +67,12 @@ private: /// To randomly sample allocations and deallocations in trace_log. double sample_probability = 0; + /// Randomly sample allocations only larger or equal to this size + UInt64 min_allocation_size_bytes = 0; + + /// Randomly sample allocations only smaller or equal to this size + UInt64 max_allocation_size_bytes = 0; + /// Singly-linked list. 
All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. std::atomic parent {}; @@ -88,6 +94,8 @@ private: void setOrRaiseProfilerLimit(Int64 value); + bool isSizeOkForSampling(UInt64 size) const; + /// allocImpl(...) and free(...) should not be used directly friend struct CurrentMemoryTracker; void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); @@ -165,6 +173,16 @@ public: sample_probability = value; } + void setSampleMinAllocationSize(UInt64 value) + { + min_allocation_size_bytes = value; + } + + void setSampleMaxAllocationSize(UInt64 value) + { + max_allocation_size_bytes = value; + } + void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 1a9f226041b..f7a6c9e950e 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -81,8 +81,12 @@ namespace DB M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) - + M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ + \ + M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ + M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. 
You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6fb26994d2f..bcfc179be5e 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -426,7 +426,9 @@ class IColumn; M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ - M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ + M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. 
If timeout is reached and memory is not freed, exception is thrown.", 0) \ diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 1503e396298..c299572ef41 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,7 +223,10 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); + thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 5acfe500b1d..49d9d3ccdf6 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,6 +83,8 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); + group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml new file mode 100644 index 00000000000..56fc5ed34ca --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml new file mode 100644 index 00000000000..5b3e17d145f --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml @@ -0,0 +1,5 @@ + + 1 + 4096 + 8192 + diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py new file mode 100644 index 00000000000..b768a442591 --- /dev/null +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -0,0 +1,27 @@ +from helpers.cluster import ClickHouseCluster +import pytest + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/memory_profiler.xml"], + user_configs=["configs/max_untracked_memory.xml"], +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield 
cluster + + finally: + cluster.shutdown() + + +def test_trace_boundaries_work(started_cluster): + node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") + node.query("SYSTEM FLUSH LOGS") + + assert node.query("SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'") == "1\n" + assert node.query("SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)") == "0\n" diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh new file mode 100755 index 00000000000..b1fbea26da7 --- /dev/null +++ b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings +# requires TraceCollector, does not available under sanitizers and aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" +${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" + +${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" + +# at least something allocated +${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" + +# show wrong allocations +${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" From 0aed62ec73b8de4614506f5b72a086d8d10db4aa Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 13:03:25 +0000 Subject: [PATCH 268/478] Add codec name into exception message --- src/Compression/CompressionCodecEncrypted.cpp | 2 +- ..._no_encryption_codecs.xml => config_no_encryption_key.xml} | 1 - .../integration/test_config_decryption/test_wrong_settings.py | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) rename tests/integration/test_config_decryption/configs/{config_no_encryption_codecs.xml => config_no_encryption_key.xml} (52%) diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index fb870ababa3..3f4e35a78a4 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -588,7 +588,7 @@ String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method, if (current_params->keys_storage[method].contains(key_id)) key = current_params->keys_storage[method].at(key_id); else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config for {} encryption 
codec", key_id, getMethodName(method)); return key; } diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml similarity index 52% rename from tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml rename to tests/integration/test_config_decryption/configs/config_no_encryption_key.xml index 07bf69d17c8..5f7769f7403 100644 --- a/tests/integration/test_config_decryption/configs/config_no_encryption_codecs.xml +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml @@ -1,4 +1,3 @@ 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index e0fbd4b2948..62610964502 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -26,9 +26,9 @@ def test_invalid_chars(): ) -def test_no_encryption_codecs(): +def test_no_encryption_key(): start_clickhouse( - "configs/config_no_encryption_codecs.xml", "There is no key 0 in config" + "configs/config_no_encryption_key.xml", "There is no key 0 in config for AES_128_GCM_SIV encryption codec" ) From 10ec06917506c1a61caadf4c680bd0148520426f Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 13:29:40 +0000 Subject: [PATCH 269/478] Improve exception message text --- src/Common/Config/ConfigProcessor.cpp | 2 +- tests/integration/test_config_decryption/test_wrong_settings.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 6529e94a41d..73fc5c58b2f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -232,7 +232,7 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) { const NodeListPtr children = element.childNodes(); if (children->length() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have only one text node", node->nodeName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} cannot contain nested elements", node->nodeName()); Node * text_node = node->firstChild(); if (text_node->nodeType() != Node::TEXT_NODE) diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index 62610964502..da32a8f0ac8 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -33,4 +33,4 @@ def test_no_encryption_key(): def test_subnodes(): - start_clickhouse("configs/config_subnodes.xml", "should have only one text node") + start_clickhouse("configs/config_subnodes.xml", "cannot contain nested elements") From 1daa26c74130003a4039dcc809b9d3d0a5bcba95 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 13:31:42 +0000 Subject: [PATCH 270/478] Fix black formatting --- .../integration/test_config_decryption/test_wrong_settings.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index da32a8f0ac8..b148f9a051a 100644 --- 
a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -28,7 +28,8 @@ def test_invalid_chars(): def test_no_encryption_key(): start_clickhouse( - "configs/config_no_encryption_key.xml", "There is no key 0 in config for AES_128_GCM_SIV encryption codec" + "configs/config_no_encryption_key.xml", + "There is no key 0 in config for AES_128_GCM_SIV encryption codec", ) From abd8bfed2b6e6c20b46ffbeb82699c8530523ffe Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 15:44:49 +0200 Subject: [PATCH 271/478] Remove comment --- src/Common/MemoryTracker.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 52cd9cc8073..52cae0768dc 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -536,7 +536,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) bool MemoryTracker::isSizeOkForSampling(UInt64 size) const { - //LOG_DEBUG(&Poco::Logger::get("MemoryTracker"), "CHECKING SIZE {} IN BORDERS [{}; {}]", size, min_allocation_size_bytes, max_allocation_size_bytes); /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); } From c080e9b450faeaced13c149212456ab006648c3a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 21 Jul 2023 21:48:49 +0800 Subject: [PATCH 272/478] Fix normal projection with merge table --- .../Optimizations/optimizeUseNormalProjection.cpp | 8 ++++++-- ..._projection_query_plan_optimization_misc.reference | 1 + .../01710_projection_query_plan_optimization_misc.sql | 11 +++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference create mode 100644 tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp index dd7a5d449bc..2a03a082d89 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseNormalProjection.cpp @@ -92,6 +92,10 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) break; } + /// Dangling query plan node. This might be generated by StorageMerge. 
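The scenario this guard covers is condensed in the regression test added below: reading a table that has a normal projection through the merge() table function. A minimal sketch of that shape, not part of the patch and using the same DDL as the test:

```sql
CREATE TABLE t (x Int32, codectest Int32) ENGINE = MergeTree ORDER BY x;
ALTER TABLE t ADD PROJECTION x (SELECT * ORDER BY codectest);
INSERT INTO t VALUES (1, 2);

-- Reading through merge() builds a plan where the stack walk above can land on the
-- reading step itself; with this check the projection optimization simply bails out
-- and the query returns the inserted row as expected.
SELECT * FROM merge('', 't');
```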
+ if (iter->node->step.get() == reading) + return false; + const auto metadata = reading->getStorageMetadata(); const auto & projections = metadata->projections; @@ -105,8 +109,8 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes) QueryDAG query; { - auto & clild = iter->node->children[iter->next_child - 1]; - if (!query.build(*clild)) + auto & child = iter->node->children[iter->next_child - 1]; + if (!query.build(*child)) return false; if (query.dag) diff --git a/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql new file mode 100644 index 00000000000..cb565313380 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_query_plan_optimization_misc.sql @@ -0,0 +1,11 @@ +drop table if exists t; + +create table t (x Int32, codectest Int32) engine = MergeTree order by x; + +alter table t add projection x (select * order by codectest); + +insert into t values (1, 2); + +select * from merge('', 't'); + +drop table t; From 2a6b96f9e339e602c59968741741e57b1675bf52 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 21 Jul 2023 13:51:40 +0000 Subject: [PATCH 273/478] Automatic style fix --- .../test_memory_profiler_min_max_borders/test.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py index b768a442591..6ab971fa9c4 100644 --- a/tests/integration/test_memory_profiler_min_max_borders/test.py +++ b/tests/integration/test_memory_profiler_min_max_borders/test.py @@ -23,5 +23,15 @@ def test_trace_boundaries_work(started_cluster): node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") node.query("SYSTEM FLUSH LOGS") - assert node.query("SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'") == "1\n" - assert node.query("SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)") == "0\n" + assert ( + node.query( + "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" + ) + == "1\n" + ) + assert ( + node.query( + "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" + ) + == "0\n" + ) From 3acb6005f041051b7c00c48df5035843744a7e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 21 Jul 2023 17:08:01 +0200 Subject: [PATCH 274/478] Reduce the number of syscalls in FileCache::loadMetadata --- src/Interpreters/Cache/FileCache.cpp | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 91d1c63e832..42cc7b80a66 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -870,13 +870,12 @@ void FileCache::loadMetadata() } size_t total_size = 0; - for (auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; - key_prefix_it != fs::directory_iterator();) + for 
(auto key_prefix_it = fs::directory_iterator{metadata.getBaseDirectory()}; key_prefix_it != fs::directory_iterator(); + key_prefix_it++) { const fs::path key_prefix_directory = key_prefix_it->path(); - key_prefix_it++; - if (!fs::is_directory(key_prefix_directory)) + if (!key_prefix_it->is_directory()) { if (key_prefix_directory.filename() != "status") { @@ -887,19 +886,19 @@ void FileCache::loadMetadata() continue; } - if (fs::is_empty(key_prefix_directory)) + fs::directory_iterator key_it{key_prefix_directory}; + if (key_it == fs::directory_iterator{}) { LOG_DEBUG(log, "Removing empty key prefix directory: {}", key_prefix_directory.string()); fs::remove(key_prefix_directory); continue; } - for (fs::directory_iterator key_it{key_prefix_directory}; key_it != fs::directory_iterator();) + for (/* key_it already initialized to verify emptiness */; key_it != fs::directory_iterator(); key_it++) { const fs::path key_directory = key_it->path(); - ++key_it; - if (!fs::is_directory(key_directory)) + if (!key_it->is_directory()) { LOG_DEBUG( log, @@ -908,7 +907,7 @@ void FileCache::loadMetadata() continue; } - if (fs::is_empty(key_directory)) + if (fs::directory_iterator{key_directory} == fs::directory_iterator{}) { LOG_DEBUG(log, "Removing empty key directory: {}", key_directory.string()); fs::remove(key_directory); From 5fb5ba71edbaf664045871b0fc8d6d5d6f5f45e6 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 21 Jul 2023 15:40:53 +0000 Subject: [PATCH 275/478] Throw exception when several text nodes found in YAML for element node --- src/Common/Config/YAMLParser.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/Config/YAMLParser.cpp b/src/Common/Config/YAMLParser.cpp index a1de14afc13..72706cb98ba 100644 --- a/src/Common/Config/YAMLParser.cpp +++ b/src/Common/Config/YAMLParser.cpp @@ -112,6 +112,11 @@ namespace { if (key == "#text" && value_node.IsScalar()) { + for (Node * child_node = parent_xml_node.firstChild(); child_node; child_node = child_node->nextSibling()) + if (child_node->nodeType() == Node::TEXT_NODE) + throw Exception(ErrorCodes::CANNOT_PARSE_YAML, + "YAMLParser has encountered node with several text nodes " + "and cannot continue parsing of the file"); std::string value = value_node.as(); Poco::AutoPtr xml_value = xml_document->createTextNode(value); parent_xml_node.appendChild(xml_value); From a2b170a18e7db041eb41e631f693b3ddec8e79a7 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Fri, 21 Jul 2023 17:42:55 +0200 Subject: [PATCH 276/478] Avoid exception which I didn't understand --- src/Storages/StorageReplicatedMergeTree.cpp | 33 +++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 841b646a126..bc8dbfa0e1f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4905,20 +4905,29 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() if (shutdown_prepared_called.exchange(true)) return; - auto settings_ptr = getSettings(); - /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. - fetcher.blocker.cancelForever(); - merger_mutator.merges_blocker.cancelForever(); - parts_mover.moves_blocker.cancelForever(); - stopBeingLeader(); + try + { + auto settings_ptr = getSettings(); + /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. 
+ fetcher.blocker.cancelForever(); + merger_mutator.merges_blocker.cancelForever(); + parts_mover.moves_blocker.cancelForever(); + stopBeingLeader(); - if (attach_thread) - attach_thread->shutdown(); + if (attach_thread) + attach_thread->shutdown(); - restarting_thread.shutdown(/* part_of_full_shutdown */true); - /// Explicetly set the event, because the restarting thread will not set it again - startup_event.set(); - shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); + restarting_thread.shutdown(/* part_of_full_shutdown */true); + /// Explicetly set the event, because the restarting thread will not set it again + startup_event.set(); + shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); + } + catch (...) + { + /// Don't wait anything in case of inproper prepare for shutdown + shutdown_deadline.emplace(std::chrono::system_clock::now()); + throw; + } } void StorageReplicatedMergeTree::partialShutdown() From ed97284bfae5b18f8dbc8841e8b296edd45cd286 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 21 Jul 2023 19:28:28 +0200 Subject: [PATCH 277/478] Update src/Storages/StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index bc8dbfa0e1f..f191440442d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4924,7 +4924,7 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() } catch (...) 
{ - /// Don't wait anything in case of inproper prepare for shutdown + /// Don't wait anything in case of improper prepare for shutdown shutdown_deadline.emplace(std::chrono::system_clock::now()); throw; } From 6c8d5ca0a554ecc4fee32269858797d139f3c02a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 21 Jul 2023 21:33:51 +0000 Subject: [PATCH 278/478] Fix: remove redundant distinct with views --- src/Interpreters/ActionsDAG.cpp | 18 +++++++++++---- ...x_remove_dedundant_distinct_view.reference | 13 +++++++++++ ...810_fix_remove_dedundant_distinct_view.sql | 22 +++++++++++++++++++ 3 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference create mode 100644 tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 906875dd314..ce273e78ff3 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2511,11 +2511,21 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr /// find input node which refers to the output node /// consider only aliases on the path const auto * node = output_node; - while (node && node->type == ActionsDAG::ActionType::ALIAS) + while (node) { - /// alias has only one child - chassert(node->children.size() == 1); - node = node->children.front(); + if (node->type == ActionsDAG::ActionType::ALIAS) + { + node = node->children.front(); + } + /// materiailze can occure when dealing with views, special case + /// TODO: not sure if it should be done here, looks too generic place + else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize") + { + chassert(node->children.size() == 1); + node = node->children.front(); + } + else + break; } if (node && node->type == ActionsDAG::ActionType::INPUT) index.emplace(output_node->result_name, node); diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference new file mode 100644 index 00000000000..01f14f82e94 --- /dev/null +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference @@ -0,0 +1,13 @@ +-- { echoOn } +set query_plan_remove_redundant_distinct=1; +-- DISTINCT has to be removed since the view already has DISTINCT on the same column +SELECT count() +FROM +( + EXPLAIN SELECT DISTINCT x FROM tab_v +) +WHERE explain ILIKE '%distinct%'; +2 +SELECT DISTINCT x FROM tab_v; +2 +1 diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql new file mode 100644 index 00000000000..99fc24dae8b --- /dev/null +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql @@ -0,0 +1,22 @@ +set allow_experimental_analyzer=1; + +drop table if exists tab_v; +drop table if exists tab; +create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y); +insert into tab values(1, 1); +insert into tab values(1, 2); +insert into tab values(2, 1); + +create view tab_v as select distinct(x) from tab; + +-- { echoOn } +set query_plan_remove_redundant_distinct=1; +-- DISTINCT has to be removed since the view already has DISTINCT on the same column +SELECT count() +FROM +( + EXPLAIN SELECT DISTINCT x FROM tab_v +) +WHERE explain ILIKE '%distinct%'; + +SELECT DISTINCT x FROM tab_v; From 
687cbc57bba42a67b62b1b717e51a5be7e14b733 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 21 Jul 2023 22:15:02 +0000 Subject: [PATCH 279/478] Fix typo --- src/Interpreters/ActionsDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index ce273e78ff3..284c42b658a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2517,7 +2517,7 @@ FindOriginalNodeForOutputName::FindOriginalNodeForOutputName(const ActionsDAGPtr { node = node->children.front(); } - /// materiailze can occure when dealing with views, special case + /// materiailze() function can occur when dealing with views /// TODO: not sure if it should be done here, looks too generic place else if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "materialize") { From 5ec63c782c6bdd62705f26cc9b09e8a640ca9da8 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 22 Jul 2023 00:15:05 +0200 Subject: [PATCH 280/478] Fixed inserting into Buffer engine by not throwing exception from DatabaseCatalog::tryGetTable() when database name is empty --- src/Interpreters/DatabaseCatalog.cpp | 3 ++- ...rentDatabase_for_table_functions.reference | 17 +++++++++++++++++ ...14_currentDatabase_for_table_functions.sql | 19 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference create mode 100644 tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index e0b6348ed3c..f9ed2c0d5ca 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -344,7 +344,8 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { std::lock_guard lock{databases_mutex}; - auto it = databases.find(table_id.getDatabaseName()); + // hasDatabase() to avod getDatabaseName() throwing exception if database is empty. + auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); if (databases.end() == it) { if (exception) diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference new file mode 100644 index 00000000000..7ff95106d3d --- /dev/null +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.reference @@ -0,0 +1,17 @@ +-- Based on https://github.com/ClickHouse/ClickHouse/issues/52436 +-- Test that inserts performed via Buffer table engine land into destination table. 
+-- { echoOn } + +DROP TABLE IF EXISTS null_table; +DROP TABLE IF EXISTS null_table_buffer; +DROP TABLE IF EXISTS null_mv; +DROP VIEW IF EXISTS number_view; +CREATE TABLE null_table (number UInt64) ENGINE = Null; +CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; +CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; +CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); +INSERT INTO null_table_buffer VALUES (1); +SELECT sleep(3) FORMAT Null; +-- Insert about should've landed into `null_mv` +SELECT count() FROM null_mv; +1 diff --git a/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql new file mode 100644 index 00000000000..74b5cf5f432 --- /dev/null +++ b/tests/queries/0_stateless/02814_currentDatabase_for_table_functions.sql @@ -0,0 +1,19 @@ +-- Based on https://github.com/ClickHouse/ClickHouse/issues/52436 +-- Test that inserts performed via Buffer table engine land into destination table. +-- { echoOn } + +DROP TABLE IF EXISTS null_table; +DROP TABLE IF EXISTS null_table_buffer; +DROP TABLE IF EXISTS null_mv; +DROP VIEW IF EXISTS number_view; + +CREATE TABLE null_table (number UInt64) ENGINE = Null; +CREATE VIEW number_view as SELECT * FROM numbers(10) as tb; +CREATE MATERIALIZED VIEW null_mv Engine = Log AS SELECT * FROM null_table LEFT JOIN number_view as tb USING number; + +CREATE TABLE null_table_buffer (number UInt64) ENGINE = Buffer(currentDatabase(), null_table, 1, 1, 1, 100, 200, 10000, 20000); +INSERT INTO null_table_buffer VALUES (1); +SELECT sleep(3) FORMAT Null; + +-- Insert about should've landed into `null_mv` +SELECT count() FROM null_mv; From ae8f511ac5ffc6191394dd9fbfed9a0b082102e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 02:27:07 +0200 Subject: [PATCH 281/478] Fix a test --- tests/integration/test_zero_copy_fetch/configs/users.xml | 7 +++++++ tests/integration/test_zero_copy_fetch/test.py | 2 ++ 2 files changed, 9 insertions(+) create mode 100644 tests/integration/test_zero_copy_fetch/configs/users.xml diff --git a/tests/integration/test_zero_copy_fetch/configs/users.xml b/tests/integration/test_zero_copy_fetch/configs/users.xml new file mode 100644 index 00000000000..b0990ca3a60 --- /dev/null +++ b/tests/integration/test_zero_copy_fetch/configs/users.xml @@ -0,0 +1,7 @@ + + + + 0 + + + diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index 4f3d42096c3..dc79e5d8723 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -19,12 +19,14 @@ def started_cluster(): cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) cluster.add_instance( "node2", main_configs=["configs/storage_conf.xml"], + user_configs=["configs/users.xml"], with_minio=True, with_zookeeper=True, ) From 9a5aed35e24a9aa4d7de71971665449cf344f917 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 02:33:44 +0200 Subject: [PATCH 282/478] Add a note about potential caveats for the "session_timezone" setting --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cfcb56729d2..f267fa15276 100644 --- a/src/Core/Settings.h +++ 
b/src/Core/Settings.h @@ -775,7 +775,7 @@ class IColumn; M(Bool, allow_experimental_undrop_table_query, false, "Allow to use undrop query to restore dropped table in a limited time", 0) \ M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \ - M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \ + M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. From e68234a231bf234d60ccfa262ca5a2374fb4f98a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:45:50 +0300 Subject: [PATCH 283/478] Revert "Re-add SipHash keyed functions" --- .../sql-reference/functions/hash-functions.md | 8 +- src/Functions/FunctionsHashing.h | 329 +++++------------- src/Functions/FunctionsHashingMisc.cpp | 5 - .../0_stateless/02534_keyed_siphash.reference | 37 -- .../0_stateless/02534_keyed_siphash.sql | 61 +--- .../02552_siphash128_reference.reference | 151 -------- .../02552_siphash128_reference.sql | 253 -------------- 7 files changed, 99 insertions(+), 745 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 556fe622c27..06097d92480 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -51,7 +51,7 @@ Calculates the MD5 from a string and returns the resulting set of bytes as Fixed If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 {#hash_functions-siphash64} +## sipHash64 (#hash_functions-siphash64) Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. @@ -63,9 +63,9 @@ This is a cryptographic hash function. It works at least three times faster than The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: -1. The first and the second hash value are concatenated to an array which is hashed. -2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. -3. This calculation is repeated for all remaining hash values of the original input. +1. The first and the second hash value are concatenated to an array which is hashed. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. +3. This calculation is repeated for all remaining hash values of the original input. 
**Arguments** diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 82944630b10..279294b367c 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -79,51 +79,28 @@ namespace impl UInt64 key1 = 0; }; - struct SipHashKeyColumns + static SipHashKey parseSipHashKey(const ColumnWithTypeAndName & key) { - ColumnPtr key0; - ColumnPtr key1; - bool is_const; + SipHashKey ret{}; - size_t size() const - { - assert(key0 && key1); - assert(key0->size() == key1->size()); - return key0->size(); - } - SipHashKey getKey(size_t i) const - { - if (is_const) - i = 0; - const auto & key0data = assert_cast(*key0).getData(); - const auto & key1data = assert_cast(*key1).getData(); - return {key0data[i], key1data[i]}; - } - }; - - static SipHashKeyColumns parseSipHashKeyColumns(const ColumnWithTypeAndName & key) - { - const ColumnTuple * tuple = nullptr; - const auto * column = key.column.get(); - bool is_const = false; - if (isColumnConst(*column)) - { - is_const = true; - tuple = checkAndGetColumnConstData(column); - } - else - tuple = checkAndGetColumn(column); + const auto * tuple = checkAndGetColumn(key.column.get()); if (!tuple) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "key must be a tuple"); + if (tuple->tupleSize() != 2) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "wrong tuple size: key must be a tuple of 2 UInt64"); - SipHashKeyColumns ret{tuple->getColumnPtr(0), tuple->getColumnPtr(1), is_const}; - assert(ret.key0); - if (!checkColumn(*ret.key0)) + if (tuple->empty()) + return ret; + + if (const auto * key0col = checkAndGetColumn(&(tuple->getColumn(0)))) + ret.key0 = key0col->get64(0); + else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "first element of the key tuple is not UInt64"); - assert(ret.key1); - if (!checkColumn(*ret.key1)) + + if (const auto * key1col = checkAndGetColumn(&(tuple->getColumn(1)))) + ret.key1 = key1col->get64(0); + else throw Exception(ErrorCodes::NOT_IMPLEMENTED, "second element of the key tuple is not UInt64"); return ret; @@ -352,10 +329,8 @@ struct SipHash64KeyedImpl static constexpr auto name = "sipHash64Keyed"; using ReturnType = UInt64; using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } static UInt64 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash64Keyed(key.key0, key.key1, begin, size); } @@ -396,10 +371,8 @@ struct SipHash128KeyedImpl static constexpr auto name = "sipHash128Keyed"; using ReturnType = UInt128; using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } + static Key parseKey(const ColumnWithTypeAndName & key) { return impl::parseSipHashKey(key); } static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) { return sipHash128Keyed(key.key0, key.key1, begin, size); } @@ -425,43 +398,13 @@ struct SipHash128ReferenceImpl using ReturnType = UInt128; - static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return combineHashesFunc(h1, h2); } + static UInt128 combineHashes(UInt128 h1, UInt128 h2) { return 
combineHashesFunc(h1, h2); } static UInt128 apply(const char * data, const size_t size) { return sipHash128Reference(data, size); } static constexpr bool use_int_hash_for_pods = false; }; -struct SipHash128ReferenceKeyedImpl -{ - static constexpr auto name = "sipHash128ReferenceKeyed"; - using ReturnType = UInt128; - using Key = impl::SipHashKey; - using KeyColumns = impl::SipHashKeyColumns; - - static KeyColumns parseKeyColumns(const ColumnWithTypeAndName & key) { return impl::parseSipHashKeyColumns(key); } - static Key getKey(const KeyColumns & key, size_t i) { return key.getKey(i); } - - static UInt128 applyKeyed(const Key & key, const char * begin, size_t size) - { - return sipHash128ReferenceKeyed(key.key0, key.key1, begin, size); - } - - static UInt128 combineHashesKeyed(const Key & key, UInt128 h1, UInt128 h2) - { -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - UInt128 tmp; - reverseMemcpy(&tmp, &h1, sizeof(UInt128)); - h1 = tmp; - reverseMemcpy(&tmp, &h2, sizeof(UInt128)); - h2 = tmp; -#endif - UInt128 hashes[] = {h1, h2}; - return applyKeyed(key, reinterpret_cast(hashes), 2 * sizeof(UInt128)); - } - - static constexpr bool use_int_hash_for_pods = false; -}; /** Why we need MurmurHash2? * MurmurHash2 is an outdated hash function, superseded by MurmurHash3 and subsequently by CityHash, xxHash, HighwayHash. @@ -1080,7 +1023,7 @@ private: DECLARE_MULTITARGET_CODE( -template +template class FunctionAnyHash : public IFunction { public: @@ -1090,12 +1033,9 @@ private: using ToType = typename Impl::ReturnType; template - void executeIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); if (const ColVecType * col_from = checkAndGetColumn(column)) { @@ -1104,9 +1044,6 @@ private: for (size_t i = 0; i < size; ++i) { ToType hash; - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); if constexpr (Impl::use_int_hash_for_pods) { @@ -1140,14 +1077,6 @@ private: } else if (auto col_from_const = checkAndGetColumnConst(column)) { - if constexpr (Keyed) - { - if (!key_cols.is_const) - { - ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeIntType(key_cols, full_column.get(), vec_to); - } - } auto value = col_from_const->template getValue(); ToType hash; @@ -1178,15 +1107,8 @@ private: if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1194,12 +1116,9 @@ private: } template - void executeBigIntType(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeBigIntType(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { using ColVecType = ColumnVectorOrDecimal; - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); if (const ColVecType * col_from = checkAndGetColumn(column)) { @@ -1208,9 +1127,6 @@ private: for (size_t i = 0; i < size; ++i) { ToType hash; - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = 
Impl::getKey(key_cols, i); if constexpr (std::endian::native == std::endian::little) hash = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); else @@ -1227,14 +1143,6 @@ private: } else if (auto col_from_const = checkAndGetColumnConst(column)) { - if constexpr (Keyed) - { - if (!key_cols.is_const) - { - ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeBigIntType(key_cols, full_column.get(), vec_to); - } - } auto value = col_from_const->template getValue(); ToType hash; @@ -1250,15 +1158,8 @@ private: if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1266,16 +1167,10 @@ private: } template - void executeGeneric(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeGeneric(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); for (size_t i = 0, size = column->size(); i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); StringRef bytes = column->getDataAt(i); const ToType hash = apply(key, bytes.data, bytes.size); if constexpr (first) @@ -1286,11 +1181,8 @@ private: } template - void executeString(const KeyColumnsType & key_cols, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeString(const KeyType & key, const IColumn * column, typename ColumnVector::Container & vec_to) const { - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); if (const ColumnString * col_from = checkAndGetColumn(column)) { const typename ColumnString::Chars & data = col_from->getChars(); @@ -1300,9 +1192,6 @@ private: ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); const ToType hash = apply(key, reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1); @@ -1323,9 +1212,6 @@ private: for (size_t i = 0; i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); const ToType hash = apply(key, reinterpret_cast(&data[i * n]), n); if constexpr (first) vec_to[i] = hash; @@ -1335,14 +1221,6 @@ private: } else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column)) { - if constexpr (Keyed) - { - if (!key_cols.is_const) - { - ColumnPtr full_column = col_from_const->convertToFullColumn(); - return executeString(key_cols, full_column.get(), vec_to); - } - } String value = col_from_const->getValue(); const ToType hash = apply(key, value.data(), value.size()); const size_t size = vec_to.size(); @@ -1350,15 +1228,8 @@ private: if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1366,7 +1237,7 @@ private: } template - void executeArray(const KeyColumnsType & key_cols, const IDataType 
* type, const IColumn * column, typename ColumnVector::Container & vec_to) const + void executeArray(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to) const { const IDataType * nested_type = typeid_cast(*type).getNestedType().get(); @@ -1378,19 +1249,13 @@ private: typename ColumnVector::Container vec_temp(nested_size); bool nested_is_first = true; - executeForArgument(key_cols, nested_type, nested_column, vec_temp, nested_is_first); + executeForArgument(key, nested_type, nested_column, vec_temp, nested_is_first); const size_t size = offsets.size(); ColumnArray::Offset current_offset = 0; - KeyType key{}; - if constexpr (Keyed) - key = Impl::getKey(key_cols, 0); for (size_t i = 0; i < size; ++i) { - if constexpr (Keyed) - if (!key_cols.is_const && i != 0) - key = Impl::getKey(key_cols, i); ColumnArray::Offset next_offset = offsets[i]; ToType hash; @@ -1414,7 +1279,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key_cols, type, full_column.get(), vec_to); + executeArray(key, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1422,7 +1287,7 @@ private: } template - void executeAny(const KeyColumnsType & key_cols, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const + void executeAny(const KeyType & key, const IDataType * from_type, const IColumn * icolumn, typename ColumnVector::Container & vec_to) const { WhichDataType which(from_type); @@ -1430,45 +1295,40 @@ private: throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", icolumn->getName(), icolumn->size(), vec_to.size(), getName()); - if constexpr (Keyed) - if ((!key_cols.is_const && key_cols.size() != vec_to.size()) - || (key_cols.is_const && key_cols.size() != 1)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Key column size {} doesn't match result column size {} of function {}", key_cols.size(), vec_to.size(), getName()); - - if (which.isUInt8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isUInt128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isUInt256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isInt8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isInt128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isInt256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isUUID()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isIPv4()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isIPv6()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isEnum8()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isEnum16()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDate()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isDate32()) 
executeIntType(key_cols, icolumn, vec_to); - else if (which.isDateTime()) executeIntType(key_cols, icolumn, vec_to); + if (which.isUInt8()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt16()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt64()) executeIntType(key, icolumn, vec_to); + else if (which.isUInt128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isUInt256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isInt8()) executeIntType(key, icolumn, vec_to); + else if (which.isInt16()) executeIntType(key, icolumn, vec_to); + else if (which.isInt32()) executeIntType(key, icolumn, vec_to); + else if (which.isInt64()) executeIntType(key, icolumn, vec_to); + else if (which.isInt128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isInt256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isUUID()) executeBigIntType(key, icolumn, vec_to); + else if (which.isIPv4()) executeIntType(key, icolumn, vec_to); + else if (which.isIPv6()) executeBigIntType(key, icolumn, vec_to); + else if (which.isEnum8()) executeIntType(key, icolumn, vec_to); + else if (which.isEnum16()) executeIntType(key, icolumn, vec_to); + else if (which.isDate()) executeIntType(key, icolumn, vec_to); + else if (which.isDate32()) executeIntType(key, icolumn, vec_to); + else if (which.isDateTime()) executeIntType(key, icolumn, vec_to); /// TODO: executeIntType() for Decimal32/64 leads to incompatible result - else if (which.isDecimal32()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal64()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal128()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isDecimal256()) executeBigIntType(key_cols, icolumn, vec_to); - else if (which.isFloat32()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isFloat64()) executeIntType(key_cols, icolumn, vec_to); - else if (which.isString()) executeString(key_cols, icolumn, vec_to); - else if (which.isFixedString()) executeString(key_cols, icolumn, vec_to); - else if (which.isArray()) executeArray(key_cols, from_type, icolumn, vec_to); - else executeGeneric(key_cols, icolumn, vec_to); + else if (which.isDecimal32()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal64()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal128()) executeBigIntType(key, icolumn, vec_to); + else if (which.isDecimal256()) executeBigIntType(key, icolumn, vec_to); + else if (which.isFloat32()) executeIntType(key, icolumn, vec_to); + else if (which.isFloat64()) executeIntType(key, icolumn, vec_to); + else if (which.isString()) executeString(key, icolumn, vec_to); + else if (which.isFixedString()) executeString(key, icolumn, vec_to); + else if (which.isArray()) executeArray(key, from_type, icolumn, vec_to); + else executeGeneric(key, icolumn, vec_to); } - void executeForArgument(const KeyColumnsType & key_cols, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const + void executeForArgument(const KeyType & key, const IDataType * type, const IColumn * column, typename ColumnVector::Container & vec_to, bool & is_first) const { /// Flattening of tuples. 
if (const ColumnTuple * tuple = typeid_cast(column)) @@ -1477,7 +1337,7 @@ private: const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) - executeForArgument(key_cols, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); + executeForArgument(key, tuple_types[i].get(), tuple_columns[i].get(), vec_to, is_first); } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { @@ -1487,24 +1347,24 @@ private: for (size_t i = 0; i < tuple_size; ++i) { auto tmp = ColumnConst::create(tuple_columns[i], column->size()); - executeForArgument(key_cols, tuple_types[i].get(), tmp.get(), vec_to, is_first); + executeForArgument(key, tuple_types[i].get(), tmp.get(), vec_to, is_first); } } else if (const auto * map = checkAndGetColumn(column)) { const auto & type_map = assert_cast(*type); - executeForArgument(key_cols, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } else if (const auto * const_map = checkAndGetColumnConst(column)) { - executeForArgument(key_cols, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); + executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { if (is_first) - executeAny(key_cols, type, column, vec_to); + executeAny(key, type, column, vec_to); else - executeAny(key_cols, type, column, vec_to); + executeAny(key, type, column, vec_to); } is_first = false; @@ -1535,33 +1395,30 @@ public: { auto col_to = ColumnVector::create(input_rows_count); - if (input_rows_count != 0) + typename ColumnVector::Container & vec_to = col_to->getData(); + + /// If using a "keyed" algorithm, the first argument is the key and + /// the data starts from the second argument. + /// Otherwise there is no key and all arguments are interpreted as data. + constexpr size_t first_data_argument = Keyed; + + if (arguments.size() <= first_data_argument) { - typename ColumnVector::Container & vec_to = col_to->getData(); + /// Return a fixed random-looking magic number when input is empty + vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); + } - /// If using a "keyed" algorithm, the first argument is the key and - /// the data starts from the second argument. - /// Otherwise there is no key and all arguments are interpreted as data. - constexpr size_t first_data_argument = Keyed; + KeyType key{}; + if constexpr (Keyed) + if (!arguments.empty()) + key = Impl::parseKey(arguments[0]); - if (arguments.size() <= first_data_argument) - { - /// Return a fixed random-looking magic number when input is empty - vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); - } - - KeyColumnsType key_cols{}; - if constexpr (Keyed) - if (!arguments.empty()) - key_cols = Impl::parseKeyColumns(arguments[0]); - - /// The function supports arbitrary number of arguments of arbitrary types. - bool is_first_argument = true; - for (size_t i = first_data_argument; i < arguments.size(); ++i) - { - const auto & col = arguments[i]; - executeForArgument(key_cols, col.type.get(), col.column.get(), vec_to, is_first_argument); - } + /// The function supports arbitrary number of arguments of arbitrary types. 
+ bool is_first_argument = true; + for (size_t i = first_data_argument; i < arguments.size(); ++i) + { + const auto & col = arguments[i]; + executeForArgument(key, col.type.get(), col.column.get(), vec_to, is_first_argument); } if constexpr (std::is_same_v) /// backward-compatible @@ -1593,19 +1450,17 @@ public: ) // DECLARE_MULTITARGET_CODE -template -class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash +template +class FunctionAnyHash : public TargetSpecific::Default::FunctionAnyHash { public: explicit FunctionAnyHash(ContextPtr context) : selector(context) { - selector - .registerImplementation>(); + selector.registerImplementation>(); #if USE_MULTITARGET_CODE - selector.registerImplementation>(); - selector - .registerImplementation>(); + selector.registerImplementation>(); + selector.registerImplementation>(); #endif } @@ -1841,7 +1696,7 @@ struct NameIntHash32 { static constexpr auto name = "intHash32"; }; struct NameIntHash64 { static constexpr auto name = "intHash64"; }; using FunctionSipHash64 = FunctionAnyHash; -using FunctionSipHash64Keyed = FunctionAnyHash; +using FunctionSipHash64Keyed = FunctionAnyHash; using FunctionIntHash32 = FunctionIntHash; using FunctionIntHash64 = FunctionIntHash; #if USE_SSL @@ -1855,10 +1710,8 @@ using FunctionSHA384 = FunctionStringHashFixedString; using FunctionSHA512 = FunctionStringHashFixedString; #endif using FunctionSipHash128 = FunctionAnyHash; -using FunctionSipHash128Keyed = FunctionAnyHash; +using FunctionSipHash128Keyed = FunctionAnyHash; using FunctionSipHash128Reference = FunctionAnyHash; -using FunctionSipHash128ReferenceKeyed - = FunctionAnyHash; using FunctionCityHash64 = FunctionAnyHash; using FunctionFarmFingerprint64 = FunctionAnyHash; using FunctionFarmHash64 = FunctionAnyHash; diff --git a/src/Functions/FunctionsHashingMisc.cpp b/src/Functions/FunctionsHashingMisc.cpp index f56568b2508..56c3c1ed00c 100644 --- a/src/Functions/FunctionsHashingMisc.cpp +++ b/src/Functions/FunctionsHashingMisc.cpp @@ -20,11 +20,6 @@ REGISTER_FUNCTION(Hashing) .examples{{"hash", "SELECT hex(sipHash128Reference('foo', '\\x01', 3))", ""}}, .categories{"Hash"} }); - factory.registerFunction(FunctionDocumentation{ - .description = "Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument " - "instead of using a fixed key.", - .examples{{"hash", "SELECT hex(sipHash128ReferenceKeyed((506097522914230528, 1084818905618843912),'foo', '\\x01', 3));", ""}}, - .categories{"Hash"}}); factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index a9f724365a8..ccc514e7ea2 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -197,40 +197,3 @@ E28DBDE7FE22E41C Check bug with hashing of const integer values 11862823756610506724 11862823756610506724 -86AE90BB6A238D3F6221457630142C9B -86AE90BB6A238D3F6221457630142C9B -Check memsan bug -18096612095653370192 -20AF99D3A87829E0 -12489502208762728797 -Check const columns -15080046610211022027 -15080046610211022027 -15080046610211022027 -15080046610211022027 -2E779C73D13981AA1AE19AFF9617EA49 -2E779C73D13981AA1AE19AFF9617EA49 -2E779C73D13981AA1AE19AFF9617EA49 -2E779C73D13981AA1AE19AFF9617EA49 -Check multiple keys as tuple from a table -11862823756610506724 -9357996107237883963 -86AE90BB6A238D3F6221457630142C9B 
-F6D93D8FEA6D7DECCDD95A7A0A2AA36D -Check multiple keys as separate ints from a table -11862823756610506724 -9357996107237883963 -86AE90BB6A238D3F6221457630142C9B -F6D93D8FEA6D7DECCDD95A7A0A2AA36D -Check constant key and data from a table -11862823756610506724 -11862823756610506724 -86AE90BB6A238D3F6221457630142C9B -86AE90BB6A238D3F6221457630142C9B -Check multiple keys as separate ints from a table with constant data -11862823756610506724 -9357996107237883963 -86AE90BB6A238D3F6221457630142C9B -F6D93D8FEA6D7DECCDD95A7A0A2AA36D -Check asan bug -0 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 4f3ae7d62bd..900b99f548a 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -263,10 +263,10 @@ select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); -select sipHash64Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash64Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128Keyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } +select sipHash64Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash128Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 } select hex(sipHash64()); SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; @@ -280,57 +280,4 @@ INSERT INTO tab VALUES ((2, 2), 4); -- these two statements must produce the same result SELECT sipHash64Keyed(key, val) FROM tab; SELECT sipHash64Keyed(key, 4::UInt64) FROM tab; -SELECT hex(sipHash128Keyed(key, val)) FROM tab; -SELECT hex(sipHash128Keyed(key, 4::UInt64)) FROM tab; DROP TABLE tab; - -SELECT 'Check memsan bug'; -SELECT sipHash64Keyed((2::UInt64, toUInt64(2)), 4) GROUP BY toUInt64(2); -SELECT hex(sipHash64Keyed((toUInt64(9223372036854775806), toUInt64(-9223372036854775808)), char(2147483646, -2147483648, 1, 3, 4, 7, 2147483647))) GROUP BY toUInt64(257), (toUInt64(9223372036854775806), toUInt64(2147483646)); -SELECT sipHash64Keyed((toUInt64(9223372036854775806), 9223372036854775808::UInt64), char(2)) GROUP BY toUInt64(9223372036854775806); - 
-SELECT 'Check const columns'; -DROP TABLE IF EXISTS sipHashKeyed_test; -CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; -SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), 1, 'test'); -SELECT sipHash64(tuple(*)) FROM sipHashKeyed_test; -SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), tuple(*)) FROM sipHashKeyed_test; -SELECT sipHash64Keyed((toUInt64(0), toUInt64(0)), a, b) FROM sipHashKeyed_test; -SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), 1, 'test')); -SELECT hex(sipHash128(tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128Keyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; -DROP TABLE sipHashKeyed_test; - -SELECT 'Check multiple keys as tuple from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); -INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); -SELECT sipHash64Keyed(key, val) FROM sipHashKeyed_keys ORDER by key; -SELECT hex(sipHash128Keyed(key, val)) FROM sipHashKeyed_keys ORDER by key; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); -INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); -SELECT sipHash64Keyed((key0, key1), val) FROM sipHashKeyed_keys ORDER by key0; -SELECT hex(sipHash128Keyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; -SELECT 'Check constant key and data from a table'; -SELECT sipHash64Keyed((2::UInt64, 2::UInt64), val) FROM sipHashKeyed_keys ORDER by val; -SELECT hex(sipHash128Keyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table with constant data'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES (2, 2); -INSERT INTO sipHashKeyed_keys VALUES (4, 4); -SELECT sipHash64Keyed((key0, key1), 4::UInt64) FROM sipHashKeyed_keys ORDER by key0; -SELECT hex(sipHash128Keyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check asan bug'; -SELECT sipHash128((toUInt64(9223372036854775806), 1)) = sipHash128(1) GROUP BY sipHash128(1::UInt8), toUInt64(9223372036854775806); diff --git a/tests/queries/0_stateless/02552_siphash128_reference.reference b/tests/queries/0_stateless/02552_siphash128_reference.reference index ece9f6a4615..d00491fd7e5 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.reference +++ b/tests/queries/0_stateless/02552_siphash128_reference.reference @@ -1,152 +1 @@ -A3817F04BA25A8E66DF67214C7550293 -DA87C1D86B99AF44347659119B22FC45 -8177228DA4A45DC7FCA38BDEF60AFFE4 -9C70B60C5267A94E5F33B6B02985ED51 -F88164C12D9C8FAF7D0F6E7C7BCD5579 -1368875980776F8854527A07690E9627 -14EECA338B208613485EA0308FD7A15E -A1F1EBBED8DBC153C0B84AA61FF08239 -3B62A9BA6258F5610F83E264F31497B4 -264499060AD9BAABC47F8B02BB6D71ED -00110DC378146956C95447D3F3D0FBBA -0151C568386B6677A2B4DC6F81E5DC18 -D626B266905EF35882634DF68532C125 -9869E247E9C08B10D029934FC4B952F7 -31FCEFAC66D7DE9C7EC7485FE4494902 -5493E99933B0A8117E08EC0F97CFC3D9 -6EE2A4CA67B054BBFD3315BF85230577 -473D06E8738DB89854C066C47AE47740 
-A426E5E423BF4885294DA481FEAEF723 -78017731CF65FAB074D5208952512EB1 -9E25FC833F2290733E9344A5E83839EB -568E495ABE525A218A2214CD3E071D12 -4A29B54552D16B9A469C10528EFF0AAE -C9D184DDD5A9F5E0CF8CE29A9ABF691C -2DB479AE78BD50D8882A8A178A6132AD -8ECE5F042D5E447B5051B9EACB8D8F6F -9C0B53B4B3C307E87EAEE08678141F66 -ABF248AF69A6EAE4BFD3EB2F129EEB94 -0664DA1668574B88B935F3027358AEF4 -AA4B9DC4BF337DE90CD4FD3C467C6AB7 -EA5C7F471FAF6BDE2B1AD7D4686D2287 -2939B0183223FAFC1723DE4F52C43D35 -7C3956CA5EEAFC3E363E9D556546EB68 -77C6077146F01C32B6B69D5F4EA9FFCF -37A6986CB8847EDF0925F0F1309B54DE -A705F0E69DA9A8F907241A2E923C8CC8 -3DC47D1F29C448461E9E76ED904F6711 -0D62BF01E6FC0E1A0D3C4751C5D3692B -8C03468BCA7C669EE4FD5E084BBEE7B5 -528A5BB93BAF2C9C4473CCE5D0D22BD9 -DF6A301E95C95DAD97AE0CC8C6913BD8 -801189902C857F39E73591285E70B6DB -E617346AC9C231BB3650AE34CCCA0C5B -27D93437EFB721AA401821DCEC5ADF89 -89237D9DED9C5E78D8B1C9B166CC7342 -4A6D8091BF5E7D651189FA94A250B14C -0E33F96055E7AE893FFC0E3DCF492902 -E61C432B720B19D18EC8D84BDC63151B -F7E5AEF549F782CF379055A608269B16 -438D030FD0B7A54FA837F2AD201A6403 -A590D3EE4FBF04E3247E0D27F286423F -5FE2C1A172FE93C4B15CD37CAEF9F538 -2C97325CBD06B36EB2133DD08B3A017C -92C814227A6BCA949FF0659F002AD39E -DCE850110BD8328CFBD50841D6911D87 -67F14984C7DA791248E32BB5922583DA -1938F2CF72D54EE97E94166FA91D2A36 -74481E9646ED49FE0F6224301604698E -57FCA5DE98A9D6D8006438D0583D8A1D -9FECDE1CEFDC1CBED4763674D9575359 -E3040C00EB28F15366CA73CBD872E740 -7697009A6A831DFECCA91C5993670F7A -5853542321F567A005D547A4F04759BD -5150D1772F50834A503E069A973FBD7C 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -Check bug with hashing of const integer values -E940B12600C844966162FF8FE7A16AAE -E940B12600C844966162FF8FE7A16AAE -Check memsan bug -1CE422FEE7BD8DE20000000000000000 -Check const columns -B66B53476BDBEB8549A257E3B1766C30 -B66B53476BDBEB8549A257E3B1766C30 -B66B53476BDBEB8549A257E3B1766C30 -B66B53476BDBEB8549A257E3B1766C30 -Check multiple keys as tuple from a table -E940B12600C844966162FF8FE7A16AAE -EC58946A98A0D37F4E3FAC02FBBA9480 -Check multiple keys as separate ints from a table -E940B12600C844966162FF8FE7A16AAE -EC58946A98A0D37F4E3FAC02FBBA9480 -Check constant key and data from a table -E940B12600C844966162FF8FE7A16AAE -E940B12600C844966162FF8FE7A16AAE -Check multiple keys as separate ints from a table with constant data -E940B12600C844966162FF8FE7A16AAE -EC58946A98A0D37F4E3FAC02FBBA9480 diff --git a/tests/queries/0_stateless/02552_siphash128_reference.sql b/tests/queries/0_stateless/02552_siphash128_reference.sql index f7324ed0ee4..200954c3b57 100644 --- a/tests/queries/0_stateless/02552_siphash128_reference.sql +++ b/tests/queries/0_stateless/02552_siphash128_reference.sql @@ -1,254 +1 @@ --- Test Vectors from the SipHash reference C implementation: --- Written by --- Jean-Philippe Aumasson --- Daniel J. 
Bernstein --- Released under CC0 --- https://github.com/veorq/SipHash/blob/eee7d0d84dc7731df2359b243aa5e75d85f6eaef/vectors.h#L645 - -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - '')); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21))); -select 
hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))); -select 
hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 
29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61))); -select hex(sipHash128ReferenceKeyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), - char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62))); - --- CH tests -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128Reference(char(0)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128Reference(char(0, 1)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128Reference(char(0, 1, 2)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128Reference(char(0, 1, 2, 3)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128Reference(char(0, 1, 2, 3, 4)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); -select 
sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128Reference(char(0, 
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 
30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); -select sipHash128ReferenceKeyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128Reference(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); - -select sipHash128ReferenceKeyed((0, 0), '1'); -- { serverError NOT_IMPLEMENTED } -select sipHash128ReferenceKeyed(toUInt64(0), '1'); -- { serverError NOT_IMPLEMENTED } - SELECT hex(sipHash128Reference()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; -SELECT hex(sipHash128ReferenceKeyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; - -SELECT 'Check bug with hashing of const integer values'; -DROP TABLE IF EXISTS tab; -CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; -INSERT INTO tab VALUES ((2, 2), 4); --- these two statements must produce the same result -SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM tab; -SELECT hex(sipHash128ReferenceKeyed(key, 4::UInt64)) FROM tab; -DROP TABLE tab; - -SELECT 'Check memsan bug'; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(2), toUInt64(-9223372036854775807)))) GROUP BY (toUInt64(506097522914230528), toUInt64(now64(2, NULL + NULL), 1084818905618843912)), toUInt64(2), NULL + NULL, char(-2147483649, 1); - -SELECT 'Check const columns'; -DROP TABLE IF EXISTS sipHashKeyed_test; -CREATE TABLE sipHashKeyed_test ENGINE = Memory() AS SELECT 1 a, 'test' b; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), 1, 'test')); -SELECT hex(sipHash128Reference(tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), tuple(*))) FROM sipHashKeyed_test; -SELECT hex(sipHash128ReferenceKeyed((toUInt64(0), toUInt64(0)), a, b)) FROM sipHashKeyed_test; -DROP TABLE sipHashKeyed_test; - -SELECT 'Check multiple keys as tuple from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES ((2, 2), 4); -INSERT INTO sipHashKeyed_keys VALUES ((4, 4), 4); -SELECT hex(sipHash128ReferenceKeyed(key, val)) FROM sipHashKeyed_keys ORDER by key; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64, val UInt64) ENGINE=Memory; -INSERT INTO sipHashKeyed_keys VALUES (2, 2, 4); -INSERT INTO sipHashKeyed_keys VALUES (4, 4, 4); -SELECT hex(sipHash128ReferenceKeyed((key0, key1), val)) FROM sipHashKeyed_keys ORDER by key0; -SELECT 'Check constant key and data from a table'; -SELECT hex(sipHash128ReferenceKeyed((2::UInt64, 2::UInt64), val)) FROM sipHashKeyed_keys ORDER by val; -DROP TABLE sipHashKeyed_keys; - -SELECT 'Check multiple keys as separate ints from a table with constant data'; -DROP TABLE IF EXISTS sipHashKeyed_keys; -CREATE TABLE sipHashKeyed_keys (key0 UInt64, key1 UInt64) ENGINE=Memory; -INSERT 
INTO sipHashKeyed_keys VALUES (2, 2);
-INSERT INTO sipHashKeyed_keys VALUES (4, 4);
-SELECT hex(sipHash128ReferenceKeyed((key0, key1), 4::UInt64)) FROM sipHashKeyed_keys ORDER by key0;
-DROP TABLE sipHashKeyed_keys;

From 7cc3372355d06dfc1184b3ebcd6d2164d179b7be Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 04:30:09 +0200
Subject: [PATCH 284/478] Fix terrible trash

---
 src/Functions/FunctionsHashing.h | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 82944630b10..090d38fa73d 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -1567,7 +1567,10 @@ public:
         if constexpr (std::is_same_v) /// backward-compatible
         {
             auto col_to_fixed_string = ColumnFixedString::create(sizeof(UInt128));
-            col_to_fixed_string->getChars() = std::move(*reinterpret_cast(&col_to->getData()));
+            const auto & data = col_to->getData();
+            auto & chars = col_to_fixed_string->getChars();
+            chars.resize(data.size() * sizeof(UInt128));
+            memcpy(chars.data(), data.data(), data.size() * sizeof(UInt128));
             return col_to_fixed_string;
         }
@@ -1601,12 +1604,12 @@ public:
     {
         selector
             .registerImplementation>();
-
+/*
 #if USE_MULTITARGET_CODE
         selector.registerImplementation>();
         selector
             .registerImplementation>();
-#endif
+#endif*/
     }

     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override

From 4c2dabddb6d697ba3744e48e07e09aeaf8fc59d6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 04:31:38 +0200
Subject: [PATCH 285/478] Add a test

---
 tests/queries/0_stateless/02831_trash.reference | 2 ++
 tests/queries/0_stateless/02831_trash.sql | 2 ++
 2 files changed, 4 insertions(+)
 create mode 100644 tests/queries/0_stateless/02831_trash.reference
 create mode 100644 tests/queries/0_stateless/02831_trash.sql

diff --git a/tests/queries/0_stateless/02831_trash.reference b/tests/queries/0_stateless/02831_trash.reference
new file mode 100644
index 00000000000..e25f2e9e23f
--- /dev/null
+++ b/tests/queries/0_stateless/02831_trash.reference
@@ -0,0 +1,2 @@
+2761631236
+1210084689
diff --git a/tests/queries/0_stateless/02831_trash.sql b/tests/queries/0_stateless/02831_trash.sql
new file mode 100644
index 00000000000..600e2ad0695
--- /dev/null
+++ b/tests/queries/0_stateless/02831_trash.sql
@@ -0,0 +1,2 @@
+SELECT CRC32IEEE(sipHash128());
+SELECT CRC32(murmurHash3_128());

From d2b178536e1e5b6d85c917d3d26bbe2cff7594ea Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 04:38:16 +0200
Subject: [PATCH 286/478] Fix terrible trash

---
 src/Functions/FunctionsHashing.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h
index 090d38fa73d..8f8715ec3f1 100644
--- a/src/Functions/FunctionsHashing.h
+++ b/src/Functions/FunctionsHashing.h
@@ -1604,12 +1604,12 @@ public:
     {
         selector
             .registerImplementation>();
-/*
+
 #if USE_MULTITARGET_CODE
         selector.registerImplementation>();
         selector
             .registerImplementation>();
-#endif*/
+#endif
     }

     ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override

From 4d0b75ebdd1bb69e155b237768c7db7a22cb09cc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 22 Jul 2023 04:42:16 +0200
Subject: [PATCH 287/478] Remove hashid

---
 .gitmodules | 3 -
 contrib/CMakeLists.txt | 1 -
 contrib/hashidsxx | 1 -
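The first "Fix terrible trash" commit above replaces a move through `reinterpret_cast` with an explicit resize-and-copy when packing 128-bit hash values into the `FixedString(16)` column. A minimal standalone sketch of the same pattern follows; it uses plain standard-library containers with illustrative names (`UInt128Like`, `toFixedStringChars`), not the actual ClickHouse column classes:

```cpp
#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in for a 16-byte hash value (illustrative only).
struct UInt128Like { uint64_t low = 0, high = 0; };

// Copy the raw bytes of the hash values into a flat byte buffer,
// sizing the destination explicitly instead of reinterpreting the
// source container's storage as a different container type.
std::vector<uint8_t> toFixedStringChars(const std::vector<UInt128Like> & data)
{
    std::vector<uint8_t> chars(data.size() * sizeof(UInt128Like));
    if (!data.empty())
        std::memcpy(chars.data(), data.data(), data.size() * sizeof(UInt128Like));
    return chars;
}

int main()
{
    std::vector<UInt128Like> hashes(3);
    return toFixedStringChars(hashes).size() == 3 * sizeof(UInt128Like) ? 0 : 1;
}
```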
contrib/hashidsxx-cmake/CMakeLists.txt | 14 -- src/Core/Settings.h | 2 +- src/Functions/CMakeLists.txt | 1 - src/Functions/FunctionHashID.cpp | 12 -- src/Functions/FunctionHashID.h | 170 ------------------ .../0_stateless/02293_hashid.reference | 15 -- tests/queries/0_stateless/02293_hashid.sql | 16 -- ...new_functions_must_be_documented.reference | 1 - 11 files changed, 1 insertion(+), 235 deletions(-) delete mode 160000 contrib/hashidsxx delete mode 100644 contrib/hashidsxx-cmake/CMakeLists.txt delete mode 100644 src/Functions/FunctionHashID.cpp delete mode 100644 src/Functions/FunctionHashID.h delete mode 100644 tests/queries/0_stateless/02293_hashid.reference delete mode 100644 tests/queries/0_stateless/02293_hashid.sql diff --git a/.gitmodules b/.gitmodules index 151dc28c55b..ba71a8ae3a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -258,9 +258,6 @@ [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash -[submodule "contrib/hashidsxx"] - path = contrib/hashidsxx - url = https://github.com/schoentoon/hashidsxx [submodule "contrib/nats-io"] path = contrib/nats-io url = https://github.com/ClickHouse/nats.c diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2af468970f1..0f68c0cbc7c 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -164,7 +164,6 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) -add_contrib (hashidsxx-cmake hashidsxx) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) diff --git a/contrib/hashidsxx b/contrib/hashidsxx deleted file mode 160000 index 783f6911ccf..00000000000 --- a/contrib/hashidsxx +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt deleted file mode 100644 index 17f3888bd94..00000000000 --- a/contrib/hashidsxx-cmake/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") - -set (SRCS - "${LIBRARY_DIR}/hashids.cpp" -) - -set (HDRS - "${LIBRARY_DIR}/hashids.h" -) - -add_library(_hashidsxx ${SRCS} ${HDRS}) -target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}") - -add_library(ch_contrib::hashidsxx ALIAS _hashidsxx) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index cfcb56729d2..bde51ae9971 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -761,7 +761,7 @@ class IColumn; /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ - M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \ + M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions", 0) \ M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \ M(Bool, allow_experimental_annoy_index, false, "Allows to use Annoy index. Disabled by default because this feature is experimental", 0) \ M(UInt64, max_limit_for_ann_queries, 1'000'000, "SELECT queries with LIMIT bigger than this setting cannot use ANN indexes. 
Helps to prevent memory overflows in ANN search indexes.", 0) \ diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 2f5c8a212f2..06436488050 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -21,7 +21,6 @@ list (APPEND PUBLIC_LIBS dbms ch_contrib::metrohash ch_contrib::murmurhash - ch_contrib::hashidsxx ch_contrib::morton_nd ) diff --git a/src/Functions/FunctionHashID.cpp b/src/Functions/FunctionHashID.cpp deleted file mode 100644 index 829b3d9d2f6..00000000000 --- a/src/Functions/FunctionHashID.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "FunctionHashID.h" -#include - -namespace DB -{ - -REGISTER_FUNCTION(HashID) -{ - factory.registerFunction(); -} - -} diff --git a/src/Functions/FunctionHashID.h b/src/Functions/FunctionHashID.h deleted file mode 100644 index 680c3f6430b..00000000000 --- a/src/Functions/FunctionHashID.h +++ /dev/null @@ -1,170 +0,0 @@ -#pragma once - -#include "config.h" - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SUPPORT_IS_DISABLED; - extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; -} - -// hashid(string, salt) -class FunctionHashID : public IFunction -{ -public: - static constexpr auto name = "hashid"; - - static FunctionPtr create(ContextPtr context) - { - if (!context->getSettingsRef().allow_experimental_hash_functions) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, - "Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name); - - return std::make_shared(); - } - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 0; } - - bool isVariadic() const override { return true; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2, 3}; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName()); - - const auto & id_col = arguments[0]; - if (!isUnsignedInteger(id_col.type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "First argument of function {} must be unsigned integer, got {}", - getName(), - arguments[0].type->getName()); - - if (arguments.size() > 1) - { - const auto & hash_col = arguments[1]; - if (!isString(hash_col.type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Second argument of function {} must be String, got {}", - getName(), - arguments[1].type->getName()); - } - - if (arguments.size() > 2) - { - const auto & min_length_col = arguments[2]; - if (!isUInt8(min_length_col.type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Third argument of function {} must be UInt8, got {}", - getName(), - arguments[2].type->getName()); - } - - if (arguments.size() > 3) - { - const auto & alphabet_col = arguments[3]; - if (!isString(alphabet_col.type)) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Fourth argument of function {} must be String, 
got {}", - getName(), - arguments[3].type->getName()); - } - - if (arguments.size() > 4) - { - throw Exception( - ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, - "Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}", - getName(), - arguments.size()); - } - - return std::make_shared(); - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const auto & numcolumn = arguments[0].column; - - if (checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get()) - || checkAndGetColumn(numcolumn.get()) || checkAndGetColumn(numcolumn.get())) - { - std::string salt; - UInt8 min_length = 0; - std::string alphabet; - - if (arguments.size() >= 4) - { - const auto & alphabetcolumn = arguments[3].column; - if (const auto * alpha_col = checkAndGetColumnConst(alphabetcolumn.get())) - { - alphabet = alpha_col->getValue(); - if (alphabet.find('\0') != std::string::npos) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character"); - } - } - else - alphabet.assign(DEFAULT_ALPHABET); - - if (arguments.size() >= 3) - { - const auto & minlengthcolumn = arguments[2].column; - if (const auto * min_length_col = checkAndGetColumnConst(minlengthcolumn.get())) - min_length = min_length_col->getValue(); - } - - if (arguments.size() >= 2) - { - const auto & saltcolumn = arguments[1].column; - if (const auto * salt_col = checkAndGetColumnConst(saltcolumn.get())) - salt = salt_col->getValue(); - } - - hashidsxx::Hashids hash(salt, min_length, alphabet); - - auto col_res = ColumnString::create(); - - for (size_t i = 0; i < input_rows_count; ++i) - { - col_res->insert(hash.encode({numcolumn->getUInt(i)})); - } - - return col_res; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function hashid", - arguments[0].column->getName()); - } -}; - -} diff --git a/tests/queries/0_stateless/02293_hashid.reference b/tests/queries/0_stateless/02293_hashid.reference deleted file mode 100644 index dfc78349c05..00000000000 --- a/tests/queries/0_stateless/02293_hashid.reference +++ /dev/null @@ -1,15 +0,0 @@ -0 gY -1 jR -2 k5 -3 l5 -4 mO -0 pbgkmdljlpjoapne -1 akemglnjepjpodba -2 obmgndljgajpkeao -3 dldokmpjpgjgeanb -4 nkdlpgajngjnobme -YQrvD5XGvbx -Bm3zaOq7zbp -oV -oV -6b diff --git a/tests/queries/0_stateless/02293_hashid.sql b/tests/queries/0_stateless/02293_hashid.sql deleted file mode 100644 index 06af0b5e1d8..00000000000 --- a/tests/queries/0_stateless/02293_hashid.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Tags: no-upgrade-check -SET allow_experimental_hash_functions = 1; - -select number, hashid(number) from system.numbers limit 5; -select number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') from system.numbers limit 5; -select hashid(1234567890123456, 's3cr3t'); -select hashid(1234567890123456, 's3cr3t2'); - -SELECT hashid(1, hashid(2)); -SELECT hashid(1, 'k5'); -SELECT hashid(1, 'k5_othersalt'); - --- https://github.com/ClickHouse/ClickHouse/issues/39672 -SELECT - JSONExtractRaw(257, NULL), - hashid(1024, if(rand() % 10, 'truetruetruetrue', NULL), 's3\0r3t'); -- {serverError 43} diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index fc00bfdadca..595ebb483d5 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ 
b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -346,7 +346,6 @@ hasAny hasColumnInTable hasSubstr hasThreadFuzzer -hashid hex hiveHash hop From 5f4756fb33f754913f4ab8ddfa84c39739920f19 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 04:54:58 +0200 Subject: [PATCH 288/478] Remove toDecimalString --- .../functions/type-conversion-functions.md | 38 --- .../functions/type-conversion-functions.md | 38 --- src/Functions/FunctionToDecimalString.cpp | 22 -- src/Functions/FunctionToDecimalString.h | 312 ------------------ src/IO/WriteHelpers.h | 44 +-- .../02676_to_decimal_string.reference | 21 -- .../0_stateless/02676_to_decimal_string.sql | 35 -- 7 files changed, 13 insertions(+), 497 deletions(-) delete mode 100644 src/Functions/FunctionToDecimalString.cpp delete mode 100644 src/Functions/FunctionToDecimalString.h delete mode 100644 tests/queries/0_stateless/02676_to_decimal_string.reference delete mode 100644 tests/queries/0_stateless/02676_to_decimal_string.sql diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 36f40b37238..c2bd525c483 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -945,44 +945,6 @@ Result: └────────────┴───────┘ ``` -## toDecimalString - -Converts a numeric value to String with the number of fractional digits in the output specified by the user. - -**Syntax** - -``` sql -toDecimalString(number, scale) -``` - -**Parameters** - -- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), -- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). - * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), - * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. - -**Returned value** - -- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). - The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. - -**Example** - -Query: - -``` sql -SELECT toDecimalString(CAST('64.32', 'Float64'), 5); -``` - -Result: - -```response -┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐ -│ 64.32000 │ -└─────────────────────────────────────────────┘ -``` - ## reinterpretAsUInt(8\|16\|32\|64) ## reinterpretAsInt(8\|16\|32\|64) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index e53104d8d71..088b1a9a1f1 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -762,44 +762,6 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; └────────────┴───────┘ ``` -## toDecimalString - -Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом. 
- -**Синтаксис** - -``` sql -toDecimalString(number, scale) -``` - -**Параметры** - -- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md), -- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md). - * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов), - * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60. - -**Возвращаемое значение** - -- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части. - При необходимости число округляется по стандартным правилам арифметики. - -**Пример использования** - -Запрос: - -``` sql -SELECT toDecimalString(CAST('64.32', 'Float64'), 5); -``` - -Результат: - -```response -┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐ -│ 64.32000 │ -└─────────────────────────────────────────────┘ -``` - ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} diff --git a/src/Functions/FunctionToDecimalString.cpp b/src/Functions/FunctionToDecimalString.cpp deleted file mode 100644 index fe417b19137..00000000000 --- a/src/Functions/FunctionToDecimalString.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -REGISTER_FUNCTION(ToDecimalString) -{ - factory.registerFunction( - FunctionDocumentation{ - .description=R"( -Returns string representation of a number. First argument is the number of any numeric type, -second argument is the desired number of digits in fractional part. Returns String. 
- - )", - .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}}, - .categories{"String"} - }, FunctionFactory::CaseInsensitive); -} - -} diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h deleted file mode 100644 index 6ae007e6b66..00000000000 --- a/src/Functions/FunctionToDecimalString.h +++ /dev/null @@ -1,312 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int ILLEGAL_COLUMN; - extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; -} - -class FunctionToDecimalString : public IFunction -{ -public: - static constexpr auto name = "toDecimalString"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override { return name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - size_t getNumberOfArguments() const override { return 2; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isNumber(*arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal first argument for formatDecimal function: got {}, expected numeric type", - arguments[0]->getName()); - - if (!isUInt8(*arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal second argument for formatDecimal function: got {}, expected UInt8", - arguments[1]->getName()); - - return std::make_shared(); - } - - bool useDefaultImplementationForConstants() const override { return true; } - -private: - /// For operations with Integer/Float - template - void vectorConstant(const FromVectorType & vec_from, UInt8 precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const - { - size_t input_rows_count = vec_from.size(); - result_offsets.resize(input_rows_count); - - /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated, - /// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case. 
- WriteBufferFromVector buf_to(vec_to); - - for (size_t i = 0; i < input_rows_count; ++i) - { - format(vec_from[i], buf_to, precision); - result_offsets[i] = buf_to.count(); - } - - buf_to.finalize(); - } - - template - void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const - { - size_t input_rows_count = vec_from.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested, shall not be more than {}", max_digits); - format(vec_from[i], buf_to, vec_precision[i]); - result_offsets[i] = buf_to.count(); - } - - buf_to.finalize(); - } - - template - void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const - { - size_t input_rows_count = vec_precision.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested, shall not be more than {}", max_digits); - format(value_from, buf_to, vec_precision[i]); - result_offsets[i] = buf_to.count(); - } - - buf_to.finalize(); - } - - /// For operations with Decimal - template - void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const - { - /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77. 
- constexpr size_t max_digits = std::numeric_limits::digits10; - if (precision > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); - - WriteBufferFromVector buf_to(vec_to); - size_t input_rows_count = vec_from.size(); - result_offsets.resize(input_rows_count); - - for (size_t i = 0; i < input_rows_count; ++i) - { - writeText(vec_from[i], from_scale, buf_to, true, true, precision); - writeChar(0, buf_to); - result_offsets[i] = buf_to.count(); - } - buf_to.finalize(); - } - - template - void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const - { - size_t input_rows_count = vec_from.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); - writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]); - writeChar(0, buf_to); - result_offsets[i] = buf_to.count(); - } - buf_to.finalize(); - } - - template - void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const - { - size_t input_rows_count = vec_precision.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); - writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]); - writeChar(0, buf_to); - result_offsets[i] = buf_to.count(); - } - buf_to.finalize(); - } - - template - static void format(T value, DB::WriteBuffer & out, UInt8 precision) - { - /// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values, - /// Catch this here to give user a more reasonable error. - if (precision > 60) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision)); - - DB::DoubleConverter::BufferType buffer; - double_conversion::StringBuilder builder{buffer, sizeof(buffer)}; - - const auto result = DB::DoubleConverter::instance().ToFixed(value, precision, &builder); - - if (!result) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value); - - out.write(buffer, builder.position()); - writeChar(0, out); - } - - template - static void format(T value, DB::WriteBuffer & out, UInt8 precision) - { - /// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals). 
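The integer branch of the removed `format` helper (the code directly below this comment) only ever appends trailing zeros: it writes the integer, then a dot, then `precision` zeros. A self-contained sketch of that behaviour, in plain C++ with an illustrative function name rather than the ClickHouse write helpers:

```cpp
#include <iostream>
#include <string>

// Illustrative only: mirrors the removed integer path of toDecimalString,
// e.g. (2, 5) -> "2.00000"; a precision of 0 yields just the integer text.
std::string formatIntegerWithScale(long long value, unsigned precision)
{
    std::string out = std::to_string(value);
    if (precision > 0)
    {
        out += '.';
        out.append(precision, '0');
    }
    return out;
}

int main()
{
    std::cout << formatIntegerWithScale(2, 5) << '\n'; // 2.00000
}
```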
- constexpr size_t max_digits = std::numeric_limits::digits10; - if (precision > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested, shall not be more than {}", max_digits); - writeText(value, out); - if (precision > 0) [[likely]] - { - writeChar('.', out); - for (int i = 0; i < precision; ++i) - writeChar('0', out); - writeChar(0, out); - } - } - -public: - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - switch (arguments[0].type->getTypeId()) - { - case TypeIndex::UInt8: return executeType(arguments); - case TypeIndex::UInt16: return executeType(arguments); - case TypeIndex::UInt32: return executeType(arguments); - case TypeIndex::UInt64: return executeType(arguments); - case TypeIndex::UInt128: return executeType(arguments); - case TypeIndex::UInt256: return executeType(arguments); - case TypeIndex::Int8: return executeType(arguments); - case TypeIndex::Int16: return executeType(arguments); - case TypeIndex::Int32: return executeType(arguments); - case TypeIndex::Int64: return executeType(arguments); - case TypeIndex::Int128: return executeType(arguments); - case TypeIndex::Int256: return executeType(arguments); - case TypeIndex::Float32: return executeType(arguments); - case TypeIndex::Float64: return executeType(arguments); - case TypeIndex::Decimal32: return executeType(arguments); - case TypeIndex::Decimal64: return executeType(arguments); - case TypeIndex::Decimal128: return executeType(arguments); - case TypeIndex::Decimal256: return executeType(arguments); - default: - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", - arguments[0].column->getName(), getName()); - } - } - -private: - template - ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const - { - const auto * from_col_const = typeid_cast(arguments[0].column.get()); - const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); - const auto * precision_col_const = typeid_cast(arguments[1].column.get()); - - auto result_col = ColumnString::create(); - auto * result_col_string = assert_cast(result_col.get()); - ColumnString::Chars & result_chars = result_col_string->getChars(); - ColumnString::Offsets & result_offsets = result_col_string->getOffsets(); - - if constexpr (is_decimal) - { - const auto * from_col = checkAndGetColumn>(arguments[0].column.get()); - UInt8 from_scale = from_col->getScale(); - - if (from_col) - { - if (precision_col_const) - vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); - else - vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); - } - else if (from_col_const) - constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets, from_scale); - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); - } - else - { - const auto * from_col = checkAndGetColumn>(arguments[0].column.get()); - if (from_col) - { - if (precision_col_const) - vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); - else - vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); - } - else if (from_col_const) - constantVector(from_col_const->template 
getValue(), precision_col->getData(), result_chars, result_offsets); - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); - } - - return result_col; - } -}; - -} diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index aa4c9b17e48..0494cdf22e7 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf) inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); } template -void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros, - bool fixed_fractional_length, UInt32 fractional_length) +void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) { /// If it's big integer, but the number of digits is small, /// use the implementation for smaller integers for more efficient arithmetic. + if constexpr (std::is_same_v) { if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); return; } } @@ -932,53 +932,35 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool { if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); return; } } constexpr size_t max_digits = std::numeric_limits::digits10; assert(scale <= max_digits); - assert(fractional_length <= max_digits); - char buf[max_digits]; - memset(buf, '0', std::max(scale, fractional_length)); + memset(buf, '0', scale); T value = x; Int32 last_nonzero_pos = 0; - - if (fixed_fractional_length && fractional_length < scale) - { - T new_value = value / DecimalUtils::scaleMultiplier(scale - fractional_length - 1); - auto round_carry = new_value % 10; - value = new_value / 10; - if (round_carry >= 5) - value += 1; - } - - for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos) + for (Int32 pos = scale - 1; pos >= 0; --pos) { auto remainder = value % 10; value /= 10; - - if (remainder != 0 && last_nonzero_pos == 0) - last_nonzero_pos = pos; - - buf[pos] += static_cast(remainder); } writeChar('.', ostr); - ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? scale : last_nonzero_pos + 1)); + ostr.write(buf, trailing_zeros ? 
scale : last_nonzero_pos + 1); } template -void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros, - bool fixed_fractional_length = false, UInt32 fractional_length = 0) +void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) { T part = DecimalUtils::getWholePart(x, scale); @@ -989,7 +971,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer writeIntText(part, ostr); - if (scale || (fixed_fractional_length && fractional_length > 0)) + if (scale) { part = DecimalUtils::getFractionalPart(x, scale); if (part || trailing_zeros) @@ -997,7 +979,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer if (part < 0) part *= T(-1); - writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); + writeDecimalFractional(part, scale, ostr, trailing_zeros); } } } diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference deleted file mode 100644 index 4c27ee5b528..00000000000 --- a/tests/queries/0_stateless/02676_to_decimal_string.reference +++ /dev/null @@ -1,21 +0,0 @@ -2.00000000000000000000000000000000000000000000000000000000000000000000000000000 -2.12 --2.00000000000000000000000000000000000000000000000000000000000000000000000000000 --2.12 -2.987600000000000033395508580724708735942840576171875000000000 -2.15 --2.987600000000000033395508580724708735942840576171875000000000 --2.15 -64.1230010986 -64.2340000000 --64.1230010986 --64.2340000000 --32.345 -32.34500000000000000000000000000000000000000000000000000000000000000000000000000 -32.46 --64.5671232345 -128.78932312332132985464 --128.78932312332132985464 -128.78932312332132985464000000000000000000000000000000000000000000000000000000000 -128.7893231233 --128.78932312332132985464123123789323123321329854600000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql deleted file mode 100644 index 563d60c62c7..00000000000 --- a/tests/queries/0_stateless/02676_to_decimal_string.sql +++ /dev/null @@ -1,35 +0,0 @@ --- Regular types -SELECT toDecimalString(2, 77); -- more digits required than exist -SELECT toDecimalString(2.123456, 2); -- rounding -SELECT toDecimalString(-2, 77); -- more digits required than exist -SELECT toDecimalString(-2.123456, 2); -- rounding - -SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default) -SELECT toDecimalString(2.1456, 2); -- rounding -SELECT toDecimalString(-2.9876, 60); -- more digits required than exist -SELECT toDecimalString(-2.1456, 2); -- rounding - --- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway. 
-SELECT toDecimalString(64.123::Float32, 10); -SELECT toDecimalString(64.234::Float64, 10); -SELECT toDecimalString(-64.123::Float32, 10); -SELECT toDecimalString(-64.234::Float64, 10); - --- Decimals -SELECT toDecimalString(-32.345::Decimal32(3), 3); -SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist -SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding -SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10); -SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20); -SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding -SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist -SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding -SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist - --- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float. --- These values shall work OK. -SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} -SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} -SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} -SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} -SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} From cda42e6dd4eaa56822ad64aad7aa09f632547d93 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 05:05:51 +0200 Subject: [PATCH 289/478] Add a test --- .../queries/0_stateless/02831_regexp_analyze_recursion.reference | 0 tests/queries/0_stateless/02831_regexp_analyze_recursion.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/02831_regexp_analyze_recursion.reference create mode 100644 tests/queries/0_stateless/02831_regexp_analyze_recursion.sql diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference b/tests/queries/0_stateless/02831_regexp_analyze_recursion.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql new file mode 100644 index 00000000000..018d1f031e6 --- /dev/null +++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql @@ -0,0 +1 @@ +SELECT match('', repeat('(', 100000)); -- { serverError 306 } From 21ffce0ff20fc7f136d8d5b05369a1abcdc01be3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 05:06:02 +0200 Subject: [PATCH 290/478] Check regular expression depth --- src/Common/OptimizedRegularExpression.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index c542945c78d..0b80e2f3f97 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #define MIN_LENGTH_FOR_STRSTR 3 @@ -50,6 +51,8 @@ const char * analyzeImpl( bool & is_trivial, Literals & global_alternatives) { + checkStackSize(); + /** The expression is trivial if all the metacharacters in it are escaped. 
* The non-alternative string is * a string outside parentheses, From de2016261ef32878456de9efae5cfab748611853 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 05:08:27 +0200 Subject: [PATCH 291/478] Get rid of it --- docker/test/fasttest/run.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 828c73e6781..e25b5fdbfed 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -141,7 +141,6 @@ function clone_submodules contrib/jemalloc contrib/replxx contrib/wyhash - contrib/hashidsxx contrib/c-ares contrib/morton-nd contrib/xxHash From 20625d75ab52319b8e67e50d2df803d0e2dc0934 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 15 Jun 2023 14:08:43 +0200 Subject: [PATCH 292/478] Fix optimize_skip_unused_shards with JOINs In case of JOIN query may contains conditions for other tables, while optimize_skip_unused_shards was pretty dumb and failed to skip such columns. Fix this by removing JOIN before applying this optimization. v2: restriction for analyzer v3: ignore 01940_custom_tld_sharding_key under analyzer Signed-off-by: Azat Khuzhin Co-Authored-By: Alexey Milovidov --- src/Storages/StorageDistributed.cpp | 42 ++++++++------ src/Storages/StorageDistributed.h | 4 +- tests/analyzer_tech_debt.txt | 2 + ...optimize_skip_unused_shards_join.reference | 0 ...02790_optimize_skip_unused_shards_join.sql | 55 +++++++++++++++++++ 5 files changed, 84 insertions(+), 19 deletions(-) create mode 100644 tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference create mode 100644 tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0727658160c..1a99d272cab 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -75,6 +75,7 @@ #include #include #include +#include #include #include @@ -434,7 +435,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( { /// Always calculate optimized cluster here, to avoid conditions during read() /// (Anyway it will be calculated in the read()) - ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info.query); + ClusterPtr optimized_cluster = getOptimizedCluster(local_context, storage_snapshot, query_info); if (optimized_cluster) { LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", @@ -1297,7 +1298,7 @@ ClusterPtr StorageDistributed::getCluster() const } ClusterPtr StorageDistributed::getOptimizedCluster( - ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const + ContextPtr local_context, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const { ClusterPtr cluster = getCluster(); const Settings & settings = local_context->getSettingsRef(); @@ -1306,7 +1307,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster( if (has_sharding_key && sharding_key_is_usable) { - ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, storage_snapshot, local_context); + ClusterPtr optimized = skipUnusedShards(cluster, query_info, storage_snapshot, local_context); if (optimized) return optimized; } @@ -1355,25 +1356,34 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr` ClusterPtr 
StorageDistributed::skipUnusedShards( ClusterPtr cluster, - const ASTPtr & query_ptr, + const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr local_context) const { - const auto & select = query_ptr->as(); - + const auto & select = query_info.query->as(); if (!select.prewhere() && !select.where()) - { return nullptr; - } + + /// FIXME: support analyzer + if (!query_info.syntax_analyzer_result) + return nullptr; ASTPtr condition_ast; - if (select.prewhere() && select.where()) + /// Remove JOIN from the query since it may contain a condition for other tables. + /// But only the conditions for the left table should be analyzed for shard skipping. { - condition_ast = makeASTFunction("and", select.prewhere()->clone(), select.where()->clone()); - } - else - { - condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone(); + ASTPtr select_without_join_ptr = select.clone(); + ASTSelectQuery select_without_join = select_without_join_ptr->as(); + TreeRewriterResult analyzer_result_without_join = *query_info.syntax_analyzer_result; + + removeJoin(select_without_join, analyzer_result_without_join, local_context); + if (!select_without_join.prewhere() && !select_without_join.where()) + return nullptr; + + if (select_without_join.prewhere() && select_without_join.where()) + condition_ast = makeASTFunction("and", select_without_join.prewhere()->clone(), select_without_join.where()->clone()); + else + condition_ast = select_without_join.prewhere() ? select_without_join.prewhere()->clone() : select_without_join.where()->clone(); } replaceConstantExpressions(condition_ast, local_context, storage_snapshot->metadata->getColumns().getAll(), shared_from_this(), storage_snapshot); @@ -1396,11 +1406,9 @@ ClusterPtr StorageDistributed::skipUnusedShards( return nullptr; } - // Can't get definite answer if we can skip any shards + // Can't get a definite answer if we can skip any shards if (!blocks) - { return nullptr; - } std::set shards; diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index f45286341cf..615d6e337b6 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -182,10 +182,10 @@ private: /// Apply the following settings: /// - optimize_skip_unused_shards /// - force_optimize_skip_unused_shards - ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const ASTPtr & query_ptr) const; + ClusterPtr getOptimizedCluster(ContextPtr, const StorageSnapshotPtr & storage_snapshot, const SelectQueryInfo & query_info) const; ClusterPtr skipUnusedShards( - ClusterPtr cluster, const ASTPtr & query_ptr, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const; + ClusterPtr cluster, const SelectQueryInfo & query_info, const StorageSnapshotPtr & storage_snapshot, ContextPtr context) const; /// This method returns optimal query processing stage. 
/// diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index e0f259306aa..8ffb94e17b8 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -130,3 +130,5 @@ 02581_share_big_sets_between_mutation_tasks_long 02581_share_big_sets_between_multiple_mutations_tasks_long 00992_system_parts_race_condition_zookeeper_long +02790_optimize_skip_unused_shards_join +01940_custom_tld_sharding_key diff --git a/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql new file mode 100644 index 00000000000..0773e0a9a5e --- /dev/null +++ b/tests/queries/0_stateless/02790_optimize_skip_unused_shards_join.sql @@ -0,0 +1,55 @@ +-- Issue: https://github.com/ClickHouse/ClickHouse/issues/15995 + +DROP TABLE IF EXISTS outer; +DROP TABLE IF EXISTS inner; + +DROP TABLE IF EXISTS outer_distributed; +DROP TABLE IF EXISTS inner_distributed; + +CREATE TABLE IF NOT EXISTS outer +( + `id` UInt64, + `organization_id` UInt64, + `version` UInt64 +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY organization_id % 8 +ORDER BY (organization_id, id); + +CREATE TABLE inner +( + `id` UInt64, + `outer_id` UInt64, + `organization_id` UInt64, + `version` UInt64, + `date` Date +) +ENGINE = ReplacingMergeTree(version) +PARTITION BY toYYYYMM(date) +ORDER BY (organization_id, outer_id); + +CREATE TABLE inner_distributed AS inner +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'inner', intHash64(organization_id)); + +CREATE TABLE outer_distributed AS outer +ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), 'outer', intHash64(organization_id)); + +SELECT + sum(if(inner_distributed.id != 0, 1, 0)) AS total, + inner_distributed.date AS date +FROM outer_distributed AS outer_distributed +FINAL +LEFT JOIN +( + SELECT + inner_distributed.outer_id AS outer_id, + inner_distributed.id AS id, + inner_distributed.date AS date + FROM inner_distributed AS inner_distributed + FINAL + WHERE inner_distributed.organization_id = 15078 +) AS inner_distributed ON inner_distributed.outer_id = outer_distributed.id +WHERE (outer_distributed.organization_id = 15078) AND (date != toDate('1970-01-01')) +GROUP BY date +ORDER BY date DESC +SETTINGS distributed_product_mode = 'local', optimize_skip_unused_shards = 1; From 5ca6c97832f786e6e3be085e3ec79829f9233cdd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sat, 22 Jul 2023 12:03:20 +0200 Subject: [PATCH 293/478] Update gtest_lru_file_cache.cpp --- src/Interpreters/tests/gtest_lru_file_cache.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 12e7d9372f7..dab14a66ed7 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -489,7 +489,6 @@ TEST_F(FileCacheTest, get) download(file_segment); ASSERT_EQ(file_segment.state(), State::DOWNLOADED); - file_segment.completePartAndResetDownloader(); other_1.join(); From 363201270c00f0ebfa61e80471b372f434370380 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 17:49:53 +0300 Subject: [PATCH 294/478] Update 01710_query_log_with_projection_info.sql --- 
.../0_stateless/01710_query_log_with_projection_info.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql index 25e7e8fed60..cd84b392fe5 100644 --- a/tests/queries/0_stateless/01710_query_log_with_projection_info.sql +++ b/tests/queries/0_stateless/01710_query_log_with_projection_info.sql @@ -62,3 +62,5 @@ FROM system.query_log WHERE current_database=currentDatabase() and query = 'SELECT min(id) FROM t FORMAT Null;'; + +DROP TABLE t; From dab954a92d7893a7ebbef9cda0a3aedf63a96a50 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sat, 22 Jul 2023 18:10:54 +0200 Subject: [PATCH 295/478] do not throw exception in OptimizedRegularExpressionImpl::analyze --- src/Common/OptimizedRegularExpression.cpp | 13 +++++++++++-- .../0_stateless/02831_regexp_analyze_recursion.sql | 2 +- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 0b80e2f3f97..918ebd75fc0 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -423,6 +423,7 @@ void OptimizedRegularExpressionImpl::analyze( bool & is_trivial, bool & required_substring_is_prefix, std::vector & alternatives) +try { Literals alternative_literals; Literal required_literal; @@ -432,12 +433,20 @@ void OptimizedRegularExpressionImpl::analyze( for (auto & lit : alternative_literals) alternatives.push_back(std::move(lit.literal)); } +catch(...) +{ + required_substring = ""; + is_trivial = false; + required_substring_is_prefix = false; + alternatives.clear(); + std::cerr << "Analyze RegularExpression failed, got error: {}" << DB::getCurrentExceptionMessage(false) << "\n"; +} template OptimizedRegularExpressionImpl::OptimizedRegularExpressionImpl(const std::string & regexp_, int options) { - std::vector alternativesDummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used. - analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternativesDummy); + std::vector alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used. 
+ analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy); /// Just three following options are supported diff --git a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql index 018d1f031e6..a2075ae903b 100644 --- a/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql +++ b/tests/queries/0_stateless/02831_regexp_analyze_recursion.sql @@ -1 +1 @@ -SELECT match('', repeat('(', 100000)); -- { serverError 306 } +SELECT match('', repeat('(', 100000)); -- { serverError 427 } From c60090ccbd30143d44ab715b8b7b5e0060a2095f Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 22 Jul 2023 17:43:22 +0000 Subject: [PATCH 296/478] Add test with materialize() + fix --- ...2810_fix_remove_dedundant_distinct_view.reference | 12 ++++++++++-- .../02810_fix_remove_dedundant_distinct_view.sql | 10 +++++++++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference index 01f14f82e94..ec714a5df07 100644 --- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.reference @@ -8,6 +8,14 @@ FROM ) WHERE explain ILIKE '%distinct%'; 2 -SELECT DISTINCT x FROM tab_v; -2 +SELECT DISTINCT x FROM tab_v ORDER BY x; 1 +2 +-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed +SELECT count() +FROM +( + EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab)) +) +WHERE explain ILIKE '%distinct%'; +2 diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql index 99fc24dae8b..ca0a2edd99d 100644 --- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql @@ -19,4 +19,12 @@ FROM ) WHERE explain ILIKE '%distinct%'; -SELECT DISTINCT x FROM tab_v; +SELECT DISTINCT x FROM tab_v ORDER BY x; + +-- explicitly checking that materialize() doesn't affect the result, - redundant DISTINCT is still removed +SELECT count() +FROM +( + EXPLAIN SELECT DISTINCT x FROM (SELECT materialize(x) as x FROM (select DISTINCT x from tab)) +) +WHERE explain ILIKE '%distinct%'; From afdda489bdfb27d1db2a7554223f5dfcb8cca7b1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 19:53:16 +0200 Subject: [PATCH 297/478] Fix test --- .../02790_sql_standard_fetch.reference | 72 +++++++++---------- .../0_stateless/02790_sql_standard_fetch.sql | 12 ++-- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference index 429eecbc936..270af6e5c17 100644 --- a/tests/queries/0_stateless/02790_sql_standard_fetch.reference +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference @@ -1,36 +1,36 @@ -┌─id─┬─name──┬─department─┬─salary─┐ -│ 25 │ Frank │ it │ 120 │ -│ 23 │ Henry │ it │ 104 │ -│ 24 │ Irene │ it │ 104 │ -│ 33 │ Alice │ sales │ 100 │ -│ 32 │ Dave │ sales │ 96 │ -└────┴───────┴────────────┴────────┘ -┌─id─┬─name──┬─department─┬─salary─┐ -│ 25 │ Frank │ it │ 120 │ -│ 23 │ Henry │ it │ 104 │ -│ 24 │ Irene │ it │ 104 │ -│ 33 │ Alice │ sales │ 100 │ -│ 32 │ Dave │ 
sales │ 96 │ -└────┴───────┴────────────┴────────┘ -┌─id─┬─name──┬─department─┬─salary─┐ -│ 25 │ Frank │ it │ 120 │ -│ 23 │ Henry │ it │ 104 │ -│ 24 │ Irene │ it │ 104 │ -│ 33 │ Alice │ sales │ 100 │ -│ 31 │ Cindy │ sales │ 96 │ -│ 32 │ Dave │ sales │ 96 │ -└────┴───────┴────────────┴────────┘ -┌─id─┬─name──┬─department─┬─salary─┐ -│ 33 │ Alice │ sales │ 100 │ -│ 31 │ Cindy │ sales │ 96 │ -│ 32 │ Dave │ sales │ 96 │ -│ 22 │ Grace │ it │ 90 │ -│ 21 │ Emma │ it │ 84 │ -└────┴───────┴────────────┴────────┘ -┌─id─┬─name──┬─department─┬─salary─┐ -│ 33 │ Alice │ sales │ 100 │ -│ 31 │ Cindy │ sales │ 96 │ -│ 32 │ Dave │ sales │ 96 │ -│ 22 │ Grace │ it │ 90 │ -│ 21 │ Emma │ it │ 84 │ -└────┴───────┴────────────┴────────┘ +┌─id─┬─name───────────┬─department─┬─salary─┐ +│ 25 │ Frank │ it │ 120 │ +│ 23 │ Henry or Irene │ it │ 104 │ +│ 24 │ Henry or Irene │ it │ 104 │ +│ 33 │ Alice │ sales │ 100 │ +│ 32 │ Dave or Cindy │ sales │ 96 │ +└────┴────────────────┴────────────┴────────┘ +┌─id─┬─name───────────┬─department─┬─salary─┐ +│ 25 │ Frank │ it │ 120 │ +│ 23 │ Henry or Irene │ it │ 104 │ +│ 24 │ Henry or Irene │ it │ 104 │ +│ 33 │ Alice │ sales │ 100 │ +│ 32 │ Dave or Cindy │ sales │ 96 │ +└────┴────────────────┴────────────┴────────┘ +┌─id─┬─name───────────┬─department─┬─salary─┐ +│ 25 │ Frank │ it │ 120 │ +│ 23 │ Henry or Irene │ it │ 104 │ +│ 24 │ Henry or Irene │ it │ 104 │ +│ 33 │ Alice │ sales │ 100 │ +│ 31 │ Dave or Cindy │ sales │ 96 │ +│ 32 │ Dave or Cindy │ sales │ 96 │ +└────┴────────────────┴────────────┴────────┘ +┌─id─┬─name──────────┬─department─┬─salary─┐ +│ 33 │ Alice │ sales │ 100 │ +│ 31 │ Dave or Cindy │ sales │ 96 │ +│ 32 │ Dave or Cindy │ sales │ 96 │ +│ 22 │ Grace │ it │ 90 │ +│ 21 │ Emma │ it │ 84 │ +└────┴───────────────┴────────────┴────────┘ +┌─id─┬─name──────────┬─department─┬─salary─┐ +│ 33 │ Alice │ sales │ 100 │ +│ 31 │ Dave or Cindy │ sales │ 96 │ +│ 32 │ Dave or Cindy │ sales │ 96 │ +│ 22 │ Grace │ it │ 90 │ +│ 21 │ Emma │ it │ 84 │ +└────┴───────────────┴────────────┴────────┘ diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql index 4204279a746..07a806eddf9 100644 --- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql @@ -1,33 +1,33 @@ -- https://antonz.org/sql-fetch/ CREATE TEMPORARY TABLE employees (id UInt64, name String, department String, salary UInt64); -INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', '84'); +INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), (25, 'Frank', 'it', 120), (31, 'Cindy', 'sales', 96), (33, 'Alice', 'sales', 100), (32, 'Dave', 'sales', 96), (22, 'Grace', 'it', 90), (21, 'Emma', 'it', 84); -- Determinism SET max_threads = 1, parallelize_output_from_storages = 0; -select * from (SELECT * FROM employees ORDER BY id, name, department, salary) +select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc limit 5 format PrettyCompactNoEscapes; -select * from (SELECT * FROM employees ORDER BY id, name, department, salary) +select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or 
Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows only format PrettyCompactNoEscapes; -select * from (SELECT * FROM employees ORDER BY id, name, department, salary) +select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows with ties format PrettyCompactNoEscapes; -select * from (SELECT * FROM employees ORDER BY id, name, department, salary) +select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch next 5 rows only format PrettyCompactNoEscapes; -select * from (SELECT * FROM employees ORDER BY id, name, department, salary) +select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch first 5 rows only From 7a24de801d93957cd87e8a1d2f726b934912b038 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 19:53:45 +0200 Subject: [PATCH 298/478] Fix test --- .../02790_sql_standard_fetch.reference | 72 +++++++++---------- .../0_stateless/02790_sql_standard_fetch.sql | 10 +-- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.reference b/tests/queries/0_stateless/02790_sql_standard_fetch.reference index 270af6e5c17..ef578b526da 100644 --- a/tests/queries/0_stateless/02790_sql_standard_fetch.reference +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.reference @@ -1,36 +1,36 @@ -┌─id─┬─name───────────┬─department─┬─salary─┐ -│ 25 │ Frank │ it │ 120 │ -│ 23 │ Henry or Irene │ it │ 104 │ -│ 24 │ Henry or Irene │ it │ 104 │ -│ 33 │ Alice │ sales │ 100 │ -│ 32 │ Dave or Cindy │ sales │ 96 │ -└────┴────────────────┴────────────┴────────┘ -┌─id─┬─name───────────┬─department─┬─salary─┐ -│ 25 │ Frank │ it │ 120 │ -│ 23 │ Henry or Irene │ it │ 104 │ -│ 24 │ Henry or Irene │ it │ 104 │ -│ 33 │ Alice │ sales │ 100 │ -│ 32 │ Dave or Cindy │ sales │ 96 │ -└────┴────────────────┴────────────┴────────┘ -┌─id─┬─name───────────┬─department─┬─salary─┐ -│ 25 │ Frank │ it │ 120 │ -│ 23 │ Henry or Irene │ it │ 104 │ -│ 24 │ Henry or Irene │ it │ 104 │ -│ 33 │ Alice │ sales │ 100 │ -│ 31 │ Dave or Cindy │ sales │ 96 │ -│ 32 │ Dave or Cindy │ sales │ 96 │ -└────┴────────────────┴────────────┴────────┘ -┌─id─┬─name──────────┬─department─┬─salary─┐ -│ 33 │ Alice │ sales │ 100 │ -│ 31 │ Dave or Cindy │ sales │ 96 │ -│ 32 │ Dave or Cindy │ sales │ 96 │ -│ 22 │ Grace │ it │ 90 │ -│ 21 │ Emma │ it │ 84 │ -└────┴───────────────┴────────────┴────────┘ -┌─id─┬─name──────────┬─department─┬─salary─┐ -│ 33 │ Alice │ sales │ 100 │ -│ 31 │ Dave or Cindy │ sales │ 96 │ -│ 32 │ Dave or Cindy │ sales │ 96 │ -│ 22 │ Grace │ it │ 90 │ -│ 21 │ Emma │ it │ 84 │ -└────┴───────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales 
│ 96 │ +└────────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name───────────┬─department─┬─salary─┐ +│ Frank │ it │ 120 │ +│ Henry or Irene │ it │ 104 │ +│ Henry or Irene │ it │ 104 │ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +└────────────────┴────────────┴────────┘ +┌─name──────────┬─department─┬─salary─┐ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +│ Grace │ it │ 90 │ +│ Emma │ it │ 84 │ +└───────────────┴────────────┴────────┘ +┌─name──────────┬─department─┬─salary─┐ +│ Alice │ sales │ 100 │ +│ Dave or Cindy │ sales │ 96 │ +│ Dave or Cindy │ sales │ 96 │ +│ Grace │ it │ 90 │ +│ Emma │ it │ 84 │ +└───────────────┴────────────┴────────┘ diff --git a/tests/queries/0_stateless/02790_sql_standard_fetch.sql b/tests/queries/0_stateless/02790_sql_standard_fetch.sql index 07a806eddf9..638cc66682d 100644 --- a/tests/queries/0_stateless/02790_sql_standard_fetch.sql +++ b/tests/queries/0_stateless/02790_sql_standard_fetch.sql @@ -6,28 +6,28 @@ INSERT INTO employees VALUES (23, 'Henry', 'it', 104), (24, 'Irene', 'it', 104), -- Determinism SET max_threads = 1, parallelize_output_from_storages = 0; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc limit 5 format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows only format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc fetch first 5 rows with ties format PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch next 5 rows only format 
PrettyCompactNoEscapes; -select id, transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) +select transform(name, ['Henry', 'Irene', 'Dave', 'Cindy'], ['Henry or Irene', 'Henry or Irene', 'Dave or Cindy', 'Dave or Cindy']) AS name, department, salary from (SELECT * FROM employees ORDER BY id, name, department, salary) order by salary desc offset 3 rows fetch first 5 rows only From 2c6bc318476ce98b916cd2ffb6a9a44f5a5488f8 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 22 Jul 2023 18:07:22 +0000 Subject: [PATCH 299/478] Test is not dependent on new analyzer --- .../0_stateless/02810_fix_remove_dedundant_distinct_view.sql | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql index ca0a2edd99d..10a68721c51 100644 --- a/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql +++ b/tests/queries/0_stateless/02810_fix_remove_dedundant_distinct_view.sql @@ -1,5 +1,3 @@ -set allow_experimental_analyzer=1; - drop table if exists tab_v; drop table if exists tab; create table tab (x UInt64, y UInt64) engine MergeTree() order by (x, y); From d25cd0d0b635196b1a4cb2178d93b7060bf02819 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 22 Jul 2023 20:21:33 +0200 Subject: [PATCH 300/478] Partial revert --- tests/queries/0_stateless/01187_set_profile_as_setting.sh | 2 +- .../0_stateless/02360_rename_table_along_with_log_name.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index fccac57aea8..dacb609d790 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -4,13 +4,13 @@ unset CLICKHOUSE_LOG_COMMENT CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT -n -m -q "select value, changed from system.settings where name='readonly';" $CLICKHOUSE_CLIENT -n -m -q "set profile='default'; select value, changed from system.settings where name='readonly';" $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" 2>&1| grep -Fa "Cannot modify 'send_logs_level' setting in readonly mode" > /dev/null && echo "OK" +CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') $CLICKHOUSE_CLIENT -n -m -q "set profile='readonly'; select value, changed from system.settings where name='readonly';" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=select+value,changed+from+system.settings+where+name='readonly'" diff --git a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh index c07dcdd549b..e8c7f844b5c 100755 --- a/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh +++ b/tests/queries/0_stateless/02360_rename_table_along_with_log_name.sh @@ -1,7 +1,6 @@ #!/usr/bin/env bash CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=trace # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh @@ -12,6 +11,7 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS y;" $CLICKHOUSE_CLIENT -q "CREATE TABLE x(i int) ENGINE MergeTree ORDER BY i;" $CLICKHOUSE_CLIENT -q "RENAME TABLE x TO y;" +CLICKHOUSE_CLIENT_WITH_LOG=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=trace/g') regexp="${CLICKHOUSE_DATABASE}\\.x" # Check if there are still log entries with old table name $CLICKHOUSE_CLIENT_WITH_LOG --send_logs_source_regexp "$regexp" -q "INSERT INTO y VALUES(1);" From 0b258dda4ee618a4d002e2b5246d68bbd2c77c7e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 21 Jul 2023 08:31:45 +0200 Subject: [PATCH 301/478] Reproducible builds for Rust From now on cargo will not download anything from the internet during builds. This step had been moved for docker image builds (via cargo vendor). And now cargo inside docker.io/clickhouse/binary-builder will not use any crates from the internet, so we don't need to add --offline for cargo commands in cmake (corrosion_import_crate()). Also the docker build command had been adjusted to allow following symlinks inside build context, by using tar, this is required for Rust packages. Note, that to make proper Cargo.lock that could be vendored I did the following: - per-project locks had been removed (since there is no automatic way to sync the workspace Cargo.lock with per-project Cargo.lock, since cargo update/generate-lockfile will use only per-project Cargo.toml files apparently, -Z minimal-versions does not helps either) - and to generate Cargo.lock with less changes I've pinned version in the Cargo.toml strictly, i.e. not 'foo = "0.1"' but 'foo = "=0.1"' then the Cargo.lock for workspace had been generated and afterwards I've reverted this part. Plus I have to update the dependencies afterwards, since otherwise there are conflicts with dependencies for std library. Non trivial. 
Signed-off-by: Azat Khuzhin --- .gitignore | 2 + docker/packager/binary/Dockerfile | 27 ++ docker/packager/binary/rust | 1 + rust/.dockerignore | 4 + rust/.gitignore | 4 + rust/BLAKE3/Cargo.lock | 92 ----- rust/CMakeLists.txt | 2 + rust/{skim => }/Cargo.lock | 519 +++++++++++++++++++++++++-- rust/Cargo.toml | 12 + rust/prql/Cargo.lock | 569 ------------------------------ tests/ci/docker_images_check.py | 33 +- tests/ci/docker_test.py | 12 +- 12 files changed, 582 insertions(+), 695 deletions(-) create mode 120000 docker/packager/binary/rust create mode 100644 rust/.dockerignore create mode 100644 rust/.gitignore delete mode 100644 rust/BLAKE3/Cargo.lock rename rust/{skim => }/Cargo.lock (66%) create mode 100644 rust/Cargo.toml delete mode 100644 rust/prql/Cargo.lock diff --git a/.gitignore b/.gitignore index 39d6f3f9fc8..5341f23a94f 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,7 @@ cmake-build-* *.pyc __pycache__ *.pytest_cache +.mypy_cache test.cpp CPackConfig.cmake @@ -167,3 +168,4 @@ tests/integration/**/_gen /rust/**/target # It is autogenerated from *.in /rust/**/.cargo/config.toml +/rust/**/vendor diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 897bcd24d04..99e748c41d4 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -58,6 +58,33 @@ RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ rustup target add aarch64-apple-darwin && \ rustup target add powerpc64le-unknown-linux-gnu +# Create vendor cache for cargo. +# +# Note, that the config.toml for the root is used, you will not be able to +# install any other crates, except those which had been vendored (since if +# there is "replace-with" for some source, then cargo will not look to other +# remotes except this). +# +# Notes for the command itself: +# - --chown is required to preserve the rights +# - unstable-options for -C +# - chmod is required to fix the permissions, since builds are running from a different user +# - copy of the Cargo.lock is required for proper dependencies versions +# - cargo vendor --sync is requried to overcome [1] bug. 
+# +# [1]: https://github.com/rust-lang/wg-cargo-std-aware/issues/23 +COPY --chown=root:root /rust /rust/packages +RUN cargo -Z unstable-options -C /rust/packages vendor > $CARGO_HOME/config.toml && \ + cp "$(rustc --print=sysroot)"/lib/rustlib/src/rust/Cargo.lock "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/ && \ + cargo -Z unstable-options -C /rust/packages vendor --sync "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.toml && \ + rm "$(rustc --print=sysroot)"/lib/rustlib/src/rust/library/test/Cargo.lock && \ + sed -i "s#\"vendor\"#\"/rust/vendor\"#" $CARGO_HOME/config.toml && \ + cat $CARGO_HOME/config.toml && \ + mv /rust/packages/vendor /rust/vendor && \ + chmod -R o=r+X /rust/vendor && \ + ls -R -l /rust/packages && \ + rm -r /rust/packages + # NOTE: Seems like gcc-11 is too new for ubuntu20 repository # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ diff --git a/docker/packager/binary/rust b/docker/packager/binary/rust new file mode 120000 index 00000000000..742dc49e9ac --- /dev/null +++ b/docker/packager/binary/rust @@ -0,0 +1 @@ +../../../rust \ No newline at end of file diff --git a/rust/.dockerignore b/rust/.dockerignore new file mode 100644 index 00000000000..6b761aa401c --- /dev/null +++ b/rust/.dockerignore @@ -0,0 +1,4 @@ +# Just in case ignore any cargo stuff (and just in case someone will run this +# docker build locally with build context using folder root): +target +vendor diff --git a/rust/.gitignore b/rust/.gitignore new file mode 100644 index 00000000000..f850cd563c9 --- /dev/null +++ b/rust/.gitignore @@ -0,0 +1,4 @@ +# This is for tar --exclude-vcs-ignores (and just in case someone will run +# docker build locally with build context created via tar): +target +vendor diff --git a/rust/BLAKE3/Cargo.lock b/rust/BLAKE3/Cargo.lock deleted file mode 100644 index 9ac60773732..00000000000 --- a/rust/BLAKE3/Cargo.lock +++ /dev/null @@ -1,92 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "_ch_rust_blake3" -version = "0.1.0" -dependencies = [ - "blake3", - "libc", -] - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "blake3" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "digest" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" -dependencies = [ - "generic-array", -] - -[[package]] -name = "generic-array" -version = "0.14.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bff49e947297f3312447abdca79f45f4738097cc82b06e72054d2223f601f1b9" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "libc" -version = "0.2.132" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8371e4e5341c3a96db127eb2465ac681ced4c433e01dd0e938adbef26ba93ba5" - -[[package]] -name = "typenum" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" diff --git a/rust/CMakeLists.txt b/rust/CMakeLists.txt index 41451fe0a1e..ca0886cb300 100644 --- a/rust/CMakeLists.txt +++ b/rust/CMakeLists.txt @@ -55,6 +55,8 @@ function(clickhouse_import_crate) endif() endif() + # Note, here --offline is not used, since on CI vendor archive is used, and + # passing --offline here will be inconvenient for local development. corrosion_import_crate(NO_STD ${ARGN} PROFILE ${profile}) endfunction() diff --git a/rust/skim/Cargo.lock b/rust/Cargo.lock similarity index 66% rename from rust/skim/Cargo.lock rename to rust/Cargo.lock index f55ea8a84b0..07bbf8ba27e 100644 --- a/rust/skim/Cargo.lock +++ b/rust/Cargo.lock @@ -2,6 +2,22 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "_ch_rust_blake3" +version = "0.1.0" +dependencies = [ + "blake3", + "libc", +] + +[[package]] +name = "_ch_rust_prql" +version = "0.1.0" +dependencies = [ + "prql-compiler", + "serde_json", +] + [[package]] name = "_ch_rust_skim_rust" version = "0.1.0" @@ -12,6 +28,32 @@ dependencies = [ "term", ] +[[package]] +name = "addr2line" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" version = "1.0.2" @@ -36,6 +78,31 @@ dependencies = [ "libc", ] +[[package]] +name = "anyhow" +version = "1.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b13c32d80ecc7ab747b80c3784bce54ee8a7a0cc4fbda9bf4cda2cf6fe90854" +dependencies = [ + "backtrace", +] + +[[package]] +name = "ariadne" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702" +dependencies = [ + "unicode-width", + "yansi", +] + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + [[package]] name = "arrayvec" version = "0.7.4" @@ -48,6 +115,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "backtrace" +version = "0.3.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "beef" version = "0.5.2" @@ -60,6 +142,29 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "blake3" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.13.0" @@ -93,6 +198,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "chumsky" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" +dependencies = [ + "hashbrown 0.12.3", + "stacker", +] + [[package]] name = "codespan-reporting" version = "0.11.1" @@ -103,6 +218,12 @@ 
dependencies = [ "unicode-width", ] +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + [[package]] name = "core-foundation-sys" version = "0.8.4" @@ -177,10 +298,41 @@ dependencies = [ ] [[package]] -name = "cxx" -version = "1.0.101" +name = "crypto-common" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5032837c1384de3708043de9d4e97bb91290faca6c16529a28aa340592a78166" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "cxx" +version = "1.0.102" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f68e12e817cb19eaab81aaec582b4052d07debd3c3c6b083b9d361db47c7dc9d" dependencies = [ "cc", "cxxbridge-flags", @@ -190,9 +342,9 @@ dependencies = [ [[package]] name = "cxx-build" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51368b3d0dbf356e10fcbfd455a038503a105ee556f7ee79b6bb8c53a7247456" +checksum = "e789217e4ab7cf8cc9ce82253180a9fe331f35f5d339f0ccfe0270b39433f397" dependencies = [ "cc", "codespan-reporting", @@ -200,24 +352,24 @@ dependencies = [ "proc-macro2", "quote", "scratch", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] name = "cxxbridge-flags" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d9062157072e4aafc8e56ceaf8325ce850c5ae37578c852a0d4de2cecdded13" +checksum = "78a19f4c80fd9ab6c882286fa865e92e07688f4387370a209508014ead8751d0" [[package]] name = "cxxbridge-macro" -version = "1.0.101" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf01e8a540f5a4e0f284595834f81cf88572f244b768f051724537afa99a2545" +checksum = "b8fcfa71f66c8563c4fa9dd2bb68368d50267856f831ac5d85367e0805f9606c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -296,6 +448,17 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -319,9 +482,27 @@ dependencies = [ [[package]] name = "either" -version = "1.8.1" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "enum-as-inner" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" +dependencies = [ + "heck", + 
"proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "fnv" @@ -338,6 +519,16 @@ dependencies = [ "thread_local", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -349,6 +540,33 @@ dependencies = [ "wasi 0.11.0+wasi-snapshot-preview1", ] +[[package]] +name = "gimli" +version = "0.27.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.3.2" @@ -384,6 +602,31 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown 0.14.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + [[package]] name = "js-sys" version = "0.3.64" @@ -444,6 +687,21 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + [[package]] name = "nix" version = "0.24.3" @@ -470,10 +728,20 @@ dependencies = [ ] [[package]] -name = "num-traits" -version = "0.2.15" +name = "nom" +version = "7.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-traits" +version = "0.2.16" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", ] @@ -488,6 +756,15 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.18.0" @@ -509,6 +786,41 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prql-compiler" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff" +dependencies = [ + "anyhow", + "ariadne", + "chumsky", + "csv", + "enum-as-inner", + "itertools", + "lazy_static", + "log", + "once_cell", + "regex", + "semver", + "serde", + "serde_json", + "serde_yaml", + "sqlformat", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.31" @@ -589,12 +901,24 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "rustversion" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + [[package]] name = "scopeguard" version = "1.2.0" @@ -608,10 +932,57 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152" [[package]] -name = "serde" -version = "1.0.171" +name = "semver" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" +checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" +dependencies = [ + "serde", +] + +[[package]] +name = "serde" +version = "1.0.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b88756493a5bd5e5395d53baa70b194b05764ab85b59e43e4b8f4e1192fa9b1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.174" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e5c3a298c7f978e53536f95a63bdc4c4a64550582f31a0359a9afda6aede62e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.27", +] + +[[package]] +name = "serde_json" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_yaml" +version = "0.9.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] [[package]] name = "skim" @@ -638,12 +1009,74 @@ dependencies = [ "vte", ] +[[package]] +name = "sqlformat" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" +dependencies = [ + "itertools", + "nom", + "unicode_categories", +] + +[[package]] +name = "sqlparser" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" +dependencies = [ + "log", + "serde", +] + +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "strum" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 1.0.109", +] + +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + [[package]] name = "syn" version = "1.0.109" @@ -657,9 +1090,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.26" +version = "2.0.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" +checksum = "b60f673f44a8255b9c8c657daf66a596d435f2da81a555b06dc644d080ba45e0" dependencies = [ "proc-macro2", "quote", @@ -688,22 +1121,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" +checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.43" +version = "1.0.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" +checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", ] [[package]] @@ -766,6 +1199,12 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + [[package]] name = "unicode-ident" version = "1.0.11" @@ -778,12 +1217,30 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + +[[package]] +name = "unsafe-libyaml" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28467d3e1d3c6586d8f25fa243f544f5800fec42d97032474e17222c2b75cfa" + [[package]] name = "utf8parse" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "vte" version = "0.11.1" @@ -838,7 +1295,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", "wasm-bindgen-shared", ] @@ -860,7 +1317,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.26", + "syn 2.0.27", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -967,3 +1424,9 @@ name = "windows_x86_64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/rust/Cargo.toml b/rust/Cargo.toml new file mode 100644 index 00000000000..2a2b582cea8 --- /dev/null +++ b/rust/Cargo.toml @@ -0,0 +1,12 @@ +# workspace is required to vendor crates for all packages. +[workspace] +members = [ + "BLAKE3", + "skim", + "prql", +] +resolver = "2" + +# FIXME: even though the profiles should be defined in the main cargo config we +# cannot do this yet, since we compile each package separatelly, so you should +# ignore warning from cargo about this. diff --git a/rust/prql/Cargo.lock b/rust/prql/Cargo.lock deleted file mode 100644 index da94e4ca852..00000000000 --- a/rust/prql/Cargo.lock +++ /dev/null @@ -1,569 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. 
-version = 3 - -[[package]] -name = "_ch_rust_prql" -version = "0.1.0" -dependencies = [ - "prql-compiler", - "serde_json", -] - -[[package]] -name = "addr2line" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4fa78e18c64fce05e902adecd7a5eed15a5e0a3439f7b0e169f0252214865e3" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41" -dependencies = [ - "memchr", -] - -[[package]] -name = "anyhow" -version = "1.0.71" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" -dependencies = [ - "backtrace", -] - -[[package]] -name = "ariadne" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "367fd0ad87307588d087544707bc5fbf4805ded96c7db922b70d368fa1cb5702" -dependencies = [ - "unicode-width", - "yansi", -] - -[[package]] -name = "backtrace" -version = "0.3.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4319208da049c43661739c5fade2ba182f09d1dc2299b32298d3a31692b17e12" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "cc" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chumsky" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" -dependencies = [ - "hashbrown 0.12.3", - "stacker", -] - -[[package]] -name = "csv" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "626ae34994d3d8d668f4269922248239db4ae42d538b14c398b74a52208e8086" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "either" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" - -[[package]] -name = "enum-as-inner" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "equivalent" -version = "1.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "88bffebc5d80432c9b140ee17875ff173a8ab62faad5b257da912bd2f6c1c0a1" - -[[package]] -name = "getrandom" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "gimli" -version = "0.27.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" - -[[package]] -name = "hashbrown" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash", -] - -[[package]] -name = "hashbrown" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "indexmap" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" -dependencies = [ - "equivalent", - "hashbrown 0.14.0", -] - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a" - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.147" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" - -[[package]] -name = "log" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "minimal-lexical" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" - -[[package]] -name = "miniz_oxide" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" -dependencies = [ - "adler", -] - -[[package]] -name = "nom" -version = "7.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" -dependencies = [ - "memchr", - "minimal-lexical", -] - -[[package]] -name = "object" -version = "0.31.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8bda667d9f2b5051b8833f59f3bf748b28ef54f850f4fcb389a252aa383866d1" -dependencies = [ - "memchr", -] - -[[package]] -name = "once_cell" -version = "1.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" - -[[package]] -name = "proc-macro2" -version = "1.0.63" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "prql-compiler" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c99b52154002ac7f286dd2293c2f8d4e30526c1d396b14deef5ada1deef3c9ff" -dependencies = [ - "anyhow", - "ariadne", - "chumsky", - "csv", - "enum-as-inner", - "itertools", - "lazy_static", - "log", - "once_cell", - "regex", - "semver", - "serde", - "serde_json", - "serde_yaml", - "sqlformat", - "sqlparser", - "strum", - "strum_macros", -] - -[[package]] -name = "psm" -version = "0.1.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" -dependencies = [ - "cc", -] - -[[package]] -name = "quote" -version = "1.0.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "regex" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846" - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustversion" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc31bd9b61a32c31f9650d18add92aa83a49ba979c143eefd27fe7177b05bd5f" - -[[package]] -name = "ryu" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9" - -[[package]] -name = "semver" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed" -dependencies = [ - "serde", -] - -[[package]] -name = "serde" -version = "1.0.166" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.166" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6" -dependencies = [ - "proc-macro2", - "quote", - "syn 
2.0.23", -] - -[[package]] -name = "serde_json" -version = "1.0.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_yaml" -version = "0.9.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "452e67b9c20c37fa79df53201dc03839651086ed9bbe92b3ca585ca9fdaa7d85" -dependencies = [ - "indexmap", - "itoa", - "ryu", - "serde", - "unsafe-libyaml", -] - -[[package]] -name = "sqlformat" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c12bc9199d1db8234678b7051747c07f517cdcf019262d1847b94ec8b1aee3e" -dependencies = [ - "itertools", - "nom", - "unicode_categories", -] - -[[package]] -name = "sqlparser" -version = "0.33.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "355dc4d4b6207ca8a3434fc587db0a8016130a574dbcdbfb93d7f7b5bc5b211a" -dependencies = [ - "log", - "serde", -] - -[[package]] -name = "stacker" -version = "0.1.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "winapi", -] - -[[package]] -name = "strum" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.24.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn 1.0.109", -] - -[[package]] -name = "syn" -version = "1.0.109" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "syn" -version = "2.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "unicode-ident" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73" - -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - -[[package]] -name = "unicode_categories" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" - -[[package]] -name = "unsafe-libyaml" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1865806a559042e51ab5414598446a5871b561d21b6764f2eabb0dd481d880a6" - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 16a58a90dcf..fff2975cea4 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import shutil import subprocess import time import sys +from glob import glob from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple, Union @@ -31,6 +32,17 @@ TEMP_PATH = os.path.join(RUNNER_TEMP, "docker_images_check") ImagesDict = Dict[str, dict] +# workaround for mypy issue [1]: +# +# "Argument 1 to "map" has incompatible type overloaded function" [1] +# +# [1]: https://github.com/python/mypy/issues/9864 +# +# NOTE: simply lambda will do the trick as well, but pylint will not like it +def realpath(*args, **kwargs): + return os.path.realpath(*args, **kwargs) + + class DockerImage: def __init__( self, @@ -111,8 +123,23 @@ def get_changed_docker_images( changed_images = [] for dockerfile_dir, image_description in images_dict.items(): + source_dir = GITHUB_WORKSPACE.rstrip("/") + "/" + dockerfile_files = glob(f"{source_dir}/{dockerfile_dir}/**", recursive=True) + # resolve symlinks + dockerfile_files = list(map(realpath, dockerfile_files)) + # trim prefix to get relative path again, to match with files_changed + dockerfile_files = list(map(lambda x: x[len(source_dir) :], dockerfile_files)) + logging.info( + "Docker %s (source_dir=%s) build context for PR %s @ %s: %s", + dockerfile_dir, + source_dir, + pr_info.number, + pr_info.sha, + str(dockerfile_files), + ) + for f in files_changed: - if f.startswith(dockerfile_dir): + if f in dockerfile_files: name = image_description["name"] only_amd64 = image_description.get("only_amd64", False) logging.info( @@ -245,6 +272,8 @@ def build_and_push_one_image( cache_from = f"{cache_from} --cache-from type=registry,ref={image.repo}:{tag}" cmd = ( + # tar is requried to follow symlinks, since docker-build cannot do this + f"tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#{image.full_path.lstrip('/')}#./#' --dereference --create {image.full_path} | " "docker buildx build --builder default " f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" @@ -254,7 +283,7 @@ def build_and_push_one_image( f"{cache_from} " f"--cache-to type=inline,mode=max " f"{push_arg}" - f"--progress plain {image.full_path}" + f"--progress plain -" ) logging.info("Docker command to run: %s", cmd) with TeePopen(cmd, build_log) as proc: diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index d5d27f73694..c679ab984ee 100644 --- 
a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -126,12 +126,13 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version --cache-from type=registry,ref=name:version " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --push --progress plain path", + "--cache-to type=inline,mode=max --push --progress plain -", mock_popen.call_args.args, ) self.assertTrue(result) @@ -143,12 +144,13 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg FROM_TAG=version2 " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain path", + "--cache-to type=inline,mode=max --progress plain -", mock_popen.call_args.args, ) self.assertTrue(result) @@ -160,11 +162,12 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " - "--cache-to type=inline,mode=max --progress plain path", + "--cache-to type=inline,mode=max --progress plain -", mock_popen.call_args.args, ) self.assertFalse(result) @@ -178,13 +181,14 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( + "tar -v --exclude-vcs-ignores --show-transformed-names --transform 's#path#./#' --dereference --create path | " f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " f"--build-arg CACHE_INVALIDATOR={GITHUB_RUN_URL} " "--tag name:version2 --cache-from type=registry,ref=name:version2 " "--cache-from type=registry,ref=name:latest " "--cache-from type=registry,ref=name:cached-version " "--cache-from type=registry,ref=name:another-cached " - "--cache-to type=inline,mode=max --progress plain path", + "--cache-to type=inline,mode=max --progress plain -", mock_popen.call_args.args, ) self.assertFalse(result) From dc7c66396223329021641372c9156261edce5f99 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 22 Jul 2023 23:44:11 +0200 Subject: [PATCH 302/478] Update comment in DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index f9ed2c0d5ca..0d74e86a26d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -344,7 +344,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { std::lock_guard 
lock{databases_mutex}; - // hasDatabase() to avod getDatabaseName() throwing exception if database is empty. + // Callers assume that this method doesn't throw the exceptions, but getDatabaseName() can if there is no database part. auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); if (databases.end() == it) { From 12065d94c5e35c51e3a94c1919f1a38f4723d272 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 22 Jul 2023 23:59:02 +0200 Subject: [PATCH 303/478] Update comment DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 0d74e86a26d..c8f332ae76d 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -344,7 +344,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { std::lock_guard lock{databases_mutex}; - // Callers assume that this method doesn't throw the exceptions, but getDatabaseName() can if there is no database part. + // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part. auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); if (databases.end() == it) { From ef0dca626142322fa5420eea8fab491bb53c4ac2 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 23 Jul 2023 00:37:34 +0200 Subject: [PATCH 304/478] fix style --- src/Common/OptimizedRegularExpression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 918ebd75fc0..e636b0b987d 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -433,7 +433,7 @@ try for (auto & lit : alternative_literals) alternatives.push_back(std::move(lit.literal)); } -catch(...) +catch (...) { required_substring = ""; is_trivial = false; From 9bd8bdca98d21605f10d172b76c80951f990d965 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sun, 23 Jul 2023 01:14:26 +0200 Subject: [PATCH 305/478] Better error message in case of empty database name --- src/Interpreters/DatabaseCatalog.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index c8f332ae76d..f54b0e0ab3a 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -343,9 +343,17 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( DatabasePtr database; { - std::lock_guard lock{databases_mutex}; // Callers assume that this method doesn't throw exceptions, but getDatabaseName() will throw if there is no database part. - auto it = table_id.hasDatabase() ? databases.find(table_id.getDatabaseName()) : databases.end(); + // So, fail early and gracefully... 
+ if (!table_id.hasDatabase()) + { + if (exception) + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Empty database name")); + return {}; + } + + std::lock_guard lock{databases_mutex}; + auto it = databases.find(table_id.getDatabaseName()); if (databases.end() == it) { if (exception) From 00d6f2ee08a3e442363a078b322adab7b6988f91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 04:56:47 +0200 Subject: [PATCH 306/478] Use incbin for resources, part 1 --- .gitmodules | 3 + contrib/CMakeLists.txt | 2 +- contrib/incbin | 1 + contrib/incbin-cmake/CMakeLists.txt | 4 + contrib/nlp-data-cmake/CMakeLists.txt | 15 -- programs/install/CMakeLists.txt | 3 + programs/install/Install.cpp | 13 +- programs/keeper/CMakeLists.txt | 15 -- programs/keeper/Keeper.cpp | 6 +- programs/server/CMakeLists.txt | 12 +- programs/server/Server.cpp | 11 +- programs/server/resources.cpp | 0 src/CMakeLists.txt | 6 +- src/Common/CMakeLists.txt | 2 +- src/Common/Config/ConfigProcessor.cpp | 33 ++-- src/Common/Config/ConfigProcessor.h | 3 + src/Common/FrequencyHolder.cpp | 181 ++++++++++++++++++ src/Common/FrequencyHolder.h | 170 +--------------- src/Daemon/BaseDaemon.cpp | 1 - src/Server/WebUIRequestHandler.cpp | 14 +- src/Storages/System/CMakeLists.txt | 12 +- .../System/attachInformationSchemaTables.cpp | 24 ++- 22 files changed, 268 insertions(+), 263 deletions(-) create mode 160000 contrib/incbin create mode 100644 contrib/incbin-cmake/CMakeLists.txt delete mode 100644 contrib/nlp-data-cmake/CMakeLists.txt create mode 100644 programs/server/resources.cpp create mode 100644 src/Common/FrequencyHolder.cpp diff --git a/.gitmodules b/.gitmodules index ba71a8ae3a7..30085fb8dd4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -340,3 +340,6 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/c-ares/c-ares.git +[submodule "contrib/incbin"] + path = contrib/incbin + url = https://github.com/graphitemaster/incbin.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 0f68c0cbc7c..fdf6e60e58f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -164,13 +164,13 @@ add_contrib (libpq-cmake libpq) add_contrib (nuraft-cmake NuRaft) add_contrib (fast_float-cmake fast_float) add_contrib (datasketches-cpp-cmake datasketches-cpp) +add_contrib (incbin-cmake incbin) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) if (ENABLE_NLP) add_contrib (libstemmer-c-cmake libstemmer_c) add_contrib (wordnet-blast-cmake wordnet-blast) add_contrib (lemmagen-c-cmake lemmagen-c) - add_contrib (nlp-data-cmake nlp-data) add_contrib (cld2-cmake cld2) endif() diff --git a/contrib/incbin b/contrib/incbin new file mode 160000 index 00000000000..6e576cae5ab --- /dev/null +++ b/contrib/incbin @@ -0,0 +1 @@ +Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt new file mode 100644 index 00000000000..e64ebc99c73 --- /dev/null +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") +add_library(_incbin INTERFACE) +target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) +add_library(ch_contrib::incbin ALIAS _incbin) diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt deleted file mode 100644 index 5380269c479..00000000000 --- a/contrib/nlp-data-cmake/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - 
-set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") - -add_library (_nlp_data INTERFACE) - -clickhouse_embed_binaries( - TARGET nlp_dictionaries - RESOURCE_DIR "${LIBRARY_DIR}" - RESOURCES charset.zst tonality_ru.zst programming.zst -) - -add_dependencies(_nlp_data nlp_dictionaries) -target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -add_library(ch_contrib::nlp_data ALIAS _nlp_data) diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt index c3f4d96d631..f3f562bab7c 100644 --- a/programs/install/CMakeLists.txt +++ b/programs/install/CMakeLists.txt @@ -10,3 +10,6 @@ set (CLICKHOUSE_INSTALL_LINK ) clickhouse_program_add_library(install) + +# For incbin +target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server") diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index d83e189f7ef..da2c95af62c 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -20,10 +20,7 @@ #include #include #include -#include -#include #include -#include #include #include #include @@ -35,6 +32,12 @@ #include +#include + +/// Embedded configuration files used inside the install program +INCBIN(resource_config_xml, "config.xml"); +INCBIN(resource_users_xml, "users.xml"); + /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. * It also allows to avoid dependency on systemd, upstart, SysV init. @@ -560,7 +563,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(main_config_file)) { - std::string_view main_config_content = getResource("config.xml"); + std::string_view main_config_content(reinterpret_cast(gresource_config_xmlData), gresource_config_xmlSize); if (main_config_content.empty()) { fmt::print("There is no default config.xml, you have to download it and place to {}.\n", main_config_file.string()); @@ -672,7 +675,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (!fs::exists(users_config_file)) { - std::string_view users_config_content = getResource("users.xml"); + std::string_view users_config_content(reinterpret_cast(gresource_users_xmlData), gresource_users_xmlSize); if (users_config_content.empty()) { fmt::print("There is no default users.xml, you have to download it and place to {}.\n", users_config_file.string()); diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 940e6848597..317e35959aa 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -1,16 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -if (OS_LINUX) - set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - # for some reason INTERFACE linkage doesn't work for standalone binary - set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") -endif () - -clickhouse_embed_binaries( - TARGET clickhouse_keeper_configs - RESOURCES keeper_config.xml keeper_embedded.xml -) - set(CLICKHOUSE_KEEPER_SOURCES Keeper.cpp ) @@ -29,7 +16,6 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) -add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) if (BUILD_STANDALONE_KEEPER) # Straight list of all required sources @@ -215,7 +201,6 @@ if (BUILD_STANDALONE_KEEPER) ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} ) - add_dependencies(clickhouse-keeper 
clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) if (SPLIT_DEBUG_SYMBOLS) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 6034d63a016..a38467c3369 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -457,8 +457,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); /// ConfigReloader have to strict parameters which are redundant in our case auto main_config_reloader = std::make_unique( diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 855973d10e1..e008e65acf6 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -1,12 +1,8 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - set(CLICKHOUSE_SERVER_SOURCES MetricsTransmitter.cpp Server.cpp ) -set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") - set (CLICKHOUSE_SERVER_LINK PRIVATE clickhouse_aggregate_functions @@ -33,10 +29,4 @@ endif() clickhouse_program_add(server) -install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) - -clickhouse_embed_binaries( - TARGET clickhouse_server_configs - RESOURCES config.xml users.xml embedded.xml play.html dashboard.html js/uplot.js -) -add_dependencies(clickhouse-server-lib clickhouse_server_configs) +target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 33fdcc9c1a8..229a169dc1e 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -128,6 +128,10 @@ # include #endif +#include +/// A minimal file used when the server is run without installation +INCBIN(resource_embedded_xml, "embedded.xml"); + namespace CurrentMetrics { extern const Metric Revision; @@ -393,6 +397,7 @@ int Server::run() void Server::initialize(Poco::Util::Application & self) { + ConfigProcessor::registerEmbeddedConfig("config.xml", std::string_view(reinterpret_cast(gresource_embedded_xmlData), gresource_embedded_xmlSize)); BaseDaemon::initialize(self); logger().information("starting up"); @@ -1105,8 +1110,10 @@ try const std::string key_path = config().getString("openSSL.server.privateKeyFile", ""); std::vector extra_paths = {include_from_path}; - if (!cert_path.empty()) extra_paths.emplace_back(cert_path); - if (!key_path.empty()) extra_paths.emplace_back(key_path); + if (!cert_path.empty()) + extra_paths.emplace_back(cert_path); + if (!key_path.empty()) + extra_paths.emplace_back(key_path); auto main_config_reloader = std::make_unique( config_path, diff --git a/programs/server/resources.cpp b/programs/server/resources.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f870993f080..fda8bafde59 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -210,7 +210,7 @@ if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc) endif() -target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash) +target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash 
ch_contrib::incbin) add_subdirectory(Access/Common) add_subdirectory(Common/ZooKeeper) @@ -296,7 +296,7 @@ macro (dbms_target_include_directories) endforeach () endmacro () -dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") +dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") if (TARGET ch_contrib::llvm) @@ -561,7 +561,7 @@ if (ENABLE_NLP) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) dbms_target_link_libraries (PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) - dbms_target_link_libraries (PUBLIC ch_contrib::nlp_data) + target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data) endif() if (TARGET ch_contrib::ulid) diff --git a/src/Common/CMakeLists.txt b/src/Common/CMakeLists.txt index e527b3dec43..b83c8431f0a 100644 --- a/src/Common/CMakeLists.txt +++ b/src/Common/CMakeLists.txt @@ -9,5 +9,5 @@ if (ENABLE_EXAMPLES) endif() if (ENABLE_MYSQL) - add_subdirectory (mysqlxx) + add_subdirectory(mysqlxx) endif () diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 5bbc8eae0de..c3a8f69cf3f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -83,6 +83,13 @@ ConfigProcessor::~ConfigProcessor() Poco::Logger::destroy("ConfigProcessor"); } +static std::unordered_map embedded_configs; + +void ConfigProcessor::registerEmbeddedConfig(std::string name, std::string_view content) +{ + embedded_configs[name] = content; +} + /// Vector containing the name of the element and a sorted list of attribute names and values /// (except "remove" and "replace" attributes). @@ -281,15 +288,15 @@ void ConfigProcessor::doIncludesRecursive( { std::string value = node->nodeValue(); - bool replace_occured = false; + bool replace_occurred = false; size_t pos; while ((pos = value.find(substitution.first)) != std::string::npos) { value.replace(pos, substitution.first.length(), substitution.second); - replace_occured = true; + replace_occurred = true; } - if (replace_occured) + if (replace_occurred) node->setNodeValue(value); } } @@ -528,26 +535,14 @@ XMLDocumentPtr ConfigProcessor::processConfig( } else { - /// These embedded files added during build with some cmake magic. - /// Look at the end of programs/server/CMakeLists.txt. - std::string embedded_name; - if (path == "config.xml") - embedded_name = "embedded.xml"; - - if (path == "keeper_config.xml") - embedded_name = "keeper_embedded.xml"; - - /// When we can use config embedded in binary. - if (!embedded_name.empty()) + /// When we can use a config embedded in the binary. 
+        if (auto it = embedded_configs.find(path); it != embedded_configs.end())
         {
-            auto resource = getResource(embedded_name);
-            if (resource.empty())
-                throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
             LOG_DEBUG(log, "There is no file '{}', will use embedded config.", path);
-            config = dom_parser.parseMemory(resource.data(), resource.size());
+            config = dom_parser.parseMemory(it->second.data(), it->second.size());
         }
         else
-            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist", path);
+            throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Configuration file {} doesn't exist and there is no embedded config", path);
     }
 
     std::vector contributing_files;
diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h
index 0ca3e46db88..eefe65ef06c 100644
--- a/src/Common/Config/ConfigProcessor.h
+++ b/src/Common/Config/ConfigProcessor.h
@@ -65,6 +65,9 @@ public:
         zkutil::ZooKeeperNodeCache * zk_node_cache = nullptr,
         const zkutil::EventPtr & zk_changed_event = nullptr);
 
+    /// These configurations will be used if there is no configuration file.
+    static void registerEmbeddedConfig(std::string name, std::string_view content);
+
 
     /// loadConfig* functions apply processConfig and create Poco::Util::XMLConfiguration.
     /// The resulting XML document is saved into a file with the name
diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp
new file mode 100644
index 00000000000..3b755cacacb
--- /dev/null
+++ b/src/Common/FrequencyHolder.cpp
@@ -0,0 +1,181 @@
+#include
+
+#include
+
+/// Embedded frequency dictionaries
+INCBIN(resource_charset_zst, "charset.zst");
+INCBIN(resource_tonality_ru_zst, "tonality_ru.zst");
+INCBIN(resource_programming_zst, "programming.zst");
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int FILE_DOESNT_EXIST;
+}
+
+
+FrequencyHolder & FrequencyHolder::getInstance()
+{
+    static FrequencyHolder instance;
+    return instance;
+}
+
+FrequencyHolder::FrequencyHolder()
+{
+    loadEmotionalDict();
+    loadEncodingsFrequency();
+    loadProgrammingFrequency();
+}
+
+void FrequencyHolder::loadEncodingsFrequency()
+{
+    Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency");
+
+    LOG_TRACE(log, "Loading embedded charset frequencies");
+
+    std::string_view resource(reinterpret_cast(gresource_charset_zstData), gresource_charset_zstSize);
+    if (resource.empty())
+        throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies");
+
+    String line;
+    UInt16 bigram;
+    Float64 frequency;
+    String charset_name;
+
+    auto buf = std::make_unique(resource.data(), resource.size());
+    ZstdInflatingReadBuffer in(std::move(buf));
+
+    while (!in.eof())
+    {
+        readString(line, in);
+        in.ignore();
+
+        if (line.empty())
+            continue;
+
+        ReadBufferFromString buf_line(line);
+
+        // Start loading a new charset
+        if (line.starts_with("// "))
+        {
+            // Skip "// "
+            buf_line.ignore(3);
+            readString(charset_name, buf_line);
+
+            /* In our dictionary we have lines with form: _
+             * If we need to find language of data, we return
+             * If we need to find charset of data, we return .
+ */ + size_t sep = charset_name.find('_'); + + Encoding enc; + enc.lang = charset_name.substr(0, sep); + enc.name = charset_name.substr(sep + 1); + encodings_freq.push_back(std::move(enc)); + } + else + { + readIntText(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + encodings_freq.back().map[bigram] = frequency; + } + } + LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size()); +} + +void FrequencyHolder::loadEmotionalDict() +{ + Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); + LOG_TRACE(log, "Loading embedded emotional dictionary"); + + std::string_view resource(reinterpret_cast(gresource_tonality_ru_zstData), gresource_tonality_ru_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); + + String line; + String word; + Float64 tonality; + size_t count = 0; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + readStringUntilWhitespace(word, buf_line); + buf_line.ignore(); + readFloatText(tonality, buf_line); + + StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; + emotional_dict[ref] = tonality; + ++count; + } + LOG_TRACE(log, "Emotional dictionary was added. Word count: {}", std::to_string(count)); +} + +void FrequencyHolder::loadProgrammingFrequency() +{ + Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); + + LOG_TRACE(log, "Loading embedded programming languages frequencies loading"); + + std::string_view resource(reinterpret_cast(gresource_programming_zstData), gresource_programming_zstSize); + if (resource.empty()) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies"); + + String line; + String bigram; + Float64 frequency; + String programming_language; + + auto buf = std::make_unique(resource.data(), resource.size()); + ZstdInflatingReadBuffer in(std::move(buf)); + + while (!in.eof()) + { + readString(line, in); + in.ignore(); + + if (line.empty()) + continue; + + ReadBufferFromString buf_line(line); + + // Start loading a new language + if (line.starts_with("// ")) + { + // Skip "// " + buf_line.ignore(3); + readString(programming_language, buf_line); + + Language lang; + lang.name = programming_language; + programming_freq.push_back(std::move(lang)); + } + else + { + readStringUntilWhitespace(bigram, buf_line); + buf_line.ignore(); + readFloatText(frequency, buf_line); + + StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; + programming_freq.back().map[ref] = frequency; + } + } + LOG_TRACE(log, "Programming languages frequencies was added"); +} + +} diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 74098598441..270e4dbbd2a 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -7,7 +7,6 @@ #include #include -#include #include #include #include @@ -20,11 +19,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int FILE_DOESNT_EXIST; -} - /// FrequencyHolder class is responsible for storing and loading dictionaries /// needed for text classification functions: /// @@ -56,11 +50,7 @@ public: using EncodingMap = HashMap; using EncodingContainer = std::vector; - static FrequencyHolder & getInstance() - { - static FrequencyHolder instance; - return instance; 
- } + static FrequencyHolder & getInstance(); const Map & getEmotionalDict() const { @@ -78,161 +68,11 @@ public: } private: + FrequencyHolder(); - FrequencyHolder() - { - loadEmotionalDict(); - loadEncodingsFrequency(); - loadProgrammingFrequency(); - } - - void loadEncodingsFrequency() - { - Poco::Logger * log = &Poco::Logger::get("EncodingsFrequency"); - - LOG_TRACE(log, "Loading embedded charset frequencies"); - - auto resource = getResource("charset.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded charset frequencies"); - - String line; - UInt16 bigram; - Float64 frequency; - String charset_name; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - // Start loading a new charset - if (line.starts_with("// ")) - { - // Skip "// " - buf_line.ignore(3); - readString(charset_name, buf_line); - - /* In our dictionary we have lines with form: _ - * If we need to find language of data, we return - * If we need to find charset of data, we return . - */ - size_t sep = charset_name.find('_'); - - Encoding enc; - enc.lang = charset_name.substr(0, sep); - enc.name = charset_name.substr(sep + 1); - encodings_freq.push_back(std::move(enc)); - } - else - { - readIntText(bigram, buf_line); - buf_line.ignore(); - readFloatText(frequency, buf_line); - - encodings_freq.back().map[bigram] = frequency; - } - } - LOG_TRACE(log, "Charset frequencies was added, charsets count: {}", encodings_freq.size()); - } - - void loadEmotionalDict() - { - Poco::Logger * log = &Poco::Logger::get("EmotionalDict"); - LOG_TRACE(log, "Loading embedded emotional dictionary"); - - auto resource = getResource("tonality_ru.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded emotional dictionary"); - - String line; - String word; - Float64 tonality; - size_t count = 0; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - readStringUntilWhitespace(word, buf_line); - buf_line.ignore(); - readFloatText(tonality, buf_line); - - StringRef ref{string_pool.insert(word.data(), word.size()), word.size()}; - emotional_dict[ref] = tonality; - ++count; - } - LOG_TRACE(log, "Emotional dictionary was added. 
Word count: {}", std::to_string(count)); - } - - void loadProgrammingFrequency() - { - Poco::Logger * log = &Poco::Logger::get("ProgrammingFrequency"); - - LOG_TRACE(log, "Loading embedded programming languages frequencies loading"); - - auto resource = getResource("programming.zst"); - if (resource.empty()) - throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "There is no embedded programming languages frequencies"); - - String line; - String bigram; - Float64 frequency; - String programming_language; - - auto buf = std::make_unique(resource.data(), resource.size()); - ZstdInflatingReadBuffer in(std::move(buf)); - - while (!in.eof()) - { - readString(line, in); - in.ignore(); - - if (line.empty()) - continue; - - ReadBufferFromString buf_line(line); - - // Start loading a new language - if (line.starts_with("// ")) - { - // Skip "// " - buf_line.ignore(3); - readString(programming_language, buf_line); - - Language lang; - lang.name = programming_language; - programming_freq.push_back(std::move(lang)); - } - else - { - readStringUntilWhitespace(bigram, buf_line); - buf_line.ignore(); - readFloatText(frequency, buf_line); - - StringRef ref{string_pool.insert(bigram.data(), bigram.size()), bigram.size()}; - programming_freq.back().map[ref] = frequency; - } - } - LOG_TRACE(log, "Programming languages frequencies was added"); - } + void loadEncodingsFrequency(); + void loadEmotionalDict(); + void loadProgrammingFrequency(); Arena string_pool; diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 3852ec5ada5..f61ca054b2a 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -38,7 +38,6 @@ #include #include -#include #include #include #include diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index 3997e0f19b6..cb9e8935d8c 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -6,10 +6,16 @@ #include #include -#include #include +#include + +/// Embedded HTML pages +INCBIN(resource_play_html, "play.html"); +INCBIN(resource_dashboard_html, "dashboard.html"); +INCBIN(resource_uplot_js, "js/uplot.js"); + namespace DB { @@ -34,13 +40,13 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR if (request.getURI().starts_with("/play")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("play.html"); + *response.send() << std::string_view(reinterpret_cast(gresource_play_htmlData), gresource_play_htmlSize); } else if (request.getURI().starts_with("/dashboard")) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - std::string html(getResource("dashboard.html")); + std::string html(reinterpret_cast(gresource_dashboard_htmlData), gresource_dashboard_htmlSize); /// Replace a link to external JavaScript file to embedded file. /// This allows to open the HTML without running a server and to host it on server. 
@@ -55,7 +61,7 @@ void WebUIRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerR else if (request.getURI() == "/js/uplot.js") { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_OK); - *response.send() << getResource("js/uplot.js"); + *response.send() << std::string_view(reinterpret_cast(gresource_uplot_jsData), gresource_uplot_jsSize); } else { diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 1d2a3de5101..6b7d1739e33 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -43,18 +43,9 @@ list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) -clickhouse_embed_binaries( - TARGET information_schema_metadata - RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/InformationSchema/" - RESOURCES schemata.sql tables.sql views.sql columns.sql -) - list (SORT storages_system_sources) # Reproducible build add_library(clickhouse_storages_system ${storages_system_sources}) -add_dependencies(clickhouse_storages_system information_schema_metadata) - target_link_libraries(clickhouse_storages_system PRIVATE dbms common @@ -62,5 +53,6 @@ target_link_libraries(clickhouse_storages_system PRIVATE clickhouse_common_zookeeper clickhouse_parsers Poco::JSON - INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}" ) + +target_include_directories(clickhouse_storages_system PRIVATE InformationSchema) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 61a91685324..bfc5c8c64e2 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -3,14 +3,21 @@ #include #include #include -#include +#include + +/// Embedded SQL definitions +INCBIN(resource_schemata_sql, "schemata.sql"); +INCBIN(resource_tables_sql, "tables.sql"); +INCBIN(resource_views_sql, "views.sql"); +INCBIN(resource_columns_sql, "columns.sql"); + namespace DB { /// View structures are taken from http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt -static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name) +static void createInformationSchemaView(ContextMutablePtr context, IDatabase & database, const String & view_name, std::string_view query) { try { @@ -21,12 +28,11 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d bool is_uppercase = database.getDatabaseName() == DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE; String metadata_resource_name = view_name + ".sql"; - auto attach_query = getResource(metadata_resource_name); - if (attach_query.empty()) + if (query.empty()) return; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, attach_query.data(), attach_query.data() + attach_query.size(), + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -50,10 +56,10 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d void attachInformationSchema(ContextMutablePtr context, IDatabase & information_schema_database) { - createInformationSchemaView(context, information_schema_database, "schemata"); - createInformationSchemaView(context, information_schema_database, "tables"); - 
createInformationSchemaView(context, information_schema_database, "views"); - createInformationSchemaView(context, information_schema_database, "columns"); + createInformationSchemaView(context, information_schema_database, "schemata", std::string_view(reinterpret_cast(gresource_schemata_sqlData), gresource_schemata_sqlSize)); + createInformationSchemaView(context, information_schema_database, "tables", std::string_view(reinterpret_cast(gresource_tables_sqlData), gresource_tables_sqlSize)); + createInformationSchemaView(context, information_schema_database, "views", std::string_view(reinterpret_cast(gresource_views_sqlData), gresource_views_sqlSize)); + createInformationSchemaView(context, information_schema_database, "columns", std::string_view(reinterpret_cast(gresource_columns_sqlData), gresource_columns_sqlSize)); } } From 4170d1458bdbccafe2f8cb2c671ee044b3efe9ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 05:25:14 +0200 Subject: [PATCH 307/478] Use incbin for resources, part 2 --- cmake/embed_binary.cmake | 58 ------------------ contrib/cctz-cmake/CMakeLists.txt | 45 ++++++++------ src/Common/Config/ConfigProcessor.cpp | 1 - src/Common/DateLUTImpl.cpp | 17 ++++-- src/Common/SymbolIndex.cpp | 61 +++---------------- src/Common/SymbolIndex.h | 30 +-------- src/Common/getResource.cpp | 52 ---------------- src/Common/getResource.h | 7 --- src/Common/tests/gtest_DateLUTImpl.cpp | 14 ++--- .../System/StorageSystemTimeZones.cpp | 7 ++- 10 files changed, 58 insertions(+), 234 deletions(-) delete mode 100644 cmake/embed_binary.cmake delete mode 100644 src/Common/getResource.cpp delete mode 100644 src/Common/getResource.h diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake deleted file mode 100644 index e5428c24939..00000000000 --- a/cmake/embed_binary.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# Embed a set of resource files into a resulting object file. -# -# Signature: `clickhouse_embed_binaries(TARGET RESOURCE_DIR RESOURCES ...) -# -# This will generate a static library target named ``, which contains the contents of -# each `` file. The files should be located in ``. defaults to -# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty. -# -# Each resource will result in three symbols in the final archive, based on the name ``. -# These are: -# 1. `_binary__start`: Points to the start of the binary data from ``. -# 2. `_binary__end`: Points to the end of the binary data from ``. -# 2. `_binary__size`: Points to the size of the binary data from ``. -# -# `` is a normalized name derived from ``, by replacing the characters "./-" with -# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated -# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`. 
-macro(clickhouse_embed_binaries) - set(one_value_args TARGET RESOURCE_DIR) - set(resources RESOURCES) - cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN}) - - if (NOT DEFINED EMBED_TARGET) - message(FATAL_ERROR "A target name must be provided for embedding binary resources into") - endif() - - if (NOT DEFINED EMBED_RESOURCE_DIR) - set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}") - endif() - - list(LENGTH EMBED_RESOURCES N_RESOURCES) - if (N_RESOURCES LESS 1) - message(FATAL_ERROR "The list of binary resources to embed may not be empty") - endif() - - add_library("${EMBED_TARGET}" STATIC) - set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C) - - set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in") - - foreach(RESOURCE_FILE ${EMBED_RESOURCES}) - set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S") - set(BINARY_FILE_NAME "${RESOURCE_FILE}") - - # Normalize the name of the resource. - string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex - string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}") - - # Generate the configured assembly file in the output directory. - configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY) - - # Set the include directory for relative paths specified for `.incbin` directive. - set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}") - - target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}") - set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}") - endforeach() -endmacro() diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 10070fbd949..8aa3c7886db 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,4 +1,3 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz") set (SRCS @@ -23,12 +22,10 @@ if (OS_FREEBSD) endif () # Related to time_zones table: -# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build -# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX -# as the library that's built using embedded tzdata is also specific to OS_LINUX -set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp") +# TimeZones.generated.cpp is autogenerated each time during a build +set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp") # remove existing copies so that its generated fresh on each build. -file(REMOVE ${SYSTEM_STORAGE_TZ_FILE}) +file(REMOVE ${TIMEZONES_FILE}) # get the list of timezones from tzdata shipped with cctz set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo") @@ -36,28 +33,36 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION) set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}") message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") -set(TIMEZONE_RESOURCE_FILES) - # each file in that dir (except of tab and localtime) store the info about timezone execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! 
-name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE TIMEZONES) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" ) +file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") +file(APPEND ${TIMEZONES_FILE} "#include \n") +set (COUNTER 1) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " \"${TIMEZONE}\",\n") - list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}") + file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n") + MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${SYSTEM_STORAGE_TZ_FILE} " nullptr};\n") -clickhouse_embed_binaries( - TARGET tzdata - RESOURCE_DIR "${TZDIR}" - RESOURCES ${TIMEZONE_RESOURCE_FILES} -) -add_dependencies(_cctz tzdata) -target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") + +file(APPEND ${TIMEZONES_FILE} "#include \n") +file(APPEND ${TIMEZONES_FILE} "struct TimeZone { const char * name; const unsigned char * data; size_t size; };\n") +file(APPEND ${TIMEZONES_FILE} "TimeZone auto_time_zones[] {\n" ) + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} " {\"${TIMEZONE}\", gresource_timezone${COUNTER}Data, gresource_timezone${COUNTER}Size},\n") + MATH(EXPR COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} " {nullptr, nullptr, 0}};\n") + +add_library (tzdata ${TIMEZONES_FILE}) +target_link_libraries(tzdata ch_contrib::incbin) +target_include_directories(tzdata PRIVATE ${TZDIR}) +target_link_libraries(_cctz tzdata) add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index c3a8f69cf3f..bda181eceeb 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 8146b35cc5f..3619462e79b 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include @@ -13,6 +12,10 @@ #include +/// Embedded timezones. 
+struct TimeZone { const char * name; const unsigned char * data; size_t size; }; +extern TimeZone auto_time_zones[]; + namespace { @@ -249,9 +252,15 @@ namespace cctz_extension const std::string & name, const std::function(const std::string & name)> & fallback) { - std::string_view resource = getResource(name); - if (!resource.empty()) - return std::make_unique(resource.data(), resource.size()); + const TimeZone * timezone = auto_time_zones; + while (timezone->name != nullptr) + { + if (timezone->name == name) + break; + ++timezone; + } + if (timezone->size) + return std::make_unique(reinterpret_cast(timezone->data), timezone->size); return fallback(name); } diff --git a/src/Common/SymbolIndex.cpp b/src/Common/SymbolIndex.cpp index cb02bb3ff75..ac406538033 100644 --- a/src/Common/SymbolIndex.cpp +++ b/src/Common/SymbolIndex.cpp @@ -87,50 +87,13 @@ namespace /// https://stackoverflow.com/questions/32088140/multiple-string-tables-in-elf-object -void updateResources(ElfW(Addr) base_address, std::string_view object_name, std::string_view name, const void * address, SymbolIndex::Resources & resources) -{ - const char * char_address = static_cast(address); - - if (name.starts_with("_binary_") || name.starts_with("binary_")) - { - if (name.ends_with("_start")) - { - name = name.substr((name[0] == '_') + strlen("binary_")); - name = name.substr(0, name.size() - strlen("_start")); - - auto & resource = resources[name]; - if (!resource.base_address || resource.base_address == base_address) - { - resource.base_address = base_address; - resource.start = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) - resource.object_name = object_name; - } - } - if (name.ends_with("_end")) - { - name = name.substr((name[0] == '_') + strlen("binary_")); - name = name.substr(0, name.size() - strlen("_end")); - - auto & resource = resources[name]; - if (!resource.base_address || resource.base_address == base_address) - { - resource.base_address = base_address; - resource.end = std::string_view{char_address, 0}; // NOLINT(bugprone-string-constructor) - resource.object_name = object_name; - } - } - } -} - - /// Based on the code of musl-libc and the answer of Kanalpiroge on /// https://stackoverflow.com/questions/15779185/list-all-the-functions-symbols-on-the-fly-in-c-code-on-a-linux-architecture /// It does not extract all the symbols (but only public - exported and used for dynamic linking), /// but will work if we cannot find or parse ELF files. void collectSymbolsFromProgramHeaders( dl_phdr_info * info, - std::vector & symbols, - SymbolIndex::Resources & resources) + std::vector & symbols) { /* Iterate over all headers of the current shared lib * (first call is for the executable itself) @@ -248,9 +211,6 @@ void collectSymbolsFromProgramHeaders( /// We are not interested in empty symbols. if (elf_sym[sym_index].st_size) symbols.push_back(symbol); - - /// But resources can be represented by a pair of empty symbols (indicating their boundaries). - updateResources(base_address, info->dlpi_name, symbol.name, symbol.address_begin, resources); } break; @@ -281,8 +241,7 @@ void collectSymbolsFromELFSymbolTable( const Elf & elf, const Elf::Section & symbol_table, const Elf::Section & string_table, - std::vector & symbols, - SymbolIndex::Resources & resources) + std::vector & symbols) { /// Iterate symbol table. 
const ElfSym * symbol_table_entry = reinterpret_cast(symbol_table.begin()); @@ -312,8 +271,6 @@ void collectSymbolsFromELFSymbolTable( if (symbol_table_entry->st_size) symbols.push_back(symbol); - - updateResources(info->dlpi_addr, info->dlpi_name, symbol.name, symbol.address_begin, resources); } } @@ -323,8 +280,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable( const Elf & elf, unsigned section_header_type, const char * string_table_name, - std::vector & symbols, - SymbolIndex::Resources & resources) + std::vector & symbols) { std::optional symbol_table; std::optional string_table; @@ -342,7 +298,7 @@ bool searchAndCollectSymbolsFromELFSymbolTable( return false; } - collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols, resources); + collectSymbolsFromELFSymbolTable(info, elf, *symbol_table, *string_table, symbols); return true; } @@ -351,7 +307,6 @@ void collectSymbolsFromELF( dl_phdr_info * info, std::vector & symbols, std::vector & objects, - SymbolIndex::Resources & resources, String & build_id) { String object_name; @@ -462,11 +417,11 @@ void collectSymbolsFromELF( object.name = object_name; objects.push_back(std::move(object)); - searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols, resources); + searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_SYMTAB, ".strtab", symbols); /// Unneeded if they were parsed from "program headers" of loaded objects. #if defined USE_MUSL - searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols, resources); + searchAndCollectSymbolsFromELFSymbolTable(info, *objects.back().elf, SHT_DYNSYM, ".dynstr", symbols); #endif } @@ -479,8 +434,8 @@ int collectSymbols(dl_phdr_info * info, size_t, void * data_ptr) { SymbolIndex::Data & data = *reinterpret_cast(data_ptr); - collectSymbolsFromProgramHeaders(info, data.symbols, data.resources); - collectSymbolsFromELF(info, data.symbols, data.objects, data.resources, data.build_id); + collectSymbolsFromProgramHeaders(info, data.symbols); + collectSymbolsFromELF(info, data.symbols, data.objects, data.build_id); /* Continue iterations */ return 0; diff --git a/src/Common/SymbolIndex.h b/src/Common/SymbolIndex.h index 4fd108434d5..8c7b8971805 100644 --- a/src/Common/SymbolIndex.h +++ b/src/Common/SymbolIndex.h @@ -8,6 +8,7 @@ #include #include + namespace DB { @@ -45,44 +46,15 @@ public: const std::vector & symbols() const { return data.symbols; } const std::vector & objects() const { return data.objects; } - std::string_view getResource(String name) const - { - if (auto it = data.resources.find(name); it != data.resources.end()) - return it->second.data(); - return {}; - } - /// The BuildID that is generated by compiler. String getBuildID() const { return data.build_id; } String getBuildIDHex() const; - struct ResourcesBlob - { - /// Symbol can be presented in multiple shared objects, - /// base_address will be used to compare only symbols from the same SO. - ElfW(Addr) base_address = 0; - /// Just a human name of the SO. - std::string_view object_name; - /// Data blob. 
- std::string_view start; - std::string_view end; - - std::string_view data() const - { - assert(end.data() >= start.data()); - return std::string_view{start.data(), static_cast(end.data() - start.data())}; - } - }; - using Resources = std::unordered_map; - struct Data { std::vector symbols; std::vector objects; String build_id; - - /// Resources (embedded binary data) are located by symbols in form of _binary_name_start and _binary_name_end. - Resources resources; }; private: Data data; diff --git a/src/Common/getResource.cpp b/src/Common/getResource.cpp deleted file mode 100644 index 72ba24c2f44..00000000000 --- a/src/Common/getResource.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "getResource.h" -#include -#include -#include -#include - - -std::string_view getResource(std::string_view name) -{ - // Convert the resource file name into the form generated by `ld -r -b binary`. - std::string name_replaced(name); - std::replace(name_replaced.begin(), name_replaced.end(), '/', '_'); - std::replace(name_replaced.begin(), name_replaced.end(), '-', '_'); - std::replace(name_replaced.begin(), name_replaced.end(), '.', '_'); - boost::replace_all(name_replaced, "+", "_PLUS_"); - -#if defined USE_MUSL - /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourself. - return DB::SymbolIndex::instance().getResource(name_replaced); - -#else - // In most `dlsym(3)` APIs, one passes the symbol name as it appears via - // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be - // looked up with the string `"_foo"`. - // - // Apple's linker is confusingly different. The NOTES on the man page for - // `dlsym(3)` claim that one looks up the symbol with "the name used in C - // source code". In this example, that would mean using the string `"foo"`. - // This apparently applies even in the case where the symbol did not originate - // from C source, such as the embedded binary resource files used here. So - // the symbol name must not have a leading `_` on Apple platforms. It's not - // clear how this applies to other symbols, such as those which _have_ a leading - // underscore in them by design, many leading underscores, etc. -#if defined OS_DARWIN - std::string prefix = "binary_"; -#else - std::string prefix = "_binary_"; -#endif - std::string symbol_name_start = prefix + name_replaced + "_start"; - std::string symbol_name_end = prefix + name_replaced + "_end"; - - const char * sym_start = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_start.c_str())); - const char * sym_end = reinterpret_cast(dlsym(RTLD_DEFAULT, symbol_name_end.c_str())); - - if (sym_start && sym_end) - { - auto resource_size = static_cast(std::distance(sym_start, sym_end)); - return { sym_start, resource_size }; - } - return {}; -#endif -} diff --git a/src/Common/getResource.h b/src/Common/getResource.h deleted file mode 100644 index 8975cc7841e..00000000000 --- a/src/Common/getResource.h +++ /dev/null @@ -1,7 +0,0 @@ -#pragma once - -#include - -/// Get resource from binary if exists. Otherwise return empty string view. -/// Resources are data that is embedded into executable at link time. -std::string_view getResource(std::string_view name); diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index 04f63403ec2..b09319c78d6 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -15,7 +15,8 @@ #endif // All timezones present at build time and embedded into ClickHouse binary. 
-extern const char * auto_time_zones[]; +struct TimeZone { const char * name; const unsigned char * data; size_t size; }; +extern TimeZone auto_time_zones[]; namespace { @@ -32,14 +33,14 @@ std::vector allTimezones(bool with_weird_offsets = true) { std::vector result; - const auto * timezone_name = auto_time_zones; - while (*timezone_name) + const TimeZone * timezone = auto_time_zones; + while (timezone->name) { - bool weird_offsets = (std::string_view(*timezone_name) == "Africa/Monrovia"); + bool weird_offsets = (std::string_view(timezone->name) == "Africa/Monrovia"); if (!weird_offsets || with_weird_offsets) - result.push_back(*timezone_name); - ++timezone_name; + result.push_back(timezone->name); + ++timezone; } return result; @@ -548,4 +549,3 @@ INSTANTIATE_TEST_SUITE_P(AllTimezones_Year1970, // {0, 0 + 11 * 3600 * 24 + 12, 11}, })) ); - diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index dc3711812a6..41227ab7780 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -4,7 +4,8 @@ #include -extern const char * auto_time_zones[]; +struct TimeZone { const char * name; const unsigned char * data; size_t size; }; +extern TimeZone auto_time_zones[]; namespace DB { @@ -17,7 +18,7 @@ NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { - for (auto * it = auto_time_zones; *it; ++it) - res_columns[0]->insert(String(*it)); + for (auto * it = auto_time_zones; it->name != nullptr; ++it) + res_columns[0]->insert(String(it->name)); } } From c8f8a23c71dc88ab53318be369ca17b528047b05 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 06:09:15 +0200 Subject: [PATCH 308/478] Fix errors --- contrib/cctz-cmake/CMakeLists.txt | 21 +++++++++++++------ src/Common/DateLUTImpl.cpp | 18 +++++++--------- src/Common/tests/gtest_DateLUTImpl.cpp | 13 ++++++------ src/Storages/System/CMakeLists.txt | 2 -- .../System/StorageSystemTimeZones.cpp | 7 +++---- 5 files changed, 31 insertions(+), 30 deletions(-) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 8aa3c7886db..7edeada6e59 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -48,17 +48,26 @@ foreach(TIMEZONE ${TIMEZONES}) MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${TIMEZONES_FILE} "#include \n") -file(APPEND ${TIMEZONES_FILE} "struct TimeZone { const char * name; const unsigned char * data; size_t size; };\n") -file(APPEND ${TIMEZONES_FILE} "TimeZone auto_time_zones[] {\n" ) +file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" ) -set (COUNTER 1) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${TIMEZONES_FILE} " {\"${TIMEZONE}\", gresource_timezone${COUNTER}Data, gresource_timezone${COUNTER}Size},\n") + file(APPEND ${TIMEZONES_FILE} " \"${TIMEZONE}\",\n") MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${TIMEZONES_FILE} " {nullptr, nullptr, 0}};\n") +file(APPEND ${TIMEZONES_FILE} "};\n\n") + +file(APPEND ${TIMEZONES_FILE} "#include \n\n") +file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" ) + +set (COUNTER 1) +foreach(TIMEZONE ${TIMEZONES}) + file(APPEND ${TIMEZONES_FILE} " if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n") + MATH(EXPR 
COUNTER "${COUNTER}+1") +endforeach(TIMEZONE) + +file(APPEND ${TIMEZONES_FILE} " return {};\n") +file(APPEND ${TIMEZONES_FILE} "}\n") add_library (tzdata ${TIMEZONES_FILE}) target_link_libraries(tzdata ch_contrib::incbin) diff --git a/src/Common/DateLUTImpl.cpp b/src/Common/DateLUTImpl.cpp index 3619462e79b..d5e04238ef9 100644 --- a/src/Common/DateLUTImpl.cpp +++ b/src/Common/DateLUTImpl.cpp @@ -10,11 +10,12 @@ #include #include #include +#include /// Embedded timezones. -struct TimeZone { const char * name; const unsigned char * data; size_t size; }; -extern TimeZone auto_time_zones[]; +std::string_view getTimeZone(const char * name); + namespace { @@ -252,15 +253,10 @@ namespace cctz_extension const std::string & name, const std::function(const std::string & name)> & fallback) { - const TimeZone * timezone = auto_time_zones; - while (timezone->name != nullptr) - { - if (timezone->name == name) - break; - ++timezone; - } - if (timezone->size) - return std::make_unique(reinterpret_cast(timezone->data), timezone->size); + std::string_view tz_file = getTimeZone(name.data()); + + if (!tz_file.empty()) + return std::make_unique(tz_file.data(), tz_file.size()); return fallback(name); } diff --git a/src/Common/tests/gtest_DateLUTImpl.cpp b/src/Common/tests/gtest_DateLUTImpl.cpp index b09319c78d6..3d3a3f04941 100644 --- a/src/Common/tests/gtest_DateLUTImpl.cpp +++ b/src/Common/tests/gtest_DateLUTImpl.cpp @@ -15,8 +15,7 @@ #endif // All timezones present at build time and embedded into ClickHouse binary. -struct TimeZone { const char * name; const unsigned char * data; size_t size; }; -extern TimeZone auto_time_zones[]; +extern const char * auto_time_zones[]; namespace { @@ -33,14 +32,14 @@ std::vector allTimezones(bool with_weird_offsets = true) { std::vector result; - const TimeZone * timezone = auto_time_zones; - while (timezone->name) + const auto * timezone_name = auto_time_zones; + while (*timezone_name) { - bool weird_offsets = (std::string_view(timezone->name) == "Africa/Monrovia"); + bool weird_offsets = (std::string_view(*timezone_name) == "Africa/Monrovia"); if (!weird_offsets || with_weird_offsets) - result.push_back(timezone->name); - ++timezone; + result.push_back(*timezone_name); + ++timezone_name; } return result; diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 6b7d1739e33..c3a2e726365 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -30,7 +30,6 @@ endif() add_dependencies(generate-source generate-contributors) set(GENERATED_LICENSES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemLicenses.generated.cpp") -set(GENERATED_TIMEZONES_SRC "${CMAKE_CURRENT_BINARY_DIR}/StorageSystemTimeZones.generated.cpp") add_custom_command( OUTPUT StorageSystemLicenses.generated.cpp @@ -38,7 +37,6 @@ add_custom_command( WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) list (APPEND storages_system_sources ${GENERATED_LICENSES_SRC}) -list (APPEND storages_system_sources ${GENERATED_TIMEZONES_SRC}) # Overlength strings set_source_files_properties(${GENERATED_LICENSES_SRC} PROPERTIES COMPILE_FLAGS -w) diff --git a/src/Storages/System/StorageSystemTimeZones.cpp b/src/Storages/System/StorageSystemTimeZones.cpp index 41227ab7780..dc3711812a6 100644 --- a/src/Storages/System/StorageSystemTimeZones.cpp +++ b/src/Storages/System/StorageSystemTimeZones.cpp @@ -4,8 +4,7 @@ #include -struct TimeZone { const char * name; const unsigned char * data; size_t size; }; -extern TimeZone auto_time_zones[]; +extern const char * 
auto_time_zones[]; namespace DB { @@ -18,7 +17,7 @@ NamesAndTypesList StorageSystemTimeZones::getNamesAndTypes() void StorageSystemTimeZones::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const { - for (auto * it = auto_time_zones; it->name != nullptr; ++it) - res_columns[0]->insert(String(it->name)); + for (auto * it = auto_time_zones; *it; ++it) + res_columns[0]->insert(String(*it)); } } From 8013cb1f784f6324b3c7b227499751dc7e666009 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 23 Jul 2023 08:46:44 +0200 Subject: [PATCH 309/478] Remove skip_startup_tables from IDatabase::loadStoredObjects() Signed-off-by: Azat Khuzhin --- src/Databases/DatabaseAtomic.cpp | 5 ++--- src/Databases/DatabaseAtomic.h | 2 +- src/Databases/DatabaseLazy.cpp | 3 +-- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOrdinary.cpp | 9 +-------- src/Databases/DatabaseOrdinary.h | 2 +- src/Databases/DatabaseReplicated.cpp | 5 ++--- src/Databases/DatabaseReplicated.h | 2 +- src/Databases/IDatabase.h | 3 +-- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Databases/MySQL/DatabaseMySQL.h | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.h | 2 +- src/Databases/TablesLoader.cpp | 2 +- 14 files changed, 16 insertions(+), 27 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 7e20b6f6535..0f65069db35 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -441,11 +441,10 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, Loadin } } -void DatabaseAtomic::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables) +void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) { beforeLoadingMetadata(local_context, mode); - DatabaseOrdinary::loadStoredObjects(local_context, mode, skip_startup_tables); + DatabaseOrdinary::loadStoredObjects(local_context, mode); } void DatabaseAtomic::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index cb275812098..70553b2d5c2 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -48,7 +48,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index f27c6c0c3ee..896ae99656f 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -37,8 +37,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, } -void DatabaseLazy::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */) +void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/) { iterateMetadataFiles(local_context, [this, &local_context](const String & file_name) { diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index b01038073ef..2b1b119754d 100644 --- a/src/Databases/DatabaseLazy.h 
+++ b/src/Databases/DatabaseLazy.h @@ -26,7 +26,7 @@ public: bool canContainDistributedTables() const override { return false; } - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel /*mode*/) override; void createTable( ContextPtr context, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8c92b8064ca..51d37b84e14 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -89,8 +89,7 @@ DatabaseOrdinary::DatabaseOrdinary( { } -void DatabaseOrdinary::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables) +void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) { /** Tables load faster if they are loaded in sorted (by name) order. * Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, @@ -159,12 +158,6 @@ void DatabaseOrdinary::loadStoredObjects( } pool.wait(); - - if (!skip_startup_tables) - { - /// After all tables was basically initialized, startup them. - startupTables(pool, mode); - } } void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup) diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index f9aa3214ef5..cabc8f9c55b 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -21,7 +21,7 @@ public: String getEngineName() const override { return "Ordinary"; } - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override; bool supportsLoadingInTopologicalOrder() const override { return true; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 25c23e2be17..d3b3d4b545f 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -495,11 +495,10 @@ void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, Lo tryConnectToZooKeeperAndInitDatabase(mode); } -void DatabaseReplicated::loadStoredObjects( - ContextMutablePtr local_context, LoadingStrictnessLevel mode, bool skip_startup_tables) +void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) { beforeLoadingMetadata(local_context, mode); - DatabaseAtomic::loadStoredObjects(local_context, mode, skip_startup_tables); + DatabaseAtomic::loadStoredObjects(local_context, mode); } UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index ff1a4aba41c..8e33f482ac1 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -67,7 +67,7 @@ public: void drop(ContextPtr /*context*/) override; - void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index a9577dfc84a..9bed3c4bfc5 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h 
@@ -134,8 +134,7 @@ public: /// You can call only once, right after the object is created. virtual void loadStoredObjects( /// NOLINT ContextMutablePtr /*context*/, - LoadingStrictnessLevel /*mode*/, - bool /* skip_startup_tables */) + LoadingStrictnessLevel /*mode*/) { } diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 70bd32efed9..94e5ba1773e 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -402,7 +402,7 @@ String DatabaseMySQL::getMetadataPath() const return metadata_path; } -void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */) +void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) { std::lock_guard lock{mutex}; diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index f34a2fff4f7..e5b1f434d2f 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -76,7 +76,7 @@ public: void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override; - void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override; StoragePtr detachTable(ContextPtr context, const String & table_name) override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index f4d750f85d4..812a0d8717e 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -296,7 +296,7 @@ void DatabasePostgreSQL::drop(ContextPtr /*context*/) } -void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/, bool /* skip_startup_tables */) +void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, LoadingStrictnessLevel /*mode*/) { { std::lock_guard lock{mutex}; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index 31fa036c0ee..d731e06649b 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -44,7 +44,7 @@ public: bool empty() const override; - void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/, bool skip_startup_tables) override; + void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index ea0f2072430..f8b4e7fe33b 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -49,7 +49,7 @@ void TablesLoader::loadTables() if (need_resolve_dependencies && database.second->supportsLoadingInTopologicalOrder()) databases_to_load.push_back(database.first); else - database.second->loadStoredObjects(global_context, strictness_mode, /* skip_startup_tables */ true); + database.second->loadStoredObjects(global_context, strictness_mode); } if (databases_to_load.empty()) From 282258a855cfed40e0b2cd7c0ada3ec1defe8e06 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Sun, 23 Jul 2023 11:29:29 +0200 Subject: [PATCH 310/478] fix style --- src/Common/OptimizedRegularExpression.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index e636b0b987d..05e6aefbb5e 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -439,7 +440,7 @@ catch (...) is_trivial = false; required_substring_is_prefix = false; alternatives.clear(); - std::cerr << "Analyze RegularExpression failed, got error: {}" << DB::getCurrentExceptionMessage(false) << "\n"; + LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false)); } template From 4c1f8f38cd4073b24064e076a677082db546c680 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 17:30:23 +0200 Subject: [PATCH 311/478] Fix CI --- docker/test/fasttest/run.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index e25b5fdbfed..60e6199aaa4 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -147,6 +147,7 @@ function clone_submodules contrib/simdjson contrib/liburing contrib/libfiu + contrib/incbin ) git submodule sync From 8902bbdb60b466498ab2825000502195d5d35c91 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 17:59:49 +0200 Subject: [PATCH 312/478] Fix fasttest --- src/Common/FrequencyHolder.cpp | 4 ++++ src/Common/FrequencyHolder.h | 6 ++++++ src/Functions/FunctionsCharsetClassification.cpp | 2 +- src/Functions/FunctionsLanguageClassification.cpp | 4 +--- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp index 3b755cacacb..fe03e6a1b44 100644 --- a/src/Common/FrequencyHolder.cpp +++ b/src/Common/FrequencyHolder.cpp @@ -1,5 +1,7 @@ #include +#if USE_NLP + #include /// Embedded SQL definitions @@ -179,3 +181,5 @@ void FrequencyHolder::loadProgrammingFrequency() } } + +#endif diff --git a/src/Common/FrequencyHolder.h b/src/Common/FrequencyHolder.h index 270e4dbbd2a..73675ed9814 100644 --- a/src/Common/FrequencyHolder.h +++ b/src/Common/FrequencyHolder.h @@ -1,5 +1,9 @@ #pragma once +#include "config.h" + +#if USE_NLP + #include #include @@ -81,3 +85,5 @@ private: EncodingContainer encodings_freq; }; } + +#endif diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index a25da8f6c13..237d4c37fa2 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -3,7 +3,7 @@ #include #include -#include + namespace DB { diff --git a/src/Functions/FunctionsLanguageClassification.cpp b/src/Functions/FunctionsLanguageClassification.cpp index 6088fd52efa..55485d41ce0 100644 --- a/src/Functions/FunctionsLanguageClassification.cpp +++ b/src/Functions/FunctionsLanguageClassification.cpp @@ -5,19 +5,17 @@ #include #include #include -#include #include #include #include -#include #include #include #include #include -#include #include + namespace DB { /* Determine language of Unicode UTF-8 text. 
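
The "Fix CI" and "Fix fasttest" commits above come down to one pattern: resources are now embedded with incbin, so any code that depends on them must be linked against contrib/incbin and compiled out entirely when the corresponding feature (here NLP) is disabled. The following is a minimal illustrative sketch, not taken from the patches themselves: the file name "example.txt", the symbol "resource_example" and the helper "getExampleResource" are placeholder names, and "config.h" stands for the build-generated header that defines feature macros such as USE_NLP.

#include "config.h"

#if USE_NLP

#include <string_view>
#include <incbin.h>

/// Embed the file at build time; incbin exposes the data through the
/// generated gresource_exampleData / gresource_exampleSize symbols.
INCBIN(resource_example, "example.txt");

/// Return the embedded bytes as a string_view, mirroring how the patches
/// above read schemata.sql, tables.sql, the timezone files, etc.
static std::string_view getExampleResource()
{
    return {reinterpret_cast<const char *>(gresource_exampleData), gresource_exampleSize};
}

#endif

When USE_NLP is 0 (as in builds without the NLP dictionaries, such as the fast test), the whole translation unit compiles to nothing, which is why the REGISTER_FUNCTION bodies in FunctionsCharsetClassification.cpp and the other detect* sources are wrapped the same way in these commits.
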
From 43bd6d1b8336f282cc4548c0f61b52516f49ac13 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 19:00:49 +0300 Subject: [PATCH 313/478] Revert "Add an ability to specify allocations size for sampling memory profiler" --- programs/server/Server.cpp | 21 +++-------- src/Common/MemoryTracker.cpp | 10 +---- src/Common/MemoryTracker.h | 18 --------- src/Core/ServerSettings.h | 8 +--- src/Core/Settings.h | 4 +- src/Interpreters/ProcessList.cpp | 3 -- src/Interpreters/ThreadStatusExt.cpp | 2 - .../__init__.py | 1 - .../configs/max_untracked_memory.xml | 7 ---- .../configs/memory_profiler.xml | 5 --- .../test.py | 37 ------------------- ...r_sample_min_max_allocation_size.reference | 1 - ...profiler_sample_min_max_allocation_size.sh | 18 --------- 13 files changed, 11 insertions(+), 124 deletions(-) delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/__init__.py delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml delete mode 100644 tests/integration/test_memory_profiler_min_max_borders/test.py delete mode 100644 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference delete mode 100755 tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 33fdcc9c1a8..9202d4b32c1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1637,26 +1637,17 @@ try global_context->initializeTraceCollector(); /// Set up server-wide memory profiler (for total memory tracker). - if (server_settings.total_memory_profiler_step) + UInt64 total_memory_profiler_step = config().getUInt64("total_memory_profiler_step", 0); + if (total_memory_profiler_step) { - total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step); + total_memory_tracker.setProfilerStep(total_memory_profiler_step); } - if (server_settings.total_memory_tracker_sample_probability > 0.0) + double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0); + if (total_memory_tracker_sample_probability > 0.0) { - total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability); + total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability); } - - if (server_settings.total_memory_profiler_sample_min_allocation_size) - { - total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size); - } - - if (server_settings.total_memory_profiler_sample_max_allocation_size) - { - total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); - } - } #endif diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 52cae0768dc..81cac2617c5 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -229,7 +229,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = size}); 
@@ -413,7 +413,7 @@ void MemoryTracker::free(Int64 size) } std::bernoulli_distribution sample(sample_probability); - if (unlikely(sample_probability > 0.0 && isSizeOkForSampling(size) && sample(thread_local_rng))) + if (unlikely(sample_probability > 0.0 && sample(thread_local_rng))) { MemoryTrackerBlockerInThread untrack_lock(VariableContext::Global); DB::TraceSender::send(DB::TraceType::MemorySample, StackTrace(), {.size = -size}); @@ -534,12 +534,6 @@ void MemoryTracker::setOrRaiseProfilerLimit(Int64 value) ; } -bool MemoryTracker::isSizeOkForSampling(UInt64 size) const -{ - /// We can avoid comparison min_allocation_size_bytes with zero, because we cannot have 0 bytes allocation/deallocation - return ((max_allocation_size_bytes == 0 || size <= max_allocation_size_bytes) && size >= min_allocation_size_bytes); -} - bool canEnqueueBackgroundTask() { auto limit = background_memory_tracker.getSoftLimit(); diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 768dc8a7404..4e29d40c953 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -67,12 +67,6 @@ private: /// To randomly sample allocations and deallocations in trace_log. double sample_probability = 0; - /// Randomly sample allocations only larger or equal to this size - UInt64 min_allocation_size_bytes = 0; - - /// Randomly sample allocations only smaller or equal to this size - UInt64 max_allocation_size_bytes = 0; - /// Singly-linked list. All information will be passed to subsequent memory trackers also (it allows to implement trackers hierarchy). /// In terms of tree nodes it is the list of parents. Lifetime of these trackers should "include" lifetime of current tracker. std::atomic parent {}; @@ -94,8 +88,6 @@ private: void setOrRaiseProfilerLimit(Int64 value); - bool isSizeOkForSampling(UInt64 size) const; - /// allocImpl(...) and free(...) should not be used directly friend struct CurrentMemoryTracker; void allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryTracker * query_tracker = nullptr); @@ -173,16 +165,6 @@ public: sample_probability = value; } - void setSampleMinAllocationSize(UInt64 value) - { - min_allocation_size_bytes = value; - } - - void setSampleMaxAllocationSize(UInt64 value) - { - max_allocation_size_bytes = value; - } - void setProfilerStep(Int64 value) { profiler_step = value; diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index f7a6c9e950e..1a9f226041b 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -81,12 +81,8 @@ namespace DB M(UInt64, background_schedule_pool_size, 128, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ - M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ - \ - M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. 
Values lower than a few megabytes will slow down server.", 0) \ - M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ - M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) + M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) + DECLARE_SETTINGS_TRAITS(ServerSettingsTraits, SERVER_SETTINGS) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4fc93500910..24be644ee55 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -427,9 +427,7 @@ class IColumn; M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ - M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ - M(UInt64, memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ - M(UInt64, memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `memory_profiler_sample_probability`. 
0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \ + M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \ M(Bool, trace_profile_events, false, "Send to system.trace_log profile event and value of increment on each increment with 'ProfileEvent' trace_type", 0) \ \ M(UInt64, memory_usage_overcommit_max_wait_microseconds, 5'000'000, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown.", 0) \ diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index c299572ef41..1503e396298 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -223,10 +223,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { /// Set up memory profiling thread_group->memory_tracker.setProfilerStep(settings.memory_profiler_step); - thread_group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - thread_group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); - thread_group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); thread_group->performance_counters.setTraceProfileEvents(settings.trace_profile_events); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index bac16c05533..398bea26b87 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -83,8 +83,6 @@ ThreadGroupPtr ThreadGroup::createForBackgroundProcess(ContextPtr storage_contex const Settings & settings = storage_context->getSettingsRef(); group->memory_tracker.setProfilerStep(settings.memory_profiler_step); group->memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - group->memory_tracker.setSampleMinAllocationSize(settings.memory_profiler_sample_min_allocation_size); - group->memory_tracker.setSampleMaxAllocationSize(settings.memory_profiler_sample_max_allocation_size); group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); group->memory_tracker.setParent(&background_memory_tracker); if (settings.memory_tracker_fault_probability > 0.0) diff --git a/tests/integration/test_memory_profiler_min_max_borders/__init__.py b/tests/integration/test_memory_profiler_min_max_borders/__init__.py deleted file mode 100644 index e5a0d9b4834..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml b/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml deleted file mode 100644 index 56fc5ed34ca..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/configs/max_untracked_memory.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - 1 - - - diff --git a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml 
b/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml deleted file mode 100644 index 5b3e17d145f..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/configs/memory_profiler.xml +++ /dev/null @@ -1,5 +0,0 @@ - - 1 - 4096 - 8192 - diff --git a/tests/integration/test_memory_profiler_min_max_borders/test.py b/tests/integration/test_memory_profiler_min_max_borders/test.py deleted file mode 100644 index 6ab971fa9c4..00000000000 --- a/tests/integration/test_memory_profiler_min_max_borders/test.py +++ /dev/null @@ -1,37 +0,0 @@ -from helpers.cluster import ClickHouseCluster -import pytest - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance( - "node", - main_configs=["configs/memory_profiler.xml"], - user_configs=["configs/max_untracked_memory.xml"], -) - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - yield cluster - - finally: - cluster.shutdown() - - -def test_trace_boundaries_work(started_cluster): - node.query("select randomPrintableASCII(number) from numbers(1000) FORMAT Null") - node.query("SYSTEM FLUSH LOGS") - - assert ( - node.query( - "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where trace_type = 'MemorySample'" - ) - == "1\n" - ) - assert ( - node.query( - "SELECT count() FROM system.trace_log where trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" - ) - == "0\n" - ) diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference deleted file mode 100644 index d00491fd7e5..00000000000 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.reference +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh b/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh deleted file mode 100755 index b1fbea26da7..00000000000 --- a/tests/queries/0_stateless/02818_memory_profiler_sample_min_max_allocation_size.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64, no-random-settings -# requires TraceCollector, does not available under sanitizers and aarch64 - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -query_id="${CLICKHOUSE_DATABASE}_min_max_allocation_size_$RANDOM$RANDOM" -${CLICKHOUSE_CLIENT} --query_id="$query_id" --memory_profiler_sample_min_allocation_size=4096 --memory_profiler_sample_max_allocation_size=8192 --log_queries=1 --max_threads=1 --max_untracked_memory=0 --memory_profiler_sample_probability=1 --query "select randomPrintableASCII(number) from numbers(1000) FORMAT Null" - -${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" - -# at least something allocated -${CLICKHOUSE_CLIENT} --query "SELECT countDistinct(abs(size)) > 0 FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample'" - -# show wrong allocations -${CLICKHOUSE_CLIENT} --query "SELECT abs(size) FROM system.trace_log where query_id='$query_id' and trace_type = 'MemorySample' and (abs(size) > 8192 or abs(size) < 4096)" From e56e1ebd5d8fbb808867c1f98e421383acf38b1f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 18:29:07 +0200 Subject: [PATCH 314/478] Fix fasttest --- src/Functions/FunctionsCharsetClassification.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 237d4c37fa2..7704e3eafc0 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -1,4 +1,9 @@ #include + +#include "config.h" + +#if USE_NLP + #include #include @@ -150,3 +155,5 @@ REGISTER_FUNCTION(DetectCharset) } } + +#endif From 039cac69cf6d30cc58c8531b1efac4d9847cb599 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 23 Jul 2023 18:35:37 +0200 Subject: [PATCH 315/478] Fix test_insert_same_partition_and_merge by increasing wait time --- tests/integration/test_merge_tree_azure_blob_storage/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index 761b5257a34..86b70f8db70 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -215,7 +215,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical): if attempt == 59: assert parts_count == "(1)" - time.sleep(1) + time.sleep(10) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( From 311b3adf89b9d54c4b3bf40feb4179d967ed3d2e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 20:10:41 +0200 Subject: [PATCH 316/478] Fix fasttest --- src/Functions/FunctionsCharsetClassification.cpp | 2 -- src/Functions/FunctionsProgrammingClassification.cpp | 5 +++++ src/Functions/FunctionsTonalityClassification.cpp | 5 +++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 7704e3eafc0..05b173e3d95 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -1,7 +1,5 @@ #include -#include "config.h" - #if USE_NLP #include diff --git a/src/Functions/FunctionsProgrammingClassification.cpp b/src/Functions/FunctionsProgrammingClassification.cpp index 8a552a30e65..a93e1d9a87d 100644 --- a/src/Functions/FunctionsProgrammingClassification.cpp +++ b/src/Functions/FunctionsProgrammingClassification.cpp @@ -1,4 +1,7 @@ #include + +#if USE_NLP + #include #include #include @@ -118,3 +121,5 @@ 
REGISTER_FUNCTION(DetectProgrammingLanguage) } } + +#endif diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index e39f9c63758..3de38d99c88 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -1,4 +1,7 @@ #include + +#if USE_NLP + #include #include #include @@ -87,3 +90,5 @@ REGISTER_FUNCTION(DetectTonality) } } + +#endif From 49f4ef6ffb9264d8b4a31c8e4ab683f01afd4268 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 20:11:24 +0200 Subject: [PATCH 317/478] Fix typo --- src/Functions/FunctionsCharsetClassification.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsCharsetClassification.cpp b/src/Functions/FunctionsCharsetClassification.cpp index 05b173e3d95..0a332ab70a9 100644 --- a/src/Functions/FunctionsCharsetClassification.cpp +++ b/src/Functions/FunctionsCharsetClassification.cpp @@ -49,7 +49,7 @@ namespace return res; } - /// Сount how many times each bigram occurs in the text. + /// Count how many times each bigram occurs in the text. template ALWAYS_INLINE inline void calculateStats( const UInt8 * data, From e21a4c4c9a3f50436b8e708b6a38cdf8eee3c6be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 20:57:16 +0200 Subject: [PATCH 318/478] Fix the test --- .../02415_all_new_functions_must_be_documented.reference | 4 ---- .../02415_all_new_functions_must_be_documented.sql | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 595ebb483d5..b7097ad329b 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -238,10 +238,6 @@ defaultValueOfArgumentType defaultValueOfTypeName degrees demangle -detectCharset -detectLanguageUnknown -detectProgrammingLanguage -detectTonality divide dotProduct dumpColumnStructure diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql index ed95c06d016..4f40da6c626 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.sql @@ -15,5 +15,7 @@ AND name NOT IN ( 'h3ToGeoBoundary', 'h3ToParent', 'h3ToString', 'h3UnidirectionalEdgeIsValid', 'h3kRing', 'stringToH3', 'geoToS2', 's2CapContains', 's2CapUnion', 's2CellsIntersect', 's2GetNeighbors', 's2RectAdd', 's2RectContains', 's2RectIntersection', 's2RectUnion', 's2ToGeo', 'normalizeUTF8NFC', 'normalizeUTF8NFD', 'normalizeUTF8NFKC', 'normalizeUTF8NFKD', - 'lemmatize', 'tokenize', 'stem', 'synonyms' -- these functions are not enabled in fast test + 'lemmatize', 'tokenize', 'stem', 'synonyms', + 'detectCharset', 'detectLanguageUnknown', 'detectProgrammingLanguage', 'detectTonality' + -- these functions are not enabled in fast test ) ORDER BY name; From 67f643f27e5930765d0b6881c415ffacf369c14f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jul 2023 21:00:28 +0200 Subject: [PATCH 319/478] Fix error --- contrib/cctz-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 
7edeada6e59..fde31dd469d 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -55,7 +55,7 @@ foreach(TIMEZONE ${TIMEZONES}) MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) -file(APPEND ${TIMEZONES_FILE} "};\n\n") +file(APPEND ${TIMEZONES_FILE} " nullptr\n};\n\n") file(APPEND ${TIMEZONES_FILE} "#include \n\n") file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" ) From e02948580b31c61e32860da04f966a21231e14c7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 23 Jul 2023 22:38:59 +0200 Subject: [PATCH 320/478] Don't shutdown interserver before tables --- programs/server/Server.cpp | 91 +++++++++++++++++++++++++++++--------- programs/server/Server.h | 11 ++++- 2 files changed, 79 insertions(+), 23 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c7a7ba71e83..8c6e41d28c6 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -739,11 +739,12 @@ try [&]() -> std::vector { std::vector metrics; - metrics.reserve(servers_to_start_before_tables.size()); + + std::lock_guard lock(servers_lock); + metrics.reserve(servers_to_start_before_tables.size() + servers.size()); for (const auto & server : servers_to_start_before_tables) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); - std::lock_guard lock(servers_lock); for (const auto & server : servers) metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()}); return metrics; @@ -1302,7 +1303,7 @@ try global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config); std::lock_guard lock(servers_lock); - updateServers(*config, server_pool, async_metrics, servers); + updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables); } global_context->updateStorageConfiguration(*config); @@ -1404,10 +1405,27 @@ try } - for (auto & server : servers_to_start_before_tables) { - server.start(); - LOG_INFO(log, "Listening for {}", server.getDescription()); + std::lock_guard lock(servers_lock); + /// We should start interserver communications before (and more imporant shutdown after) tables. + /// Because server can wait for a long-running queries (for example in tcp_handler) after interserver handler was already shut down. + /// In this case we will have replicated tables which are unable to send any parts to other replicas, but still can + /// communicate with zookeeper, execute merges, etc. + createInterserverServers( + config(), + interserver_listen_hosts, + listen_try, + server_pool, + async_metrics, + servers_to_start_before_tables, + /* start_servers= */ false); + + + for (auto & server : servers_to_start_before_tables) + { + server.start(); + LOG_INFO(log, "Listening for {}", server.getDescription()); + } } /// Initialize access storages. 
@@ -1527,10 +1545,13 @@ try { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); size_t current_connections = 0; - for (auto & server : servers_to_start_before_tables) { - server.stop(); - current_connections += server.currentConnections(); + std::lock_guard lock(servers_lock); + for (auto & server : servers_to_start_before_tables) + { + server.stop(); + current_connections += server.currentConnections(); + } } if (current_connections) @@ -1709,7 +1730,7 @@ try { std::lock_guard lock(servers_lock); - createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers); + createServers(config(), listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " @@ -1967,7 +1988,6 @@ HTTPContextPtr Server::httpContext() const void Server::createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, - const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, @@ -2189,6 +2209,23 @@ void Server::createServers( httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); }); } +} + +void Server::createInterserverServers( + Poco::Util::AbstractConfiguration & config, + const Strings & interserver_listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers) +{ + const Settings & settings = global_context->getSettingsRef(); + + Poco::Timespan keep_alive_timeout(config.getUInt("keep_alive_timeout", 10), 0); + Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams; + http_params->setTimeout(settings.http_receive_timeout); + http_params->setKeepAliveTimeout(keep_alive_timeout); /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) @@ -2237,14 +2274,14 @@ void Server::createServers( #endif }); } - } void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers) + std::vector & servers, + std::vector & servers_to_start_before_tables) { Poco::Logger * log = &logger(); @@ -2270,11 +2307,19 @@ void Server::updateServers( Poco::Util::AbstractConfiguration & previous_config = latest_config ? *latest_config : this->config(); + std::vector all_servers; + all_servers.reserve(servers.size() + servers_to_start_before_tables.size()); for (auto & server : servers) + all_servers.push_back(&server); + + for (auto & server : servers_to_start_before_tables) + all_servers.push_back(&server); + + for (auto * server : all_servers) { - if (!server.isStopping()) + if (!server->isStopping()) { - std::string port_name = server.getPortName(); + std::string port_name = server->getPortName(); bool has_host = false; bool is_http = false; if (port_name.starts_with("protocols.")) @@ -2312,27 +2357,29 @@ void Server::updateServers( /// NOTE: better to compare using getPortName() over using /// dynamic_cast<> since HTTPServer is also used for prometheus and /// internal replication communications. 
- is_http = server.getPortName() == "http_port" || server.getPortName() == "https_port"; + is_http = server->getPortName() == "http_port" || server->getPortName() == "https_port"; } if (!has_host) - has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server.getListenHost()) != listen_hosts.end(); + has_host = std::find(listen_hosts.begin(), listen_hosts.end(), server->getListenHost()) != listen_hosts.end(); bool has_port = !config.getString(port_name, "").empty(); bool force_restart = is_http && !isSameConfiguration(previous_config, config, "http_handlers"); if (force_restart) - LOG_TRACE(log, " had been changed, will reload {}", server.getDescription()); + LOG_TRACE(log, " had been changed, will reload {}", server->getDescription()); - if (!has_host || !has_port || config.getInt(server.getPortName()) != server.portNumber() || force_restart) + if (!has_host || !has_port || config.getInt(server->getPortName()) != server->portNumber() || force_restart) { - server.stop(); - LOG_INFO(log, "Stopped listening for {}", server.getDescription()); + server->stop(); + LOG_INFO(log, "Stopped listening for {}", server->getDescription()); } } } - createServers(config, listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createServers(config, listen_hosts, listen_try, server_pool, async_metrics, servers, /* start_servers= */ true); + createInterserverServers(config, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers_to_start_before_tables, /* start_servers= */ true); std::erase_if(servers, std::bind_front(check_server, "")); + std::erase_if(servers_to_start_before_tables, std::bind_front(check_server, "")); } } diff --git a/programs/server/Server.h b/programs/server/Server.h index e9ae6d8d937..d13378dcd65 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -102,6 +102,14 @@ private: void createServers( Poco::Util::AbstractConfiguration & config, const Strings & listen_hosts, + bool listen_try, + Poco::ThreadPool & server_pool, + AsynchronousMetrics & async_metrics, + std::vector & servers, + bool start_servers = false); + + void createInterserverServers( + Poco::Util::AbstractConfiguration & config, const Strings & interserver_listen_hosts, bool listen_try, Poco::ThreadPool & server_pool, @@ -113,7 +121,8 @@ private: Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, - std::vector & servers); + std::vector & servers, + std::vector & servers_to_start_before_tables); }; } From c0f16dcf031b62e2eebdef249c132e9351203bc0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sun, 23 Jul 2023 21:10:12 +0000 Subject: [PATCH 321/478] Test from fuzzer --- .../02831_ast_fuzz_asan_join.reference | 0 .../0_stateless/02831_ast_fuzz_asan_join.sql | 22 +++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference create mode 100644 tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql diff --git a/tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql new file mode 100644 index 00000000000..7c7bfd2df88 --- /dev/null +++ b/tests/queries/0_stateless/02831_ast_fuzz_asan_join.sql @@ -0,0 +1,22 @@ +SELECT + '0', + 
toTypeName(materialize(js2.s)) +FROM +( + SELECT number AS k + FROM numbers(100) +) AS js1 +FULL OUTER JOIN +( + SELECT + toLowCardinality(2147483647 + 256) AS k, + '-0.0000000001', + 1024, + toString(number + 10) AS s + FROM numbers(1024) +) AS js2 ON js1.k = js2.k +ORDER BY + inf DESC NULLS FIRST, + js1.k ASC NULLS LAST, + js2.k ASC +FORMAT `Null` From 1e467867e68c2c382f26291753bab45e2bc87a60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 00:03:40 +0200 Subject: [PATCH 322/478] Attempt to fix LTO --- contrib/cctz-cmake/CMakeLists.txt | 3 +-- programs/install/Install.cpp | 6 ++++-- programs/server/Server.cpp | 2 +- src/Common/FrequencyHolder.cpp | 6 +++--- src/Common/config.h.in | 4 ++++ src/Server/WebUIRequestHandler.cpp | 8 +++++--- src/Storages/System/attachInformationSchemaTables.cpp | 10 ++++++---- src/configure_config.cmake | 2 ++ 8 files changed, 26 insertions(+), 15 deletions(-) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index fde31dd469d..7161f743de1 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -44,7 +44,7 @@ file(APPEND ${TIMEZONES_FILE} "#include \n") set (COUNTER 1) foreach(TIMEZONE ${TIMEZONES}) - file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TIMEZONE}\");\n") + file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n") MATH(EXPR COUNTER "${COUNTER}+1") endforeach(TIMEZONE) @@ -71,7 +71,6 @@ file(APPEND ${TIMEZONES_FILE} "}\n") add_library (tzdata ${TIMEZONES_FILE}) target_link_libraries(tzdata ch_contrib::incbin) -target_include_directories(tzdata PRIVATE ${TZDIR}) target_link_libraries(_cctz tzdata) add_library(ch_contrib::cctz ALIAS _cctz) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index da2c95af62c..d7086c95beb 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -34,9 +34,11 @@ #include +#include "config.h" + /// Embedded configuration files used inside the install program -INCBIN(resource_config_xml, "config.xml"); -INCBIN(resource_users_xml, "users.xml"); +INCBIN(resource_config_xml, SOURCE_DIR "/programs/server/config.xml"); +INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); /** This tool can be used to install ClickHouse without a deb/rpm/tgz package, having only "clickhouse" binary. 
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 229a169dc1e..2ab89ad048a 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -130,7 +130,7 @@ #include /// A minimal file used when the server is run without installation -INCBIN(resource_embedded_xml, "embedded.xml"); +INCBIN(resource_embedded_xml, SOURCE_DIR "/programs/server/embedded.xml"); namespace CurrentMetrics { diff --git a/src/Common/FrequencyHolder.cpp b/src/Common/FrequencyHolder.cpp index fe03e6a1b44..7dc1f622aeb 100644 --- a/src/Common/FrequencyHolder.cpp +++ b/src/Common/FrequencyHolder.cpp @@ -5,9 +5,9 @@ #include /// Embedded SQL definitions -INCBIN(resource_charset_zst, "charset.zst"); -INCBIN(resource_tonality_ru_zst, "tonality_ru.zst"); -INCBIN(resource_programming_zst, "programming.zst"); +INCBIN(resource_charset_zst, SOURCE_DIR "/contrib/nlp-data/charset.zst"); +INCBIN(resource_tonality_ru_zst, SOURCE_DIR "/contrib/nlp-data/tonality_ru.zst"); +INCBIN(resource_programming_zst, SOURCE_DIR "/contrib/nlp-data/programming.zst"); namespace DB diff --git a/src/Common/config.h.in b/src/Common/config.h.in index a2c18fc330f..628f0847d65 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -59,3 +59,7 @@ #cmakedefine01 USE_ULID #cmakedefine01 FIU_ENABLE #cmakedefine01 USE_BCRYPT + +/// This is needed for .incbin in assembly. For some reason, include paths don't work there in presence of LTO. +/// That's why we use absolute paths. +#cmakedefine SOURCE_DIR "@SOURCE_DIR@" diff --git a/src/Server/WebUIRequestHandler.cpp b/src/Server/WebUIRequestHandler.cpp index cb9e8935d8c..6fa1d65de42 100644 --- a/src/Server/WebUIRequestHandler.cpp +++ b/src/Server/WebUIRequestHandler.cpp @@ -11,10 +11,12 @@ #include +#include "config.h" + /// Embedded HTML pages -INCBIN(resource_play_html, "play.html"); -INCBIN(resource_dashboard_html, "dashboard.html"); -INCBIN(resource_uplot_js, "js/uplot.js"); +INCBIN(resource_play_html, SOURCE_DIR "/programs/server/play.html"); +INCBIN(resource_dashboard_html, SOURCE_DIR "/programs/server/dashboard.html"); +INCBIN(resource_uplot_js, SOURCE_DIR "/programs/server/js/uplot.js"); namespace DB diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index bfc5c8c64e2..12cef89b553 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -5,11 +5,13 @@ #include #include +#include "config.h" + /// Embedded SQL definitions -INCBIN(resource_schemata_sql, "schemata.sql"); -INCBIN(resource_tables_sql, "tables.sql"); -INCBIN(resource_views_sql, "views.sql"); -INCBIN(resource_columns_sql, "columns.sql"); +INCBIN(resource_schemata_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/schemata.sql"); +INCBIN(resource_tables_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/tables.sql"); +INCBIN(resource_views_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/views.sql"); +INCBIN(resource_columns_sql, SOURCE_DIR "/src/Storages/System/InformationSchema/columns.sql"); namespace DB diff --git a/src/configure_config.cmake b/src/configure_config.cmake index ae6305705c2..5529e2f2f39 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -162,3 +162,5 @@ endif () if (TARGET ch_contrib::fiu) set(FIU_ENABLE 1) endif() + +set(SOURCE_DIR ${CMAKE_SOURCE_DIR}) From 7b4d0cf9d5b261eb68bd1db4021fcc350b907fc1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 00:51:20 +0200 Subject: [PATCH 
323/478] Fix Darwin --- contrib/incbin-cmake/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt index e64ebc99c73..8f4dad7e0d9 100644 --- a/contrib/incbin-cmake/CMakeLists.txt +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -2,3 +2,7 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin") add_library(_incbin INTERFACE) target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR}) add_library(ch_contrib::incbin ALIAS _incbin) + +# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. +# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." +target_compile_definitions(_inclin PUBLIC INCBIN_SILENCE_BITCODE_WARNING) From 641c086dbd771c14cc7db089e265ec508da9ccff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 00:53:11 +0200 Subject: [PATCH 324/478] Fix Darwin --- contrib/incbin-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt index 8f4dad7e0d9..5778cf83c22 100644 --- a/contrib/incbin-cmake/CMakeLists.txt +++ b/contrib/incbin-cmake/CMakeLists.txt @@ -5,4 +5,4 @@ add_library(ch_contrib::incbin ALIAS _incbin) # Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled. # Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning." -target_compile_definitions(_inclin PUBLIC INCBIN_SILENCE_BITCODE_WARNING) +target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING) From 40f5649811bb579b3cf8d634281f862675934773 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 01:02:11 +0200 Subject: [PATCH 325/478] Fix test --- .../no_allow_vertical_merges_from_compact_to_wide_parts.xml | 5 +++++ .../test_vertical_merges_from_compact_parts.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml diff --git a/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml new file mode 100644 index 00000000000..c69be846c46 --- /dev/null +++ b/tests/integration/test_backward_compatibility/configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml @@ -0,0 +1,5 @@ + + + 0 + + diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 82ffcc20b60..481621cacfe 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -15,7 +15,7 @@ node_old = cluster.add_instance( ) node_new = cluster.add_instance( "node2", - main_configs=["configs/no_compress_marks.xml"], + main_configs=["configs/no_compress_marks.xml", "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml"], with_zookeeper=True, stay_alive=True, allow_analyzer=False, From dba7a0dffc4927a88c04cb7b9ec93faeeba40b3c Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 23 Jul 2023 23:18:19 +0000 Subject: 
[PATCH 326/478] Automatic style fix --- .../test_vertical_merges_from_compact_parts.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py index 481621cacfe..9c9d1a4d312 100644 --- a/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py +++ b/tests/integration/test_backward_compatibility/test_vertical_merges_from_compact_parts.py @@ -15,7 +15,10 @@ node_old = cluster.add_instance( ) node_new = cluster.add_instance( "node2", - main_configs=["configs/no_compress_marks.xml", "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml"], + main_configs=[ + "configs/no_compress_marks.xml", + "configs/no_allow_vertical_merges_from_compact_to_wide_parts.xml", + ], with_zookeeper=True, stay_alive=True, allow_analyzer=False, From d7cdfb47d3795a3a09c2a204789c95e9726dc2b6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 01:56:04 +0200 Subject: [PATCH 327/478] Fix merge --- src/IO/WriteHelpers.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 0494cdf22e7..76778543bd0 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -953,6 +953,11 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool { auto remainder = value % 10; value /= 10; + + if (remainder != 0 && last_nonzero_pos == 0) + last_nonzero_pos = pos; + + buf[pos] += static_cast(remainder); } writeChar('.', ostr); From 75efee9675f277fc3405ca5b256296aa406baca4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 05:34:00 +0200 Subject: [PATCH 328/478] Fix errors --- programs/install/CMakeLists.txt | 3 --- programs/server/CMakeLists.txt | 2 +- src/CMakeLists.txt | 3 +-- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/programs/install/CMakeLists.txt b/programs/install/CMakeLists.txt index f3f562bab7c..c3f4d96d631 100644 --- a/programs/install/CMakeLists.txt +++ b/programs/install/CMakeLists.txt @@ -10,6 +10,3 @@ set (CLICKHOUSE_INSTALL_LINK ) clickhouse_program_add_library(install) - -# For incbin -target_include_directories(clickhouse-install-lib PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../server") diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index e008e65acf6..b8241afa1eb 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -29,4 +29,4 @@ endif() clickhouse_program_add(server) -target_include_directories(clickhouse-server-lib PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +install(FILES config.xml users.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-server" COMPONENT clickhouse) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fda8bafde59..975bf9bb618 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -296,7 +296,7 @@ macro (dbms_target_include_directories) endforeach () endmacro () -dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src" "${ClickHouse_SOURCE_DIR}/programs/server") +dbms_target_include_directories (PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") target_include_directories (clickhouse_common_io PUBLIC "${ClickHouse_SOURCE_DIR}/src" "${ClickHouse_BINARY_DIR}/src") if (TARGET ch_contrib::llvm) @@ -561,7 +561,6 @@ if (ENABLE_NLP) dbms_target_link_libraries (PUBLIC ch_contrib::stemmer) dbms_target_link_libraries 
(PUBLIC ch_contrib::wnb) dbms_target_link_libraries (PUBLIC ch_contrib::lemmagen) - target_include_directories(clickhouse_common_io PUBLIC ${CMAKE_SOURCE_DIR}/contrib/nlp-data) endif() if (TARGET ch_contrib::ulid) From 169b9d5cc0c8dc54d31bc7229204b195f294c877 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 05:49:06 +0200 Subject: [PATCH 329/478] Fix tidy --- src/Functions/GregorianDate.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp index da1172c8916..aaaeeb7339d 100644 --- a/src/Functions/GregorianDate.cpp +++ b/src/Functions/GregorianDate.cpp @@ -20,12 +20,12 @@ namespace ErrorCodes namespace { - static inline constexpr bool is_leap_year(int32_t year) + inline constexpr bool is_leap_year(int32_t year) { return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0)); } - static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) + inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month) { switch (month) { @@ -49,7 +49,7 @@ namespace /** Integer division truncated toward negative infinity. */ template - static inline constexpr I div(I x, J y) + inline constexpr I div(I x, J y) { const auto y_cast = static_cast(y); if (x > 0 && y_cast < 0) @@ -63,7 +63,7 @@ namespace /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. */ template - static inline constexpr I mod(I x, J y) + inline constexpr I mod(I x, J y) { const auto y_cast = static_cast(y); const auto r = x % y_cast; @@ -76,13 +76,13 @@ namespace /** Like std::min(), but the type of operands may differ. */ template - static inline constexpr I min(I x, J y) + inline constexpr I min(I x, J y) { const auto y_cast = static_cast(y); return x < y_cast ? 
x : y_cast; } - static inline char readDigit(ReadBuffer & in) + inline char readDigit(ReadBuffer & in) { char c; if (!in.read(c)) @@ -93,7 +93,7 @@ namespace return c - '0'; } - static inline bool tryReadDigit(ReadBuffer & in, char & c) + inline bool tryReadDigit(ReadBuffer & in, char & c) { if (in.read(c) && c >= '0' && c <= '9') { From d7f7f16fbcfa8063e295708b4feb3b0079ad05f0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 10:44:13 +0200 Subject: [PATCH 330/478] Introduce IStorage::supportsTrivialCountOptimization() Signed-off-by: Azat Khuzhin --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +-- src/Planner/PlannerJoinTree.cpp | 3 +++ src/Storages/IStorage.h | 3 +++ src/Storages/MergeTree/MergeTreeData.h | 2 ++ src/Storages/StorageMaterializedMySQL.h | 2 ++ 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d07a6521544..fc3ea3a13ca 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2274,8 +2274,7 @@ std::optional InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle && !settings.allow_experimental_query_deduplication && !settings.empty_result_for_aggregation_by_empty_set && storage - && storage->getName() != "MaterializedMySQL" - && !storage->hasLightweightDeletedMask() + && storage->supportsTrivialCountOptimization() && query_info.filter_asts.empty() && query_analyzer->hasAggregation() && (query_analyzer->aggregates().size() == 1) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 5d8f8ca8741..c118fccded4 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -182,6 +182,9 @@ bool applyTrivialCountIfPossible( return false; const auto & storage = table_node.getStorage(); + if (!storage->supportsTrivialCountOptimization()) + return false; + auto storage_id = storage->getStorageID(); auto row_policy_filter = query_context->getRowPolicyFilter(storage_id.getDatabaseName(), storage_id.getTableName(), diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 76641b656a2..701e02a85ac 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -254,6 +254,9 @@ public: /// because those are internally translated into 'ALTER UDPATE' mutations. virtual bool supportsDelete() const { return false; } + /// Return true if the trivial count query could be optimized without reading the data at all. 
+ virtual bool supportsTrivialCountOptimization() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 41fc4657854..5e6b043c31c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -434,6 +434,8 @@ public: bool areAsynchronousInsertsEnabled() const override { return getSettings()->async_insert; } + bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); } + NamesAndTypesList getVirtuals() const override; bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr, const StorageMetadataPtr & metadata_snapshot) const override; diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 08fbb61960f..e6fcbc203e6 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -41,6 +41,8 @@ public: void drop() override { nested_storage->drop(); } + bool supportsTrivialCountOptimization() const override { return false; } + private: [[noreturn]] static void throwNotAllowed() { From a0070eda02736903b984518daf3d1c79bfe5fd94 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 10:48:21 +0200 Subject: [PATCH 331/478] Slightly optimize code in ClusterProxy::executeQuery() Signed-off-by: Azat Khuzhin --- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 3dea52faf46..5efba383e4b 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -176,11 +176,9 @@ void executeQuery( size_t shards = query_info.getCluster()->getShardCount(); for (const auto & shard_info : query_info.getCluster()->getShardsInfo()) { - ASTPtr query_ast_for_shard; - if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) + ASTPtr query_ast_for_shard = query_ast->clone(); + if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1) { - query_ast_for_shard = query_ast->clone(); - OptimizeShardingKeyRewriteInVisitor::Data visitor_data{ sharding_key_expr, sharding_key_expr->getSampleBlock().getByPosition(0).type, @@ -191,8 +189,6 @@ void executeQuery( OptimizeShardingKeyRewriteInVisitor visitor(visitor_data); visitor.visit(query_ast_for_shard); } - else - query_ast_for_shard = query_ast->clone(); if (shard_filter_generator) { From 67095d2150cafc91c0eebea4a17a8dc5f17b307c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 10:48:58 +0200 Subject: [PATCH 332/478] Fix comment for function argument in TableFunctionRemote Signed-off-by: Azat Khuzhin --- src/TableFunctions/TableFunctionRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 4143014a7b3..e6d72ddf17b 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -264,7 +264,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr secure, /* priority= */ Priority{1}, /* cluster_name= */ "", - /* password= */ "" + /* cluster_secret= */ "" }; cluster = std::make_shared(context->getSettingsRef(), names, params); } From b22247609036020e9bc4da64f1a297e49c29edfa Mon Sep 
17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 14:19:08 +0200 Subject: [PATCH 333/478] Add ability to pass table for connections checks per-shard to ReadFromRemote Signed-off-by: Azat Khuzhin --- src/Interpreters/ClusterProxy/SelectStreamFactory.cpp | 1 + src/Interpreters/ClusterProxy/SelectStreamFactory.h | 2 ++ src/Processors/QueryPlan/ReadFromRemote.cpp | 6 ++++-- src/Processors/QueryPlan/ReadFromRemote.h | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0cf3f360994..953e38d56cd 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard( { remote_shards.emplace_back(Shard{ .query = query_ast, + .main_table = main_table, .header = header, .shard_info = shard_info, .lazy = lazy, diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.h b/src/Interpreters/ClusterProxy/SelectStreamFactory.h index 030c0b77dd5..1cc5a3b1a77 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.h +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.h @@ -50,6 +50,8 @@ public: { /// Query and header may be changed depending on shard. ASTPtr query; + /// Used to check the table existence on remote node + StorageID main_table; Block header; Cluster::ShardInfo shard_info; diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 5cc13f45df4..7a99c363232 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -162,7 +162,9 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::SelectStream if (my_table_func_ptr) try_results = my_shard.shard_info.pool->getManyForTableFunction(timeouts, ¤t_settings, PoolMode::GET_MANY); else - try_results = my_shard.shard_info.pool->getManyChecked(timeouts, ¤t_settings, PoolMode::GET_MANY, my_main_table.getQualifiedName()); + try_results = my_shard.shard_info.pool->getManyChecked( + timeouts, ¤t_settings, PoolMode::GET_MANY, + my_shard.main_table ? my_shard.main_table.getQualifiedName() : my_main_table.getQualifiedName()); } catch (const Exception & ex) { @@ -241,7 +243,7 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::SelectStreamFact remote_query_executor->setPoolMode(PoolMode::GET_MANY); if (!table_func_ptr) - remote_query_executor->setMainTable(main_table); + remote_query_executor->setMainTable(shard.main_table ? 
shard.main_table : main_table); pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read, async_query_sending)); addConvertingActions(pipes.back(), output_stream->header); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index d4005d81f1b..ac869cd89f9 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -22,6 +22,7 @@ using ThrottlerPtr = std::shared_ptr; class ReadFromRemote final : public ISourceStep { public: + /// @param main_table_ if Shards contains main_table then this parameter will be ignored ReadFromRemote( ClusterProxy::SelectStreamFactory::Shards shards_, Block header_, From 83c0f03b98d6b3cbd10f9690256aed2fada47177 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 14:21:53 +0200 Subject: [PATCH 334/478] Change signature of the updateSettingsForCluster() to avoid cluster requirement Signed-off-by: Azat Khuzhin --- src/Interpreters/ClusterProxy/executeQuery.cpp | 11 ++++++++--- src/Interpreters/ClusterProxy/executeQuery.h | 8 ++++++-- src/Storages/getStructureOfRemoteTable.cpp | 4 ++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 5efba383e4b..2fed626ffb7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -35,7 +35,12 @@ namespace ErrorCodes namespace ClusterProxy { -ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log) +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + const SelectQueryInfo * query_info, + Poco::Logger * log) { Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c /// If "secret" (in remote_servers) is not in use, /// user on the shard is not the same as the user on the initiator, /// hence per-user limits should not be applied. - if (cluster.getSecret().empty()) + if (!interserver_mode) { /// Does not matter on remote servers, because queries are sent under different user. 
new_settings.max_concurrent_queries_for_user = 0; @@ -170,7 +175,7 @@ void executeQuery( std::vector plans; SelectStreamFactory::Shards remote_shards; - auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log); + auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log); new_context->increaseDistributedDepth(); size_t shards = query_info.getCluster()->getShardCount(); diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index 41f6da55686..511914e99e4 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -34,8 +34,12 @@ class SelectStreamFactory; /// - optimize_skip_unused_shards_nesting /// /// @return new Context with adjusted settings -ContextMutablePtr updateSettingsForCluster( - const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr); +ContextMutablePtr updateSettingsForCluster(bool interserver_mode, + ContextPtr context, + const Settings & settings, + const StorageID & main_table, + const SelectQueryInfo * query_info = nullptr, + Poco::Logger * log = nullptr); using AdditionalShardFilterGenerator = std::function; /// Execute a distributed query, creating a query plan, from which the query pipeline can be built. diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index e5fc01be9f4..cbed05e30ed 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -58,7 +58,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( } ColumnsDescription res; - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), table_id); + auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), table_id); /// Ignore limit for result number of rows (that could be set during handling CSE/CTE), /// since this is a service query and should not lead to query failure. @@ -177,7 +177,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( const auto & shards_info = cluster.getShardsInfo(); auto query = "DESC TABLE " + remote_table_id.getFullTableName(); - auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context->getSettingsRef(), remote_table_id); + auto new_context = ClusterProxy::updateSettingsForCluster(!cluster.getSecret().empty(), context, context->getSettingsRef(), remote_table_id); new_context->setSetting("describe_extend_object_types", true); /// Expect only needed columns from the result of DESC TABLE. 
From 323128df6f3c779f3b2fe4a751fa98372a54fbbb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 23 Jun 2023 15:02:32 +0200 Subject: [PATCH 335/478] Remove non existing ctor of Cluster::Address Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index de10a445d01..b90acd1d576 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -144,12 +144,6 @@ public: UInt32 shard_index_ = 0, UInt32 replica_index_ = 0); - Address( - const String & host_port_, - const ClusterConnectionParameters & params, - UInt32 shard_index_, - UInt32 replica_index_); - Address( const DatabaseReplicaInfo & info, const ClusterConnectionParameters & params, From 4a33e027c518f51d120c60b21ccd962264e1356a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 20 Jun 2023 17:31:45 +0200 Subject: [PATCH 336/478] Split StorageReplicatedMergeTree reading methods Signed-off-by: Azat Khuzhin --- src/Storages/StorageReplicatedMergeTree.cpp | 141 ++++++++++++-------- src/Storages/StorageReplicatedMergeTree.h | 32 ++++- 2 files changed, 119 insertions(+), 54 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 13c0fb3f7c2..4e053c4598c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4902,67 +4902,102 @@ void StorageReplicatedMergeTree::read( snapshot_data.alter_conversions = {}; }); - /** The `select_sequential_consistency` setting has two meanings: - * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. - * 2. Do not read parts that have not yet been written to the quorum of the replicas. - * For this you have to synchronously go to ZooKeeper. - */ - if (local_context->getSettingsRef().select_sequential_consistency) - { - auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, local_context, - max_block_size, num_streams, processed_stage, std::move(max_added_blocks), /*enable_parallel_reading*/false)) - query_plan = std::move(*plan); - return; - } + const auto & settings = local_context->getSettingsRef(); + + /// The `select_sequential_consistency` setting has two meanings: + /// 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. + /// 2. Do not read parts that have not yet been written to the quorum of the replicas. + /// For this you have to synchronously go to ZooKeeper. 
+ if (settings.select_sequential_consistency) + return readLocalSequentialConsistencyImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); if (local_context->canUseParallelReplicasOnInitiator()) + return readParallelReplicasImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + + readLocalImpl(query_plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); +} + +void StorageReplicatedMergeTree::readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) +{ + auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); + auto plan = reader.read(column_names, storage_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, std::move(max_added_blocks), + /* enable_parallel_reading= */false); + if (plan) + query_plan = std::move(*plan); +} + +void StorageReplicatedMergeTree::readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & /*column_names*/, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t /*max_block_size*/, + const size_t /*num_streams*/) +{ + auto table_id = getStorageID(); + + auto parallel_replicas_cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); + + ASTPtr modified_query_ast; + Block header; + if (local_context->getSettingsRef().allow_experimental_analyzer) { - auto table_id = getStorageID(); + auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - ASTPtr modified_query_ast; - - Block header; - - if (local_context->getSettingsRef().allow_experimental_analyzer) - { - auto modified_query_tree = buildQueryTreeForShard(query_info, query_info.query_tree); - - header = InterpreterSelectQueryAnalyzer::getSampleBlock( - modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); - modified_query_ast = queryNodeToSelectQuery(modified_query_tree); - } - else - { - modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, - table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); - header - = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - } - - auto cluster = local_context->getCluster(local_context->getSettingsRef().cluster_for_parallel_replicas); - - ClusterProxy::SelectStreamFactory select_stream_factory = - ClusterProxy::SelectStreamFactory( - header, - {}, - storage_snapshot, - processed_stage); - - ClusterProxy::executeQueryWithParallelReplicas( - query_plan, getStorageID(), /*remove_table_function_ptr*/ nullptr, - select_stream_factory, modified_query_ast, - local_context, query_info, cluster); + header = InterpreterSelectQueryAnalyzer::getSampleBlock( + modified_query_tree, local_context, SelectQueryOptions(processed_stage).analyze()); + modified_query_ast = queryNodeToSelectQuery(modified_query_tree); } else { - if (auto plan = reader.read( - column_names, storage_snapshot, query_info, - local_context, max_block_size, num_streams, - processed_stage, nullptr, 
/*enable_parallel_reading*/local_context->canUseParallelReplicasOnFollower())) - query_plan = std::move(*plan); + modified_query_ast = ClusterProxy::rewriteSelectQuery(local_context, query_info.query, + table_id.database_name, table_id.table_name, /*remote_table_function_ptr*/nullptr); + header + = InterpreterSelectQuery(modified_query_ast, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } + + ClusterProxy::SelectStreamFactory select_stream_factory = ClusterProxy::SelectStreamFactory( + header, + {}, + storage_snapshot, + processed_stage); + + ClusterProxy::executeQueryWithParallelReplicas( + query_plan, getStorageID(), + /* table_func_ptr= */ nullptr, + select_stream_factory, modified_query_ast, + local_context, query_info, parallel_replicas_cluster); +} + +void StorageReplicatedMergeTree::readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t max_block_size, + const size_t num_streams) +{ + auto plan = reader.read( + column_names, storage_snapshot, query_info, + local_context, max_block_size, num_streams, + processed_stage, + /* max_block_numbers_to_read= */ nullptr, + /* enable_parallel_reading= */ local_context->canUseParallelReplicasOnFollower()); + if (plan) + query_plan = std::move(*plan); } template diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 1a1b3c3b10c..ded940bc1d2 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -130,7 +130,7 @@ public: const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, - ContextPtr context, + ContextPtr local_context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, size_t num_streams) override; @@ -513,6 +513,36 @@ private: static std::optional distributedWriteFromClusterStorage(const std::shared_ptr & src_storage_cluster, const ASTInsertQuery & query, ContextPtr context); + void readLocalImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readLocalSequentialConsistencyImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + + void readParallelReplicasImpl( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams); + template void foreachActiveParts(Func && func, bool select_sequential_consistency) const; From b22313ef2d721ec0f8687515de58f4e2ba785d1d Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 24 Jul 2023 03:54:34 +0000 Subject: [PATCH 337/478] Replace with three way comparison --- src/Common/IntervalTree.h | 34 ++-------------------------------- 1 file changed, 2 insertions(+), 32 deletions(-) diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index 2214a4e842d..ad079a312f2 100644 --- a/src/Common/IntervalTree.h +++ 
b/src/Common/IntervalTree.h @@ -27,39 +27,9 @@ struct Interval }; template -bool operator<(const Interval & lhs, const Interval & rhs) +auto operator<=>(const Interval & lhs, const Interval & rhs) { - return std::tie(lhs.left, lhs.right) < std::tie(rhs.left, rhs.right); -} - -template -bool operator<=(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) <= std::tie(rhs.left, rhs.right); -} - -template -bool operator==(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right); -} - -template -bool operator!=(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) != std::tie(rhs.left, rhs.right); -} - -template -bool operator>(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) > std::tie(rhs.left, rhs.right); -} - -template -bool operator>=(const Interval & lhs, const Interval & rhs) -{ - return std::tie(lhs.left, lhs.right) >= std::tie(rhs.left, rhs.right); + return std::tie(lhs.left, lhs.right) <=> std::tie(rhs.left, rhs.right); } struct IntervalTreeVoidValue From ac54be9652414e10a1b79ec4f92439db5155310b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 05:56:18 +0200 Subject: [PATCH 338/478] Fix a test --- tests/integration/test_backward_compatibility/test_functions.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_backward_compatibility/test_functions.py b/tests/integration/test_backward_compatibility/test_functions.py index fa24b146fec..c86c3ba0ab2 100644 --- a/tests/integration/test_backward_compatibility/test_functions.py +++ b/tests/integration/test_backward_compatibility/test_functions.py @@ -143,6 +143,7 @@ def test_string_functions(start_cluster): "position", "substring", "CAST", + "getTypeSerializationStreams", # NOTE: no need to ignore now()/now64() since they will fail because they don't accept any argument # 22.8 Backward Incompatible Change: Extended range of Date32 "toDate32OrZero", From 2389e0f0b68d03ecbb117745ed00c54979715ea7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 17 Jul 2023 09:54:51 +0200 Subject: [PATCH 339/478] Randomize timezone in tests across non-deterministic around 1970 and default There was some cases when some patches to the datetime code leads to flaky tests, due to the tests itself had been runned using regular timezone (TZ). But if you will this tests with something "specific" (that is not strictly defined around 1970 year), those tests will fail. So to catch such issues in the PRs itself, let's randomize session_timezone as well. Signed-off-by: Azat Khuzhin --- docker/test/stateless/run.sh | 3 +++ tests/clickhouse-test | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index fe53925ecc8..3694fb7c2f6 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -4,6 +4,9 @@ set -e -x -a # Choose random timezone for this test run. +# +# NOTE: that clickhouse-test will randomize session_timezone by itself as well +# (it will choose between default server timezone and something specific). 
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Choosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone diff --git a/tests/clickhouse-test b/tests/clickhouse-test index abd109d00b2..185e3003c95 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -529,6 +529,12 @@ def threshold_generator(always_on_prob, always_off_prob, min_val, max_val): return gen +# To keep dependency list as short as possible, tzdata is not used here (to +# avoid try/except block for import) +def get_localzone(): + return os.getenv("TZ", "/".join(os.readlink("/etc/localtime").split("/")[-2:])) + + class SettingsRandomizer: settings = { "max_insert_threads": lambda: 0 @@ -602,6 +608,19 @@ class SettingsRandomizer: "enable_memory_bound_merging_of_aggregation_results": lambda: random.randint( 0, 1 ), + "session_timezone": lambda: random.choice( + [ + # special non-deterministic around 1970 timezone, see [1]. + # + # [1]: https://github.com/ClickHouse/ClickHouse/issues/42653 + "America/Mazatlan", + "America/Hermosillo", + "Mexico/BajaSur", + # server default that is randomized across all timezones + # NOTE: due to lots of trickery we cannot use empty timezone here, but this should be the same. + get_localzone(), + ] + ), } @staticmethod From bc167dfde81c44bb93ee7dd0c634ff3428ea3c33 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 18 Jul 2023 06:20:05 +0200 Subject: [PATCH 340/478] clickhouse-test: add proper escaping for HTTP parameters The problem is that old versions of cURL (7.81.0 at least) handle additional parameters incorrectly if in previous parameter was "/": $ docker run --rm curlimages/curl:8.1.2 --http1.1 --get -vvv 'http://kernel.org/?bar=foo/baz' --data-urlencode "query=select 1 format Null"; echo > GET /?bar=foo/baz&query=select+1+format+Null HTTP/1.1 > User-Agent: curl/8.1.2 $ docker run --rm curlimages/curl:7.81.0 --http1.1 --get -vvv 'http://kernel.org/?bar=foo/baz' --data-urlencode "query=select 1 format Null"; echo > GET /?bar=foo/baz?query=select+1+format+Null HTTP/1.1 > User-Agent: curl/7.81.0-DEV Note, that I thought about making the same for cli, but it is not that easy, even after getting rid of sh -c and string contantenation, it still cannot be done for CLICKHOUSE_CLIENT_OPT. 
Signed-off-by: Azat Khuzhin --- tests/clickhouse-test | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 185e3003c95..c63e1e3ae52 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -625,16 +625,16 @@ class SettingsRandomizer: @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} is_debug = BuildFlags.DEBUG in args.build_flags for setting, generator in SettingsRandomizer.settings.items(): if ( is_debug and setting == "allow_prefetched_read_pool_for_remote_filesystem" ): - random_settings.append(f"{setting}=0") + random_settings[setting] = 0 else: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -670,10 +670,10 @@ class MergeTreeSettingsRandomizer: @staticmethod def get_random_settings(args): - random_settings = [] + random_settings = {} for setting, generator in MergeTreeSettingsRandomizer.settings.items(): if setting not in args.changed_merge_tree_settings: - random_settings.append(f"{setting}={generator()}") + random_settings[setting] = generator() return random_settings @@ -785,7 +785,14 @@ class TestCase: @staticmethod def cli_format_settings(settings_list) -> str: - return " ".join([f"--{setting}" for setting in settings_list]) + out = [] + for k, v in settings_list.items(): + out.extend([f"--{k}", str(v)]) + return " ".join(out) + + @staticmethod + def http_format_settings(settings_list) -> str: + return urllib.parse.urlencode(settings_list) def has_show_create_table_in_test(self): return not subprocess.call(["grep", "-iq", "show create", self.case_file]) @@ -793,11 +800,12 @@ class TestCase: def add_random_settings(self, client_options): new_options = "" if self.randomize_settings: + http_params = self.http_format_settings(self.random_settings) if len(self.base_url_params) == 0: - os.environ["CLICKHOUSE_URL_PARAMS"] = "&".join(self.random_settings) + os.environ["CLICKHOUSE_URL_PARAMS"] = http_params else: os.environ["CLICKHOUSE_URL_PARAMS"] = ( - self.base_url_params + "&" + "&".join(self.random_settings) + self.base_url_params + "&" + http_params ) new_options += f" {self.cli_format_settings(self.random_settings)}" From 6ae4d291800c7d9b32622f1d520f1ab27b9f90b7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 19 Jul 2023 13:22:31 +0200 Subject: [PATCH 341/478] Fix tests after session_timezone randomization Signed-off-by: Azat Khuzhin --- .../0_stateless/00387_use_client_time_zone.sh | 3 ++- tests/queries/0_stateless/00427_alter_primary_key.sh | 11 ++++++----- tests/queries/0_stateless/00933_ttl_simple.sql | 12 ++++++++++++ ...42_system_reload_dictionary_reloads_completely.sh | 4 ++-- .../0_stateless/01070_modify_ttl_recalc_only.sql | 3 +++ .../0_stateless/02530_dictionaries_update_field.sh | 3 ++- 6 files changed, 27 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/00387_use_client_time_zone.sh b/tests/queries/0_stateless/00387_use_client_time_zone.sh index 2a6d81eebfe..e54d5244eef 100755 --- a/tests/queries/0_stateless/00387_use_client_time_zone.sh +++ b/tests/queries/0_stateless/00387_use_client_time_zone.sh @@ -5,4 +5,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -env TZ=UTC ${CLICKHOUSE_CLIENT} --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)" +# NOTE: session_timezone overrides use_client_time_zone, disable it randomization +env TZ=UTC ${CLICKHOUSE_CLIENT} --session_timezone '' --use_client_time_zone=1 --query="SELECT toDateTime(1000000000)" diff --git a/tests/queries/0_stateless/00427_alter_primary_key.sh b/tests/queries/0_stateless/00427_alter_primary_key.sh index 1269e2ad6e3..f9984384d79 100755 --- a/tests/queries/0_stateless/00427_alter_primary_key.sh +++ b/tests/queries/0_stateless/00427_alter_primary_key.sh @@ -7,11 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function perform() { local query=$1 - TZ=UTC $CLICKHOUSE_CLIENT \ - --allow_deprecated_syntax_for_merge_tree=1 \ - --use_client_time_zone=1 \ - --input_format_values_interpret_expressions=0 \ - --query "$query" 2>/dev/null + local settings=( + --allow_deprecated_syntax_for_merge_tree 1 + --session_timezone UTC + --input_format_values_interpret_expressions 0 + ) + TZ=UTC $CLICKHOUSE_CLIENT "${settings[@]}" --query "$query" 2>/dev/null if [ "$?" -ne 0 ]; then echo "query failed" fi diff --git a/tests/queries/0_stateless/00933_ttl_simple.sql b/tests/queries/0_stateless/00933_ttl_simple.sql index 2bf686822d5..ad40e7c7e47 100644 --- a/tests/queries/0_stateless/00933_ttl_simple.sql +++ b/tests/queries/0_stateless/00933_ttl_simple.sql @@ -1,3 +1,15 @@ +-- disable timezone randomization since otherwise TTL may fail at particular datetime, i.e.: +-- +-- SELECT +-- now(), +-- toDate(toTimeZone(now(), 'America/Mazatlan')), +-- today() +-- +-- ┌───────────────now()─┬─toDate(toTimeZone(now(), 'America/Mazatlan'))─┬────today()─┐ +-- │ 2023-07-24 06:24:06 │ 2023-07-23 │ 2023-07-24 │ +-- └─────────────────────┴───────────────────────────────────────────────┴────────────┘ +set session_timezone = ''; + drop table if exists ttl_00933_1; -- Column TTL works only with wide parts, because it's very expensive to apply it for compact parts diff --git a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh index f2b30e05040..9d34470c38d 100755 --- a/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh +++ b/tests/queries/0_stateless/01042_system_reload_dictionary_reloads_completely.sh @@ -7,8 +7,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail -# Run the client. 
-$CLICKHOUSE_CLIENT --multiquery <<'EOF' +# NOTE: dictionaries TTLs works with server timezone, so session_timeout cannot be used +$CLICKHOUSE_CLIENT --session_timezone '' --multiquery <<'EOF' DROP DATABASE IF EXISTS dictdb_01042; CREATE DATABASE dictdb_01042; CREATE TABLE dictdb_01042.table(x Int64, y Int64, insert_time DateTime) ENGINE = MergeTree ORDER BY tuple(); diff --git a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql index 247e412484f..7ac70d41871 100644 --- a/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql +++ b/tests/queries/0_stateless/01070_modify_ttl_recalc_only.sql @@ -2,6 +2,9 @@ set mutations_sync = 2; +-- system.parts has server default, timezone cannot be randomized +set session_timezone = ''; + drop table if exists ttl; create table ttl (d Date, a Int) engine = MergeTree order by a partition by toDayOfMonth(d) diff --git a/tests/queries/0_stateless/02530_dictionaries_update_field.sh b/tests/queries/0_stateless/02530_dictionaries_update_field.sh index 569466fe606..6ac10ea2308 100755 --- a/tests/queries/0_stateless/02530_dictionaries_update_field.sh +++ b/tests/queries/0_stateless/02530_dictionaries_update_field.sh @@ -5,7 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q " +# NOTE: dictionaries will be updated according to server TZ, not session, so prohibit it's randomization +$CLICKHOUSE_CLIENT --session_timezone '' -q " CREATE TABLE table_for_update_field_dictionary ( key UInt64, From 810137e57a53467e9fea668769749c559af12bc1 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Mon, 24 Jul 2023 05:59:07 +0000 Subject: [PATCH 342/478] Add new peak_memory_usage to docs --- docs/en/interfaces/http.md | 18 +++++++++--------- docs/ru/interfaces/http.md | 16 ++++++++-------- docs/zh/interfaces/http.md | 18 +++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 3a7f6d4d854..37821f0fee1 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -56,7 +56,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -286,9 +286,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). 
Example of the header sequence: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"} ``` Possible header fields: @@ -416,7 +416,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -581,7 +581,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -621,7 +621,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -673,7 +673,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -692,7 +692,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index b8c5ee77f0c..981f1c7b5a2 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -50,7 +50,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). 
Пример последовательности заголовков: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"} ``` Возможные поля заголовка: @@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index c7a0f355a92..f84768beccc 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -53,7 +53,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} 1 ``` @@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"} ``` 显示字段信息: @@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: 
{"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact Say Hi!% @@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < * Connection #0 to host localhost left intact
% @@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Absolute Path File * Connection #0 to host localhost left intact @@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"} < Relative Path File * Connection #0 to host localhost left intact From ceaaa78fdcfac2243bcf28624336217bd44898f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:04:38 +0200 Subject: [PATCH 343/478] Fix transform --- src/Functions/transform.cpp | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 1fc0e3adf96..a48d8d47489 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -156,15 +156,15 @@ namespace { initialize(arguments, result_type); - const auto * in = arguments.front().column.get(); - - if (isColumnConst(*in)) + if (isColumnConst(*arguments[0].column)) return executeConst(arguments, result_type, input_rows_count); ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) default_non_const = castColumn(arguments[3], result_type); + ColumnPtr in = cache.default_column ? 
arguments[0].column : castColumn(arguments[0], result_type); + auto column_result = result_type->createColumn(); if (cache.is_empty) { @@ -174,30 +174,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const) - && !executeNum>(in, *column_result, default_non_const)) + if (!executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const) + && !executeNum>(in.get(), *column_result, default_non_const)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if (cache.table_string_to_idx) { - if (!executeString(in, *column_result, default_non_const)) - executeContiguous(in, *column_result, default_non_const); + if (!executeString(in.get(), *column_result, default_non_const)) + executeContiguous(in.get(), *column_result, default_non_const); } else if (cache.table_anything_to_idx) { - executeAnything(in, *column_result, default_non_const); + executeAnything(in.get(), *column_result, default_non_const); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -810,7 +810,6 @@ namespace cache.initialized = true; } }; - } REGISTER_FUNCTION(Transform) From aaa0bf64fd888332bfa59c284508d4e7a84d372c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:05:55 +0200 Subject: [PATCH 344/478] Add a test --- .../02832_transform_fixed_string_no_default.reference | 1 + .../0_stateless/02832_transform_fixed_string_no_default.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference create mode 100644 tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql new file mode 100644 index 00000000000..8d316d3413f --- /dev/null +++ 
b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql @@ -0,0 +1 @@ +SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); From 890a3754a6a093545122e42bcab066a27c72ed5e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:19:46 +0200 Subject: [PATCH 345/478] Fix error --- src/Functions/transform.cpp | 55 ++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index a48d8d47489..79168d82c54 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -156,14 +156,18 @@ namespace { initialize(arguments, result_type); - if (isColumnConst(*arguments[0].column)) + const auto * in = arguments[0].column.get(); + + if (isColumnConst(*in)) return executeConst(arguments, result_type, input_rows_count); ColumnPtr default_non_const; if (!cache.default_column && arguments.size() == 4) default_non_const = castColumn(arguments[3], result_type); - ColumnPtr in = cache.default_column ? arguments[0].column : castColumn(arguments[0], result_type); + ColumnPtr in_casted = arguments[0].column; + if (arguments.size() == 3) + in_casted = castColumn(arguments[0], result_type); auto column_result = result_type->createColumn(); if (cache.is_empty) @@ -174,30 +178,30 @@ namespace } else if (cache.table_num_to_idx) { - if (!executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const) - && !executeNum>(in.get(), *column_result, default_non_const)) + if (!executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted) + && !executeNum>(in, *column_result, default_non_const, *in_casted)) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } } else if (cache.table_string_to_idx) { - if (!executeString(in.get(), *column_result, default_non_const)) - executeContiguous(in.get(), *column_result, default_non_const); + if (!executeString(in, *column_result, default_non_const, *in_casted)) + executeContiguous(in, *column_result, default_non_const, *in_casted); } else if (cache.table_anything_to_idx) { - executeAnything(in.get(), *column_result, 
default_non_const); + executeAnything(in, *column_result, default_non_const, *in_casted); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized"); @@ -218,7 +222,7 @@ namespace return impl->execute(args, result_type, input_rows_count); } - void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const + void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const size_t size = in->size(); const auto & table = *cache.table_anything_to_idx; @@ -236,11 +240,11 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } - void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const + void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const size_t size = in->size(); const auto & table = *cache.table_string_to_idx; @@ -255,12 +259,12 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } template - bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -297,7 +301,7 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, i); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } return true; @@ -451,7 +455,7 @@ namespace } } - bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const + bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const { const auto * const in = checkAndGetColumn(in_untyped); if (!in) @@ -488,7 +492,7 @@ namespace else if (default_non_const) column_result.insertFrom(*default_non_const, 0); else - column_result.insertFrom(*in, i); + column_result.insertFrom(in_casted, i); } } return true; @@ -810,6 +814,7 @@ namespace cache.initialized = true; } }; + } REGISTER_FUNCTION(Transform) From c79492240194f0d5dd9053c70a967c39a7536cb3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 08:20:30 +0200 Subject: [PATCH 346/478] More tests --- .../02832_transform_fixed_string_no_default.reference | 2 ++ .../0_stateless/02832_transform_fixed_string_no_default.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference index 9daeafb9864..ea545c90391 100644 --- a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference +++ b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.reference @@ -1 +1,3 @@ test + +\N diff --git a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql index 8d316d3413f..0e58c716c9f 100644 --- a/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql +++ 
b/tests/queries/0_stateless/02832_transform_fixed_string_no_default.sql @@ -1 +1,3 @@ SELECT transform(name, ['a', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); +SELECT transform(name, ['test', 'b'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); +SELECT transform(name, ['a', 'test'], ['', NULL]) AS name FROM (SELECT 'test'::Nullable(FixedString(4)) AS name); From 0e46cf86b772e1513d837d6019181a6d291b7219 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 24 Jul 2023 08:52:19 +0200 Subject: [PATCH 347/478] Added try-except to check cases when second backup/restore is picked up first --- .../test_disallow_concurrency.py | 69 +++++++++++++++---- 1 file changed, 57 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index d0ce2e03016..a863a6e2047 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -133,9 +133,21 @@ def test_concurrent_backups_on_same_node(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) + try: + error = nodes[0].query_and_get_error( + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + ) + except Exception as e: + status = ( + nodes[0] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + # It is possible that the second backup was picked up first, and then the async backup + if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + return + else: + raise e expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -179,9 +191,20 @@ def test_concurrent_backups_on_different_nodes(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - error = nodes[0].query_and_get_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) + try: + error = nodes[0].query_and_get_error( + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + ) + except Exception as e: + status = ( + nodes[1] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + return + else: + raise e expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -224,9 +247,20 @@ def test_concurrent_restores_on_same_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[0].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) + try: + error = nodes[0].query_and_get_error( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" + ) + except Exception as e: + status = ( + nodes[0] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + if status == "RESTORING" or status == "RESTORE_FAILED": + return + else: + raise e expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", @@ -269,9 +303,20 @@ def test_concurrent_restores_on_different_node(): ) assert status in ["RESTORING", "RESTORED"] - error = nodes[1].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) + try: + error = nodes[1].query_and_get_error( + f"RESTORE TABLE tbl ON CLUSTER 
'cluster' FROM {backup_name}" + ) + except Exception as e: + status = ( + nodes[0] + .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .rstrip("\n") + ) + if status == "RESTORING" or status == "RESTORE_FAILED": + return + else: + raise e expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", From 96d40ff3c4dd34a9396c625b8a1d57f697f80dd0 Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 24 Jul 2023 07:30:32 +0000 Subject: [PATCH 348/478] fix --- src/Common/IntervalTree.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Common/IntervalTree.h b/src/Common/IntervalTree.h index ad079a312f2..9a42aadf70e 100644 --- a/src/Common/IntervalTree.h +++ b/src/Common/IntervalTree.h @@ -32,6 +32,12 @@ auto operator<=>(const Interval & lhs, const Interval std::tie(rhs.left, rhs.right); } +template +bool operator==(const Interval & lhs, const Interval & rhs) +{ + return std::tie(lhs.left, lhs.right) == std::tie(rhs.left, rhs.right); +} + struct IntervalTreeVoidValue { }; From 0401dc453e9502697328879728bf0dbf7c1dd9e0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 24 Jul 2023 10:14:23 +0200 Subject: [PATCH 349/478] Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop Since there can be some leftovers: 2023.07.24 07:08:25.238066 [ 140 ] {} Application: Code: 219. DB::Exception: Cannot drop: filesystem error: in remove: Directory not empty ["/var/lib/clickhouse/data/system/"]. Probably database contain some detached tables or metadata leftovers from Ordinary engine. If you want to remove all data anyway, try to attach database back and drop it again with enabled force_remove_data_recursively_on_drop setting: Exception while trying to convert database system from Ordinary to Atomic. It may be in some intermediate state. You can finish conversion manually by moving the rest tables from system to .tmp_convert.system.9396432095832455195 (using RENAME TABLE) and executing DROP DATABASE system and RENAME DATABASE .tmp_convert.system.9396432095832455195 TO system. (DATABASE_NOT_EMPTY), Stack trace (when copying this message, always include the lines below): 0. DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000e68af57 in /usr/bin/clickhouse 1. ? @ 0x000000000cab443c in /usr/bin/clickhouse 2. DB::DatabaseOnDisk::drop(std::shared_ptr) @ 0x000000001328d617 in /usr/bin/clickhouse 3. DB::DatabaseCatalog::detachDatabase(std::shared_ptr, String const&, bool, bool) @ 0x0000000013524a6c in /usr/bin/clickhouse 4. DB::InterpreterDropQuery::executeToDatabaseImpl(DB::ASTDropQuery const&, std::shared_ptr&, std::vector, DB::UUIDTag>, std::allocator, DB::UUIDTag>>>&) @ 0x0000000013bc05e4 in /usr/bin/clickhouse 5. DB::InterpreterDropQuery::executeToDatabase(DB::ASTDropQuery const&) @ 0x0000000013bbc6b8 in /usr/bin/clickhouse 6. DB::InterpreterDropQuery::execute() @ 0x0000000013bbba22 in /usr/bin/clickhouse 7. ? @ 0x00000000140b13a5 in /usr/bin/clickhouse 8. DB::executeQuery(String const&, std::shared_ptr, bool, DB::QueryProcessingStage::Enum) @ 0x00000000140ad20e in /usr/bin/clickhouse 9. ? @ 0x00000000140d2ef0 in /usr/bin/clickhouse 10. DB::maybeConvertSystemDatabase(std::shared_ptr) @ 0x00000000140d0aaf in /usr/bin/clickhouse 11. DB::Server::main(std::vector> const&) @ 0x000000000e724e55 in /usr/bin/clickhouse 12. Poco::Util::Application::run() @ 0x0000000017ead086 in /usr/bin/clickhouse 13. 
DB::Server::run() @ 0x000000000e714a5d in /usr/bin/clickhouse 14. Poco::Util::ServerApplication::run(int, char**) @ 0x0000000017ec07b9 in /usr/bin/clickhouse 15. mainEntryClickHouseServer(int, char**) @ 0x000000000e711a26 in /usr/bin/clickhouse 16. main @ 0x0000000008cf13cf in /usr/bin/clickhouse 17. __libc_start_main @ 0x0000000000021b97 in /lib/x86_64-linux-gnu/libc-2.27.so 18. _start @ 0x00000000080705ae in /usr/bin/clickhouse (version 23.7.1.2012) Signed-off-by: Azat Khuzhin --- tests/integration/helpers/cluster.py | 9 +++++++++ .../force_remove_data_recursively_on_drop.xml | 7 +++++++ .../test_version_update_after_mutation/test.py | 13 ++++++++++--- 3 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index eff44de842a..0448eb2437f 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3199,6 +3199,7 @@ class ClickHouseInstance: ): self.name = name self.base_cmd = cluster.base_cmd + self.base_dir = base_path self.docker_id = cluster.get_instance_docker_id(self.name) self.cluster = cluster self.hostname = hostname if hostname is not None else self.name @@ -4193,6 +4194,14 @@ class ClickHouseInstance: ["bash", "-c", f"sed -i 's/{replace}/{replacement}/g' {path_to_config}"] ) + def put_users_config(self, config_path): + """Put new config (useful if you cannot put it at the start)""" + + instance_config_dir = p.abspath(p.join(self.path, "configs")) + users_d_dir = p.abspath(p.join(instance_config_dir, "users.d")) + config_path = p.join(self.base_dir, config_path) + shutil.copy(config_path, users_d_dir) + def create_dir(self): """Create the instance directory and all the needed files there.""" diff --git a/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml new file mode 100644 index 00000000000..7a00648b28e --- /dev/null +++ b/tests/integration/test_version_update_after_mutation/configs/force_remove_data_recursively_on_drop.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index c80205d48c1..416220c93c3 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -51,6 +51,12 @@ def start_cluster(): cluster.shutdown() +def restart_node(node): + # set force_remove_data_recursively_on_drop (cannot be done before, because the version is too old) + node.put_users_config("configs/force_remove_data_recursively_on_drop.xml") + node.restart_with_latest_version(signal=9, fix_metadata=True) + + def test_mutate_and_upgrade(start_cluster): for node in [node1, node2]: node.query("DROP TABLE IF EXISTS mt") @@ -67,8 +73,9 @@ def test_mutate_and_upgrade(start_cluster): node2.query("DETACH TABLE mt") # stop being leader node1.query("DETACH TABLE mt") # stop being leader - node1.restart_with_latest_version(signal=9, fix_metadata=True) - node2.restart_with_latest_version(signal=9, fix_metadata=True) + + restart_node(node1) + restart_node(node2) # After hard restart table can be in readonly mode exec_query_with_retry( @@ -124,7 +131,7 @@ def test_upgrade_while_mutation(start_cluster): # (We could be in 
process of creating some system table, which will leave empty directory on restart, # so when we start moving system tables from ordinary to atomic db, it will complain about some undeleted files) node3.query("SYSTEM FLUSH LOGS") - node3.restart_with_latest_version(signal=9, fix_metadata=True) + restart_node(node3) # checks for readonly exec_query_with_retry(node3, "OPTIMIZE TABLE mt1", sleep_time=5, retry_count=60) From efa638ef3cc7db3c6149b7c031cc4c7904987abd Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Wed, 19 Jul 2023 12:53:27 +0200 Subject: [PATCH 350/478] MaterializedMySQL: Support unquoted utf-8 strings in DDL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since ClickHouse does not support unquoted utf-8 strings but MySQL does. Instead of fixing Lexer to recognize utf-8 chars as TokenType::BareWord, suggesting to quote all unrecognized tokens before applying any DDL. Actual parsing and validating the syntax will be done by particular Parser. If there is any TokenType::Error, the query is unable to be parsed anyway. Quoting such tokens can provide the support of utf-8 names. See `tryQuoteUnrecognizedTokens` and `QuoteUnrecognizedTokensTest`. mysql> CREATE TABLE 道.渠(... is converted to CREATE TABLE `道`.`渠`(... Also fixed the bug with missing * while doing SELECT in full sync because db or table name are back quoted when not needed. --- src/Common/quoteString.cpp | 11 + src/Common/quoteString.h | 3 + .../MySQL/MaterializedMySQLSyncThread.cpp | 7 +- .../gtest_try_quote_unrecognized_tokens.cpp | 289 ++++++++++++++++++ .../MySQL/tryQuoteUnrecognizedTokens.cpp | 96 ++++++ .../MySQL/tryQuoteUnrecognizedTokens.h | 10 + src/Storages/StorageMySQL.cpp | 11 +- .../materialized_with_ddl.py | 122 ++++++++ .../test_materialized_mysql_database/test.py | 6 + 9 files changed, 542 insertions(+), 13 deletions(-) create mode 100644 src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp create mode 100644 src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp create mode 100644 src/Databases/MySQL/tryQuoteUnrecognizedTokens.h diff --git a/src/Common/quoteString.cpp b/src/Common/quoteString.cpp index b464f4837a1..17129441c8f 100644 --- a/src/Common/quoteString.cpp +++ b/src/Common/quoteString.cpp @@ -44,4 +44,15 @@ String backQuoteIfNeed(StringRef x) return res; } + +String backQuoteMySQL(StringRef x) +{ + String res(x.size, '\0'); + { + WriteBufferFromString wb(res); + writeBackQuotedStringMySQL(x, wb); + } + return res; +} + } diff --git a/src/Common/quoteString.h b/src/Common/quoteString.h index b83988258e2..3f17d6e7621 100644 --- a/src/Common/quoteString.h +++ b/src/Common/quoteString.h @@ -24,4 +24,7 @@ String backQuote(StringRef x); /// Quote the identifier with backquotes, if required. String backQuoteIfNeed(StringRef x); +/// Quote the identifier with backquotes, for use in MySQL queries. 
+String backQuoteMySQL(StringRef x); + } diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 603bf3d0166..673bd155f77 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include #include @@ -342,9 +343,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, { std::make_shared(), "column_type" } }; - const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" - " WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) + - "' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION"; + String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS" + " WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION"; StreamSettings mysql_input_stream_settings(global_settings, false, true); auto mysql_source = std::make_unique(connection, query, tables_columns_sample_block, mysql_input_stream_settings); @@ -812,6 +812,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even CurrentThread::QueryScope query_scope(query_context); String query = query_event.query; + tryQuoteUnrecognizedTokens(query, query); if (!materialized_tables_list.empty()) { auto table_id = tryParseTableIDFromDDL(query, query_event.schema); diff --git a/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp new file mode 100644 index 00000000000..9c76deb2712 --- /dev/null +++ b/src/Databases/MySQL/tests/gtest_try_quote_unrecognized_tokens.cpp @@ -0,0 +1,289 @@ +#include + +#include + +using namespace DB; + +struct TestCase +{ + String query; + String res; + bool ok; + + TestCase( + const String & query_, + const String & res_, + bool ok_) + : query(query_) + , res(res_) + , ok(ok_) + { + } +}; + +std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case) +{ + return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok; +} + +class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam +{ +}; + +TEST_P(QuoteUnrecognizedTokensTest, escape) +{ + const auto & [query, expected, ok] = GetParam(); + String actual; + bool res = tryQuoteUnrecognizedTokens(query, actual); + EXPECT_EQ(ok, res); + EXPECT_EQ(expected, actual); +} + +INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list{ + { + "", + "", + false + }, + { + "test '\"`", + "", + false + }, + { + "SELECT * FROM db.`table`", + "", + false + }, + { + "道渠", + "`道渠`", + true + }, + { + "道", + "`道`", + true + }, + { + "道道(skip) 道(", + "`道道`(skip) `道`(", + true + }, + { + "`道渠`", + "", + false + }, + { + "'道'", + "", + false + }, + { + "\"道\"", + "", + false + }, + { + "` 道 test 渠 `", + "", + false + }, + { + "skip 道 skip 123", + "skip `道` skip 123", + true + }, + { + "skip 123 `道` skip", + "", + false + }, + { + "skip `道 skip 123", + "", + false + }, + { + "skip test道 skip", + "skip `test道` skip", + true + }, + { + "test道2test", + "`test道2test`", + true + }, + { + "skip test道2test 123", + "skip `test道2test` 123", + true + }, + { + "skip 您a您a您a a您a您a您a 1您2您3您4 skip", + "skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip", + true + }, + { + "skip 您a 您a您a b您2您c您4 skip", + 
"skip `您a` `您a您a` `b您2您c您4` skip", + true + }, + { + "123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip", + "`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip", + true + }, + { + "_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip", + "`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip", + true + }, + { + "TABLE 您2 您(", + "TABLE `您2` `您`(", + true + }, + { + "TABLE 您.a您2(日2日2 INT", + "TABLE `您`.`a您2`(`日2日2` INT", + true + }, + { + "TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)", + "TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)", + true + }, + { + "TABLE 您a日.您a您a您a(test INT", + "TABLE `您a日`.`您a您a您a`(test INT", + true + }, + { + "TABLE 您a日.您a您a您a(Hi您Hi好Hi INT", + "TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT", + true + }, + { + "--TABLE 您a日.您a您a您a(test INT", + "", + false + }, + { + "--您a日.您a您a您a(\n您Hi好", + "--您a日.您a您a您a(\n`您Hi好`", + true + }, + { + " /* TABLE 您a日.您a您a您a(test INT", + "", + false + }, + { + "/*您a日.您a您a您a(*/\n您Hi好", + "/*您a日.您a您a您a(*/\n`您Hi好`", + true + }, + { + " 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a", + " `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`", + true + }, + //{ TODO + // "TABLE 您2.您a您a您a(test INT", + // "TABLE `您2`.`您a您a您a`(test INT", + // true + //}, + { + "skip 您a您a您a skip", + "skip `您a您a您a` skip", + true + }, + { + "test 您a2您3a您a 4 again", + "test `您a2您3a您a` 4 again", + true + }, + { + "CREATE TABLE db.`道渠`", + "", + false + }, + { + "CREATE TABLE db.`道渠", + "", + false + }, + { + "CREATE TABLE db.道渠", + "CREATE TABLE db.`道渠`", + true + }, + { + "CREATE TABLE db. 道渠", + "CREATE TABLE db. `道渠`", + true + }, + { + R"sql( + CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL, + 您 INT, + 道渠 DATETIME, + 您test INT, test您 INT, test您test INT, + 道渠test INT, test道渠 INT, test道渠test INT, + 您_ INT, _您 INT, _您_ INT, + 您您__ INT, __您您 INT, __您您__ INT, + 您2 INT, 2您 INT, 2您2 INT, + 您您22 INT, 22您您 INT, 22您您22 INT, + 您_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT, + 您您__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT, + 您2_ INT, 2_您 INT, 2_您2_ INT, + 您您22__ INT, 22__您您 INT, 22__您您22__ INT, + 您_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT, + 您您_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT, + 您test3 INT, test3您 INT, test3您test3 INT, test3您3test INT, + 您您test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test INT, + 您3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT, + 您您3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT, + 您_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT, + 您您_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT, + 您_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT, + test_日期 varchar(256), test_道_2 varchar(256) NOT NULL , + test_道渠您_3 + BIGINT NOT NULL, + 道您3_test INT, + PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312; + )sql", + R"sql( + CREATE TABLE gb2312.`道渠` ( `id` int NOT NULL, + `您` INT, + `道渠` DATETIME, + `您test` INT, `test您` INT, `test您test` INT, + `道渠test` INT, `test道渠` INT, `test道渠test` INT, + `您_` INT, `_您` INT, `_您_` INT, + `您您__` INT, `__您您` INT, `__您您__` INT, + `您2` INT, `2您` INT, `2您2` INT, + `您您22` INT, `22您您` INT, `22您您22` INT, + `您_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT, + `您您__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT, + `您2_` INT, `2_您` INT, `2_您2_` INT, + `您您22__` INT, `22__您您` INT, 
`22__您您22__` INT, + `您_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT, + `您您_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT, + `您test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT, + `您您test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test` INT, + `您3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT, + `您您3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT, + `您_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT, + `您您_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT, + `您_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT, + `test_日期` varchar(256), `test_道_2` varchar(256) NOT NULL , + `test_道渠您_3` + BIGINT NOT NULL, + `道您3_test` INT, + PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312; + )sql", + true + }, +})); diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp new file mode 100644 index 00000000000..cd4603ddaec --- /dev/null +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp @@ -0,0 +1,96 @@ +#include +#include +#include + +namespace DB +{ + +/// Checks if there are no any tokens (like whitespaces) between current and previous pos +static bool noWhitespaces(const char * to, const char * from) +{ + return static_cast(from - to) == 0; +} + +/// Checks if the token should be quoted too together with unrecognized +static bool isWordOrNumber(TokenType type) +{ + return type == TokenType::BareWord || type == TokenType::Number; +} + +static void quoteLiteral( + IParser::Pos & pos, + IParser::Pos & pos_prev, + const char *& pos_unrecognized, + const char *& copy_from, + String & rewritten_query) +{ + /// Copy also whitespaces if any + const auto * end = + isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin) + ? pos->end + : pos_prev->end; + String literal(pos_unrecognized, static_cast(end - pos_unrecognized)); + rewritten_query.append(copy_from, pos_unrecognized - copy_from).append(backQuoteMySQL(literal)); + copy_from = end; +} + +bool tryQuoteUnrecognizedTokens(const String & query, String & res) +{ + Tokens tokens(query.data(), query.data() + query.size()); + IParser::Pos pos(tokens, 0); + Expected expected; + String rewritten_query; + const char * copy_from = query.data(); + auto pos_prev = pos; + const char * pos_unrecognized = nullptr; + for (;pos->type != TokenType::EndOfStream; ++pos) + { + /// Commit quotes if any whitespaces found or the token is not a word + bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type)); + if (pos_unrecognized && commit) + { + quoteLiteral( + pos, + pos_prev, + pos_unrecognized, + copy_from, + rewritten_query); + pos_unrecognized = nullptr; + } + if (pos->type == TokenType::Error) + { + /// Find first appearance of the error token + if (!pos_unrecognized) + { + pos_unrecognized = + isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin) + ? 
pos_prev->begin + : pos->begin; + } + } + pos_prev = pos; + } + + /// There was EndOfStream but not committed unrecognized token + if (pos_unrecognized) + { + quoteLiteral( + pos, + pos_prev, + pos_unrecognized, + copy_from, + rewritten_query); + pos_unrecognized = nullptr; + } + + /// If no Errors found + if (copy_from == query.data()) + return false; + + auto size = static_cast(pos->end - copy_from); + rewritten_query.append(copy_from, size); + res = rewritten_query; + return true; +} + +} diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h new file mode 100644 index 00000000000..582a297c485 --- /dev/null +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.h @@ -0,0 +1,10 @@ +#pragma once + +#include + +namespace DB +{ + +bool tryQuoteUnrecognizedTokens(const String & query, String & res); + +} diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3e928c3a811..b0a220eb1d2 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -34,16 +35,6 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } -static String backQuoteMySQL(const String & x) -{ - String res(x.size(), '\0'); - { - WriteBufferFromString wb(res); - writeBackQuotedStringMySQL(x, wb); - } - return res; -} - StorageMySQL::StorageMySQL( const StorageID & table_id_, mysqlxx::PoolWithFailover && pool_, diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index c97c3e5e2a8..9130ccc359c 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -1581,6 +1581,128 @@ def utf8mb4_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE utf8mb4_test") +def utf8mb4_column_test(clickhouse_node, mysql_node, service_name): + db = "utf8mb4_column_test" + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + + # Full sync + mysql_node.query(f"CREATE TABLE {db}.unquoted (id INT primary key, 日期 DATETIME)") + mysql_node.query(f"CREATE TABLE {db}.quoted (id INT primary key, `日期` DATETIME)") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted VALUES(1, now())") + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + + # Full sync replicated unquoted columns names since they use SHOW CREATE TABLE + # which returns quoted column names + check_query( + clickhouse_node, + f"/* expect: quoted unquoted */ SHOW TABLES FROM {db}", + "quoted\nunquoted\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted", + "1\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE {db}.unquoted_new (id INT primary key, 日期 DATETIME)" + ) + mysql_node.query( + f"CREATE TABLE {db}.quoted_new (id INT primary key, `日期` DATETIME)" + ) + mysql_node.query(f"INSERT INTO {db}.unquoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.quoted_new VALUES(1, now())") + mysql_node.query(f"INSERT INTO {db}.unquoted VALUES(2, now())") + 
mysql_node.query(f"INSERT INTO {db}.quoted VALUES(2, now())") + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.quoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.quoted_new", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 2 */ SELECT COUNT() FROM {db}.unquoted", + "2\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM {db}.unquoted_new", + "1\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + +def utf8mb4_name_test(clickhouse_node, mysql_node, service_name): + db = "您Hi您" + table = "日期" + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"CREATE DATABASE `{db}`") + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}` VALUES(1, now())") + mysql_node.query( + f"CREATE TABLE {db}.{table}_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}_unquoted VALUES(1, now())") + clickhouse_node.query( + f"CREATE DATABASE `{db}` ENGINE = MaterializedMySQL('{service_name}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}`", + "1\n", + ) + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}_unquoted`", + "1\n", + ) + + # Inc sync + mysql_node.query( + f"CREATE TABLE `{db}`.`{table}2` (id INT(11) NOT NULL PRIMARY KEY, `{table}` DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO `{db}`.`{table}2` VALUES(1, now())") + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2`", + "1\n", + ) + + mysql_node.query( + f"CREATE TABLE {db}.{table}2_unquoted (id INT(11) NOT NULL PRIMARY KEY, {table} DATETIME) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4" + ) + mysql_node.query(f"INSERT INTO {db}.{table}2_unquoted VALUES(1, now())") + check_query( + clickhouse_node, + f"/* expect: 1 */ SELECT COUNT() FROM `{db}`.`{table}2_unquoted`", + "1\n", + ) + + clickhouse_node.query(f"DROP DATABASE IF EXISTS `{db}`") + mysql_node.query(f"DROP DATABASE IF EXISTS `{db}`") + + def system_parts_test(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS system_parts_test") clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 32c1da8a2bd..e31ef70b4ad 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -381,6 +381,12 @@ def test_utf8mb4( ): materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql57") materialized_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql80") + materialized_with_ddl.utf8mb4_column_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) + materialized_with_ddl.utf8mb4_name_test( + clickhouse_node, started_mysql_8_0, "mysql80" + ) def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node): From 3710c7238d9eaf0328170bafb03eb4b15ea5d67c Mon Sep 17 00:00:00 2001 From: Antonio 
Andelic Date: Mon, 24 Jul 2023 09:19:06 +0000 Subject: [PATCH 351/478] Fix test_throttling --- tests/integration/test_throttling/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index ff8e7154d0d..2b5e9312a4c 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -114,7 +114,7 @@ def node_update_config(mode, setting, value=None): def assert_took(took, should_took): - assert took >= should_took[0] * 0.9 and took < should_took[1] + assert took >= should_took[0] * 0.85 and took < should_took[1] @pytest.mark.parametrize( From 2471b032ab7a504d1997e9d3681bf97f0564273d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jul 2023 09:52:49 +0000 Subject: [PATCH 352/478] fix lightweight delete after drop of projection --- src/Storages/MergeTree/MergeTreeData.cpp | 3 ++- .../02792_drop_projection_lwd.reference | 1 + .../0_stateless/02792_drop_projection_lwd.sql | 26 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02792_drop_projection_lwd.reference create mode 100644 tests/queries/0_stateless/02792_drop_projection_lwd.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d773f380377..06a9b62d9de 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5693,7 +5693,8 @@ bool MergeTreeData::supportsLightweightDelete() const auto lock = lockParts(); for (const auto & part : data_parts_by_info) { - if (!part->supportLightweightDeleteMutate()) + if (part->getState() == MergeTreeDataPartState::Active + && !part->supportLightweightDeleteMutate()) return false; } return true; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.reference b/tests/queries/0_stateless/02792_drop_projection_lwd.reference new file mode 100644 index 00000000000..6529ff889b0 --- /dev/null +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.reference @@ -0,0 +1 @@ +98 diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql new file mode 100644 index 00000000000..fd446a8efe8 --- /dev/null +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS t_projections_lwd; + +CREATE TABLE t_projections_lwd (a UInt32, b UInt32) ENGINE = MergeTree ORDER BY a; + +INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); + +-- LWD works +DELETE FROM t_projections_lwd WHERE a = 0; + +-- add projection +ALTER TABLE t_projections_lwd ADD PROJECTION p_t_projections_lwd (SELECT * ORDER BY b); +ALTER TABLE t_projections_lwd MATERIALIZE PROJECTION p_t_projections_lwd; + +-- LWD does not work, as expected +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError UNFINISHED } +KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; + +-- drop projection +SET mutations_sync = 2; +ALTER TABLE t_projections_lwd DROP projection p_t_projections_lwd; + +DELETE FROM t_projections_lwd WHERE a = 2; + +SELECT count() FROM t_projections_lwd; + +DROP TABLE t_projections_lwd; From 5da6c99f6df90ae5a8dde59f9cccce8cee48fc61 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 24 Jul 2023 12:02:27 +0200 Subject: [PATCH 353/478] Add comment --- tests/integration/test_throttling/test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/tests/integration/test_throttling/test.py b/tests/integration/test_throttling/test.py index 2b5e9312a4c..62640394a85 100644 --- a/tests/integration/test_throttling/test.py +++ b/tests/integration/test_throttling/test.py @@ -114,6 +114,9 @@ def node_update_config(mode, setting, value=None): def assert_took(took, should_took): + # we need to decrease the lower limit because the server limits could + # be enforced by throttling some server background IO instead of query IO + # and we have no control over it assert took >= should_took[0] * 0.85 and took < should_took[1] From 79cc81890316338e35f13576cfd0360494e72645 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jul 2023 11:06:21 +0000 Subject: [PATCH 354/478] try to fix test --- .../02726_async_insert_flush_stress.sh | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh index 5fafb773d16..876766d0780 100755 --- a/tests/queries/0_stateless/02726_async_insert_flush_stress.sh +++ b/tests/queries/0_stateless/02726_async_insert_flush_stress.sh @@ -11,7 +11,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" 2,"b" @@ -22,7 +24,9 @@ function insert1() function insert2() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT JSONEachRow {"id": 5, "s": "e"} {"id": 6, "s": "f"}' done } @@ -30,28 +34,33 @@ function insert2() function insert3() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" - while true; do + + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" done } function select1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts FORMAT Null" done } function select2() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do ${CLICKHOUSE_CLIENT} -q "SELECT * FROM system.asynchronous_inserts FORMAT Null" done } function flush1() { - while true; do + local TIMELIMIT=$((SECONDS+$1)) + while [ $SECONDS -lt "$TIMELIMIT" ]; do sleep 0.2 ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH ASYNC INSERT QUEUE" done @@ -70,14 +79,14 @@ export -f select2 export -f flush1 for _ in {1..5}; do - timeout $TIMEOUT bash -c insert1 & - timeout $TIMEOUT bash -c insert2 & - timeout $TIMEOUT bash -c insert3 & + insert1 $TIMEOUT & + insert2 $TIMEOUT & + insert3 $TIMEOUT & done -timeout $TIMEOUT bash -c select1 & -timeout $TIMEOUT bash -c select2 & -timeout $TIMEOUT bash -c flush1 & +select1 $TIMEOUT & +select2 $TIMEOUT & +flush1 $TIMEOUT & wait From 21097209d2e709db8022782a02980e52a7bc5df7 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Mon, 24 Jul 2023 15:41:21 +0300 Subject: [PATCH 355/478] Revert "Remove `toDecimalString`" --- .../functions/type-conversion-functions.md | 38 +++ .../functions/type-conversion-functions.md | 38 +++ 
src/Functions/FunctionToDecimalString.cpp | 22 ++ src/Functions/FunctionToDecimalString.h | 312 ++++++++++++++++++ src/IO/WriteHelpers.h | 39 ++- .../02676_to_decimal_string.reference | 21 ++ .../0_stateless/02676_to_decimal_string.sql | 35 ++ 7 files changed, 492 insertions(+), 13 deletions(-) create mode 100644 src/Functions/FunctionToDecimalString.cpp create mode 100644 src/Functions/FunctionToDecimalString.h create mode 100644 tests/queries/0_stateless/02676_to_decimal_string.reference create mode 100644 tests/queries/0_stateless/02676_to_decimal_string.sql diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index c2bd525c483..36f40b37238 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -945,6 +945,44 @@ Result: └────────────┴───────┘ ``` +## toDecimalString + +Converts a numeric value to String with the number of fractional digits in the output specified by the user. + +**Syntax** + +``` sql +toDecimalString(number, scale) +``` + +**Parameters** + +- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). + * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), + * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. + +**Returned value** + +- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). + The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. + +**Example** + +Query: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Result: + +```response +┌toDecimalString(CAST('64.32', 'Float64'), 5)─┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) ## reinterpretAsInt(8\|16\|32\|64) diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index 088b1a9a1f1..e53104d8d71 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -762,6 +762,44 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut; └────────────┴───────┘ ``` +## toDecimalString + +Принимает любой численный тип первым аргументом, возвращает строковое десятичное представление числа с точностью, заданной вторым аргументом. + +**Синтаксис** + +``` sql +toDecimalString(number, scale) +``` + +**Параметры** + +- `number` — Значение любого числового типа: [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md), [Float](/docs/ru/sql-reference/data-types/float.md), [Decimal](/docs/ru/sql-reference/data-types/decimal.md), +- `scale` — Требуемое количество десятичных знаков после запятой, [UInt8](/docs/ru/sql-reference/data-types/int-uint.md). 
+ * Значение `scale` для типов [Decimal](/docs/ru/sql-reference/data-types/decimal.md) и [Int, UInt](/docs/ru/sql-reference/data-types/int-uint.md) должно не превышать 77 (так как это наибольшее количество значимых символов для этих типов), + * Значение `scale` для типа [Float](/docs/ru/sql-reference/data-types/float.md) не должно превышать 60. + +**Возвращаемое значение** + +- Строка ([String](/docs/en/sql-reference/data-types/string.md)), представляющая собой десятичное представление входного числа с заданной длиной дробной части. + При необходимости число округляется по стандартным правилам арифметики. + +**Пример использования** + +Запрос: + +``` sql +SELECT toDecimalString(CAST('64.32', 'Float64'), 5); +``` + +Результат: + +```response +┌─toDecimalString(CAST('64.32', 'Float64'), 5)┐ +│ 64.32000 │ +└─────────────────────────────────────────────┘ +``` + ## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} ## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} diff --git a/src/Functions/FunctionToDecimalString.cpp b/src/Functions/FunctionToDecimalString.cpp new file mode 100644 index 00000000000..fe417b19137 --- /dev/null +++ b/src/Functions/FunctionToDecimalString.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +namespace DB +{ + +REGISTER_FUNCTION(ToDecimalString) +{ + factory.registerFunction( + FunctionDocumentation{ + .description=R"( +Returns string representation of a number. First argument is the number of any numeric type, +second argument is the desired number of digits in fractional part. Returns String. + + )", + .examples{{"toDecimalString", "SELECT toDecimalString(2.1456,2)", ""}}, + .categories{"String"} + }, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h new file mode 100644 index 00000000000..6ae007e6b66 --- /dev/null +++ b/src/Functions/FunctionToDecimalString.h @@ -0,0 +1,312 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; + extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; +} + +class FunctionToDecimalString : public IFunction +{ +public: + static constexpr auto name = "toDecimalString"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isNumber(*arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal first argument for formatDecimal function: got {}, expected numeric type", + arguments[0]->getName()); + + if (!isUInt8(*arguments[1])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal second argument for formatDecimal function: got {}, expected UInt8", + arguments[1]->getName()); + + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + +private: + /// For operations with Integer/Float + template + void vectorConstant(const FromVectorType & vec_from, UInt8 precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = 
vec_from.size(); + result_offsets.resize(input_rows_count); + + /// Buffer is used here and in functions below because resulting size cannot be precisely anticipated, + /// and buffer resizes on-the-go. Also, .count() provided by buffer is convenient in this case. + WriteBufferFromVector buf_to(vec_to); + + for (size_t i = 0; i < input_rows_count; ++i) + { + format(vec_from[i], buf_to, precision); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + template + void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + format(vec_from[i], buf_to, vec_precision[i]); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + template + void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const + { + size_t input_rows_count = vec_precision.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + format(value_from, buf_to, vec_precision[i]); + result_offsets[i] = buf_to.count(); + } + + buf_to.finalize(); + } + + /// For operations with Decimal + template + void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + /// There are no more than 77 meaning digits (as it is the max length of UInt256). So we can limit it with 77. 
+ constexpr size_t max_digits = std::numeric_limits::digits10; + if (precision > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + + WriteBufferFromVector buf_to(vec_to); + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + for (size_t i = 0; i < input_rows_count; ++i) + { + writeText(vec_from[i], from_scale, buf_to, true, true, precision); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + void vectorVector(const FirstArgVectorType & vec_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + size_t input_rows_count = vec_from.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + writeText(vec_from[i], from_scale, buf_to, true, true, vec_precision[i]); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, + ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const + { + size_t input_rows_count = vec_precision.size(); + result_offsets.resize(input_rows_count); + + WriteBufferFromVector buf_to(vec_to); + + constexpr size_t max_digits = std::numeric_limits::digits10; + + for (size_t i = 0; i < input_rows_count; ++i) + { + if (vec_precision[i] > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); + writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]); + writeChar(0, buf_to); + result_offsets[i] = buf_to.count(); + } + buf_to.finalize(); + } + + template + static void format(T value, DB::WriteBuffer & out, UInt8 precision) + { + /// Maximum of 60 is hard-coded in 'double-conversion/double-conversion.h' for floating point values, + /// Catch this here to give user a more reasonable error. + if (precision > 60) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too high precision requested for Float, must not be more than 60, got {}", Int8(precision)); + + DB::DoubleConverter::BufferType buffer; + double_conversion::StringBuilder builder{buffer, sizeof(buffer)}; + + const auto result = DB::DoubleConverter::instance().ToFixed(value, precision, &builder); + + if (!result) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Error processing number: {}", value); + + out.write(buffer, builder.position()); + writeChar(0, out); + } + + template + static void format(T value, DB::WriteBuffer & out, UInt8 precision) + { + /// Fractional part for Integer is just trailing zeros. Let's limit it with 77 (like with Decimals). 
+ constexpr size_t max_digits = std::numeric_limits::digits10; + if (precision > max_digits) + throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, + "Too many fractional digits requested, shall not be more than {}", max_digits); + writeText(value, out); + if (precision > 0) [[likely]] + { + writeChar('.', out); + for (int i = 0; i < precision; ++i) + writeChar('0', out); + writeChar(0, out); + } + } + +public: + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + switch (arguments[0].type->getTypeId()) + { + case TypeIndex::UInt8: return executeType(arguments); + case TypeIndex::UInt16: return executeType(arguments); + case TypeIndex::UInt32: return executeType(arguments); + case TypeIndex::UInt64: return executeType(arguments); + case TypeIndex::UInt128: return executeType(arguments); + case TypeIndex::UInt256: return executeType(arguments); + case TypeIndex::Int8: return executeType(arguments); + case TypeIndex::Int16: return executeType(arguments); + case TypeIndex::Int32: return executeType(arguments); + case TypeIndex::Int64: return executeType(arguments); + case TypeIndex::Int128: return executeType(arguments); + case TypeIndex::Int256: return executeType(arguments); + case TypeIndex::Float32: return executeType(arguments); + case TypeIndex::Float64: return executeType(arguments); + case TypeIndex::Decimal32: return executeType(arguments); + case TypeIndex::Decimal64: return executeType(arguments); + case TypeIndex::Decimal128: return executeType(arguments); + case TypeIndex::Decimal256: return executeType(arguments); + default: + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); + } + } + +private: + template + ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const + { + const auto * from_col_const = typeid_cast(arguments[0].column.get()); + const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); + const auto * precision_col_const = typeid_cast(arguments[1].column.get()); + + auto result_col = ColumnString::create(); + auto * result_col_string = assert_cast(result_col.get()); + ColumnString::Chars & result_chars = result_col_string->getChars(); + ColumnString::Offsets & result_offsets = result_col_string->getOffsets(); + + if constexpr (is_decimal) + { + const auto * from_col = checkAndGetColumn>(arguments[0].column.get()); + UInt8 from_scale = from_col->getScale(); + + if (from_col) + { + if (precision_col_const) + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); + else + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); + } + else if (from_col_const) + constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets, from_scale); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); + } + else + { + const auto * from_col = checkAndGetColumn>(arguments[0].column.get()); + if (from_col) + { + if (precision_col_const) + vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); + else + vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); + } + else if (from_col_const) + constantVector(from_col_const->template 
getValue(), precision_col->getData(), result_chars, result_offsets); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); + } + + return result_col; + } +}; + +} diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 76778543bd0..aa4c9b17e48 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -905,26 +905,26 @@ inline void writeText(const IPv4 & x, WriteBuffer & buf) { writeIPv4Text(x, buf) inline void writeText(const IPv6 & x, WriteBuffer & buf) { writeIPv6Text(x, buf); } template -void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) +void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros, + bool fixed_fractional_length, UInt32 fractional_length) { /// If it's big integer, but the number of digits is small, /// use the implementation for smaller integers for more efficient arithmetic. - if constexpr (std::is_same_v) { if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } } @@ -932,24 +932,36 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool { if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } else if (x <= std::numeric_limits::max()) { - writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros); + writeDecimalFractional(static_cast(x), scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); return; } } constexpr size_t max_digits = std::numeric_limits::digits10; assert(scale <= max_digits); + assert(fractional_length <= max_digits); + char buf[max_digits]; - memset(buf, '0', scale); + memset(buf, '0', std::max(scale, fractional_length)); T value = x; Int32 last_nonzero_pos = 0; - for (Int32 pos = scale - 1; pos >= 0; --pos) + + if (fixed_fractional_length && fractional_length < scale) + { + T new_value = value / DecimalUtils::scaleMultiplier(scale - fractional_length - 1); + auto round_carry = new_value % 10; + value = new_value / 10; + if (round_carry >= 5) + value += 1; + } + + for (Int32 pos = fixed_fractional_length ? std::min(scale - 1, fractional_length - 1) : scale - 1; pos >= 0; --pos) { auto remainder = value % 10; value /= 10; @@ -961,11 +973,12 @@ void writeDecimalFractional(const T & x, UInt32 scale, WriteBuffer & ostr, bool } writeChar('.', ostr); - ostr.write(buf, trailing_zeros ? scale : last_nonzero_pos + 1); + ostr.write(buf, fixed_fractional_length ? fractional_length : (trailing_zeros ? 
scale : last_nonzero_pos + 1)); } template -void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros) +void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zeros, + bool fixed_fractional_length = false, UInt32 fractional_length = 0) { T part = DecimalUtils::getWholePart(x, scale); @@ -976,7 +989,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer writeIntText(part, ostr); - if (scale) + if (scale || (fixed_fractional_length && fractional_length > 0)) { part = DecimalUtils::getFractionalPart(x, scale); if (part || trailing_zeros) @@ -984,7 +997,7 @@ void writeText(Decimal x, UInt32 scale, WriteBuffer & ostr, bool trailing_zer if (part < 0) part *= T(-1); - writeDecimalFractional(part, scale, ostr, trailing_zeros); + writeDecimalFractional(part, scale, ostr, trailing_zeros, fixed_fractional_length, fractional_length); } } } diff --git a/tests/queries/0_stateless/02676_to_decimal_string.reference b/tests/queries/0_stateless/02676_to_decimal_string.reference new file mode 100644 index 00000000000..4c27ee5b528 --- /dev/null +++ b/tests/queries/0_stateless/02676_to_decimal_string.reference @@ -0,0 +1,21 @@ +2.00000000000000000000000000000000000000000000000000000000000000000000000000000 +2.12 +-2.00000000000000000000000000000000000000000000000000000000000000000000000000000 +-2.12 +2.987600000000000033395508580724708735942840576171875000000000 +2.15 +-2.987600000000000033395508580724708735942840576171875000000000 +-2.15 +64.1230010986 +64.2340000000 +-64.1230010986 +-64.2340000000 +-32.345 +32.34500000000000000000000000000000000000000000000000000000000000000000000000000 +32.46 +-64.5671232345 +128.78932312332132985464 +-128.78932312332132985464 +128.78932312332132985464000000000000000000000000000000000000000000000000000000000 +128.7893231233 +-128.78932312332132985464123123789323123321329854600000000000000000000000000000000 diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql new file mode 100644 index 00000000000..563d60c62c7 --- /dev/null +++ b/tests/queries/0_stateless/02676_to_decimal_string.sql @@ -0,0 +1,35 @@ +-- Regular types +SELECT toDecimalString(2, 77); -- more digits required than exist +SELECT toDecimalString(2.123456, 2); -- rounding +SELECT toDecimalString(-2, 77); -- more digits required than exist +SELECT toDecimalString(-2.123456, 2); -- rounding + +SELECT toDecimalString(2.9876, 60); -- more digits required than exist (took 60 as it is float by default) +SELECT toDecimalString(2.1456, 2); -- rounding +SELECT toDecimalString(-2.9876, 60); -- more digits required than exist +SELECT toDecimalString(-2.1456, 2); -- rounding + +-- Float32 and Float64 tests. No sense to test big float precision -- the result will be a mess anyway. 
+SELECT toDecimalString(64.123::Float32, 10); +SELECT toDecimalString(64.234::Float64, 10); +SELECT toDecimalString(-64.123::Float32, 10); +SELECT toDecimalString(-64.234::Float64, 10); + +-- Decimals +SELECT toDecimalString(-32.345::Decimal32(3), 3); +SELECT toDecimalString(32.345::Decimal32(3), 77); -- more digits required than exist +SELECT toDecimalString(32.456::Decimal32(3), 2); -- rounding +SELECT toDecimalString('-64.5671232345'::Decimal64(10), 10); +SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 20); +SELECT toDecimalString('-128.78932312332132985464123123'::Decimal128(26), 20); -- rounding +SELECT toDecimalString('128.78932312332132985464'::Decimal128(20), 77); -- more digits required than exist +SELECT toDecimalString('128.789323123321329854641231237893231233213298546'::Decimal256(45), 10); -- rounding +SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 77); -- more digits required than exist + +-- Max number of decimal fractional digits is defined as 77 for Int/UInt/Decimal and 60 for Float. +-- These values shall work OK. +SELECT toDecimalString('32.32'::Float32, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} +SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} From df5ff1383c5c6f7e24cb6933246fc04cf5dfe702 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jul 2023 14:57:05 +0200 Subject: [PATCH 356/478] Fix settings not applied for explain query when format provided (#51859) --- src/Interpreters/InterpreterSetQuery.cpp | 3 +++ ..._explain_settings_not_applied_bug.reference | 11 +++++++++++ .../02798_explain_settings_not_applied_bug.sql | 18 ++++++++++++++++++ 3 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference create mode 100644 tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 6db57a4f950..e9118b747e5 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -65,6 +65,9 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta } else if (const auto * explain_query = ast->as()) { + if (explain_query->settings_ast) + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); + applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } else if (const auto * query_with_output = dynamic_cast(ast.get())) diff --git a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference new file mode 100644 index 00000000000..6fc36a0ba01 --- /dev/null +++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.reference @@ -0,0 +1,11 @@ + explain + + (Expression) + ExpressionTransform + (Aggregating) + FinalizeAggregatedTransform + AggregatingInOrderTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeInOrder 0 → 1 diff --git 
a/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql new file mode 100644 index 00000000000..76f2129abfa --- /dev/null +++ b/tests/queries/0_stateless/02798_explain_settings_not_applied_bug.sql @@ -0,0 +1,18 @@ +SET read_in_order_two_level_merge_threshold=1000000; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(a UInt64) +ENGINE = MergeTree +ORDER BY a; + +INSERT INTO t SELECT * FROM numbers_mt(1e3); +OPTIMIZE TABLE t FINAL; + +EXPLAIN PIPELINE +SELECT a +FROM t +GROUP BY a +FORMAT PrettySpace +SETTINGS optimize_aggregation_in_order = 1; + +DROP TABLE t; From c7239c64ea36a6994cd88d34edc3774243472a68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jul 2023 15:16:44 +0200 Subject: [PATCH 357/478] Remove unused code --- src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 48adf36e678..3eba9a9de24 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -145,9 +145,6 @@ bool IMergeTreeSelectAlgorithm::getNewTask() ChunkAndProgress IMergeTreeSelectAlgorithm::read() { - size_t num_read_rows = 0; - size_t num_read_bytes = 0; - while (!is_cancelled) { try @@ -178,10 +175,6 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read() ordered_columns.push_back(res.block.getByName(name).column); } - /// Account a progress from previous empty chunks. - res.num_read_rows += num_read_rows; - res.num_read_bytes += num_read_bytes; - return ChunkAndProgress{ .chunk = Chunk(ordered_columns, res.row_count), .num_read_rows = res.num_read_rows, @@ -194,7 +187,7 @@ ChunkAndProgress IMergeTreeSelectAlgorithm::read() } } - return {Chunk(), num_read_rows, num_read_bytes, true}; + return {Chunk(), 0, 0, true}; } void IMergeTreeSelectAlgorithm::initializeMergeTreeReadersForCurrentTask( From 94fa2f18b174a0f88b59479aac40fbc2abbe73a7 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jul 2023 15:16:56 +0200 Subject: [PATCH 358/478] fix analyzer --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 17 +++++++++++++++++ .../02701_non_parametric_function.reference | 1 - .../02701_non_parametric_function.sql | 9 --------- 3 files changed, 17 insertions(+), 10 deletions(-) delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.reference delete mode 100644 tests/queries/0_stateless/02701_non_parametric_function.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index da8933aabaa..fd16b6e168b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -116,6 +116,7 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; } /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. 
@@ -4896,6 +4897,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi lambda_expression_untyped->formatASTForErrorMessage(), scope.scope_node->formatASTForErrorMessage()); + if (!parameters.empty()) + { + throw Exception( + ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_node.formatASTForErrorMessage()); + } + auto lambda_expression_clone = lambda_expression_untyped->clone(); IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/); @@ -5012,9 +5019,13 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi } FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); + bool is_executable_udf = true; if (!function) + { function = FunctionFactory::instance().tryGet(function_name, scope.context); + is_executable_udf = false; + } if (!function) { @@ -5065,6 +5076,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi return result_projection_names; } + /// Executable UDFs may have parameters. They are checked in UserDefinedExecutableFunctionFactory. + if (!parameters.empty() && !is_executable_udf) + { + throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_name); + } + /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function. * Then each lambda arguments are initialized with columns, where column source is lambda. * This information is important for later steps of query processing. diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference deleted file mode 100644 index 00750edc07d..00000000000 --- a/tests/queries/0_stateless/02701_non_parametric_function.reference +++ /dev/null @@ -1 +0,0 @@ -3 diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql deleted file mode 100644 index 11c03372a73..00000000000 --- a/tests/queries/0_stateless/02701_non_parametric_function.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Tags: no-parallel - -SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } - -CREATE FUNCTION IF NOT EXISTS sum_udf as (x, y) -> (x + y); - -SELECT sum_udf(1)(1, 2); - -DROP FUNCTION IF EXISTS sum_udf; From c6e6fd761317662c05532d695c20be72f8e847d2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jul 2023 15:58:21 +0200 Subject: [PATCH 359/478] Shard `OpenedFileCache` to avoid lock contention (#51341) * shard OpenedFileCache to avoid lock contention * Update OpenedFileCache.h * fix build --------- Co-authored-by: Alexey Milovidov --- src/Common/ProfileEvents.cpp | 1 + src/IO/OpenedFileCache.h | 109 +++++++++++++++++++++-------------- 2 files changed, 68 insertions(+), 42 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 4a656e38edf..f18a67fa565 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -45,6 +45,7 @@ M(MMappedFileCacheMisses, "Number of times a file has not been found in the MMap cache (for the 'mmap' read_method), so we had to mmap it again.") \ M(OpenedFileCacheHits, "Number of times a file has been found in the opened file cache, so we didn't have to open it again.") \ M(OpenedFileCacheMisses, "Number of times a file has been found in the 
opened file cache, so we had to open it again.") \ + M(OpenedFileCacheMicroseconds, "Amount of time spent executing OpenedFileCache methods.") \ M(AIOWrite, "Number of writes with Linux or FreeBSD AIO interface") \ M(AIOWriteBytes, "Number of bytes written with Linux or FreeBSD AIO interface") \ M(AIORead, "Number of reads with Linux or FreeBSD AIO interface") \ diff --git a/src/IO/OpenedFileCache.h b/src/IO/OpenedFileCache.h index 61e502a494b..2cecc675af7 100644 --- a/src/IO/OpenedFileCache.h +++ b/src/IO/OpenedFileCache.h @@ -4,14 +4,18 @@ #include #include -#include #include +#include +#include + +#include namespace ProfileEvents { extern const Event OpenedFileCacheHits; extern const Event OpenedFileCacheMisses; + extern const Event OpenedFileCacheMicroseconds; } namespace DB @@ -26,57 +30,79 @@ namespace DB */ class OpenedFileCache { -private: - using Key = std::pair; + class OpenedFileMap + { + using Key = std::pair; - using OpenedFileWeakPtr = std::weak_ptr; - using Files = std::map; + using OpenedFileWeakPtr = std::weak_ptr; + using Files = std::map; - Files files; - std::mutex mutex; + Files files; + std::mutex mutex; + + public: + using OpenedFilePtr = std::shared_ptr; + + OpenedFilePtr get(const std::string & path, int flags) + { + Key key(path, flags); + + std::lock_guard lock(mutex); + + auto [it, inserted] = files.emplace(key, OpenedFilePtr{}); + if (!inserted) + { + if (auto res = it->second.lock()) + { + ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits); + return res; + } + } + ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses); + + OpenedFilePtr res + { + new OpenedFile(path, flags), + [key, this](auto ptr) + { + { + std::lock_guard another_lock(mutex); + files.erase(key); + } + delete ptr; + } + }; + + it->second = res; + return res; + } + + void remove(const std::string & path, int flags) + { + Key key(path, flags); + std::lock_guard lock(mutex); + files.erase(key); + } + }; + + static constexpr size_t buckets = 1024; + std::vector impls{buckets}; public: - using OpenedFilePtr = std::shared_ptr; + using OpenedFilePtr = OpenedFileMap::OpenedFilePtr; OpenedFilePtr get(const std::string & path, int flags) { - Key key(path, flags); - - std::lock_guard lock(mutex); - - auto [it, inserted] = files.emplace(key, OpenedFilePtr{}); - if (!inserted) - { - if (auto res = it->second.lock()) - { - ProfileEvents::increment(ProfileEvents::OpenedFileCacheHits); - return res; - } - } - ProfileEvents::increment(ProfileEvents::OpenedFileCacheMisses); - - OpenedFilePtr res - { - new OpenedFile(path, flags), - [key, this](auto ptr) - { - { - std::lock_guard another_lock(mutex); - files.erase(key); - } - delete ptr; - } - }; - - it->second = res; - return res; + ProfileEventTimeIncrement watch(ProfileEvents::OpenedFileCacheMicroseconds); + const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets; + return impls[bucket].get(path, flags); } void remove(const std::string & path, int flags) { - Key key(path, flags); - std::lock_guard lock(mutex); - files.erase(key); + ProfileEventTimeIncrement watch(ProfileEvents::OpenedFileCacheMicroseconds); + const auto bucket = CityHash_v1_0_2::CityHash64(path.data(), path.length()) % buckets; + impls[bucket].remove(path, flags); } static OpenedFileCache & instance() @@ -87,5 +113,4 @@ public: }; using OpenedFileCachePtr = std::shared_ptr; - } From d2d100b68a4fc1765708a276b217faf403722fb4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Jul 2023 17:05:57 
+0200 Subject: [PATCH 360/478] Cancel execution in PipelineExecutor in case of exception in graph->updateNode --- src/Processors/Executors/PipelineExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index f523e7b7cf9..1508d834592 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -272,7 +272,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie /// Prepare processor after execution. if (!graph->updateNode(context.getProcessorID(), queue, async_queue)) - finish(); + cancel(); /// Push other tasks to global queue. tasks.pushTasks(queue, async_queue, context); From f067f8c46d2aec217c3f835441ca1a2a281c72fd Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 24 Jul 2023 15:37:16 +0000 Subject: [PATCH 361/478] Make 01951_distributed_push_down_limit analyzer agnostic --- tests/analyzer_tech_debt.txt | 1 - ...1951_distributed_push_down_limit.reference | 32 +++++++++---------- .../01951_distributed_push_down_limit.sql | 4 +-- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index b746d1610a4..1d56b2c3a71 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -72,7 +72,6 @@ 01925_test_storage_merge_aliases 01930_optimize_skip_unused_shards_rewrite_in 01947_mv_subquery -01951_distributed_push_down_limit 01952_optimize_distributed_group_by_sharding_key 02000_join_on_const 02001_shard_num_shard_count diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index b9a7d17e955..d175d31846b 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -1,19 +1,19 @@ -- { echo } -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Merge sorted streams after aggregation stage for ORDER BY) +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; +Expression + Limit + Sorting Union - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from remote replica) -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; -Expression (Projection) - Limit (preliminary LIMIT (without OFFSET)) - Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Sorting + Expression + ReadFromStorage + ReadFromRemote +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; +Expression + Limit + Sorting Union - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) - ReadFromRemote (Read from 
remote replica) + Sorting + Expression + ReadFromStorage + ReadFromRemote diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql index 184e6321988..aee714a494e 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql @@ -3,5 +3,5 @@ set prefer_localhost_replica = 1; -- { echo } -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; -explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; +explain description=0 select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; From da0a332d46a7939377dca08147897d0f5b1dd9c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 18:09:38 +0200 Subject: [PATCH 362/478] Keep symtab --- cmake/split_debug_symbols.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/split_debug_symbols.cmake b/cmake/split_debug_symbols.cmake index a9c2158359a..8ba7669d9e2 100644 --- a/cmake/split_debug_symbols.cmake +++ b/cmake/split_debug_symbols.cmake @@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols) # Splits debug symbols into separate file, leaves the binary untouched: COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" - # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check: - COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check. + # Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
+ COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash --keep-section=.symtab --keep-section=.strtab --keep-section=.shstrtab "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" # Associate stripped binary with debug symbols: COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM From 0bbf26549f4fb49c599b4a58475c71bccfe9b37b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 18:13:15 +0200 Subject: [PATCH 363/478] Fix test --- tests/integration/test_drop_is_lock_free/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_drop_is_lock_free/test.py b/tests/integration/test_drop_is_lock_free/test.py index 8d92d784226..9f595800bea 100644 --- a/tests/integration/test_drop_is_lock_free/test.py +++ b/tests/integration/test_drop_is_lock_free/test.py @@ -104,7 +104,7 @@ def test_query_is_lock_free(lock_free_query, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) @@ -173,7 +173,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table): select_handler = node.get_query_request( f""" - SELECT sleepEachRow(3) FROM {exclusive_table}; + SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0; """, query_id=query_id, ) From c755fde3a41f95466db1b5b9aeeb54decafa97c8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 20 Jul 2023 18:36:00 +0000 Subject: [PATCH 364/478] Include query cache usage in SYSTEM.QUERY_LOG --- docs/en/operations/query-cache.md | 6 +- docs/en/operations/system-tables/query_log.md | 6 ++ src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/Cache/QueryCache.h | 9 +++ src/Interpreters/QueryLog.cpp | 13 ++++ src/Interpreters/QueryLog.h | 5 +- src/Interpreters/executeQuery.cpp | 22 ++++--- src/Interpreters/executeQuery.h | 1 + .../02494_query_cache_query_log.reference | 16 +++++ .../02494_query_cache_query_log.sql | 64 +++++++++++++++++++ 10 files changed, 132 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02494_query_cache_query_log.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_query_log.sql diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index bfa51650cd8..547105c65cc 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -62,8 +62,10 @@ may return cached results then. The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table `system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table -`system.events`. Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = true". Other queries do not -affect the cache miss counter. +[system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = +true". Other queries do not affect the cache miss counter. 
Field `query_cache_usage` in system table +[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the query +cache. The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be changed (see below) but doing so is not recommended for security reasons. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index b9fdd19c643..b8dc0c0224c 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -111,6 +111,11 @@ Columns: - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: + - `'None' = 1` = The query result was neither written into nor read from the query cache. + - `'Write' = 2` = The query result was written into the query cache. + - `'Read' = 3` = The query result was read from the query cache. + - `'Unknown' = 4` = Unknown status. **Example** @@ -186,6 +191,7 @@ used_formats: [] used_functions: [] used_storages: [] used_table_functions: [] +query_cache_usage: None ``` **See Also** diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 8d0f18cc305..c6596f50eda 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -605,7 +605,7 @@ try total_rows, total_bytes, key.query_str); bool pulling_pipeline = false; - logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, internal); + logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, QueryCache::Usage::None, internal); } catch (...) { diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c24b09c8e46..973015b8003 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -24,6 +24,15 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); class QueryCache { public: + enum class Usage + { + /// starts at 1 for compatibility with DataTypeEnum8 + None = 1, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache + Unknown, /// we don't know what happened + }; + /// Represents a query result in the cache.
struct Key { diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index ec0315c2f95..c3294512f14 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -41,6 +41,15 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"ExceptionWhileProcessing", static_cast(EXCEPTION_WHILE_PROCESSING)} }); + auto query_cache_usage_datatype = std::make_shared( + DataTypeEnum8::Values + { + {"None", static_cast(QueryCache::Usage::None)}, + {"Write", static_cast(QueryCache::Usage::Write)}, + {"Read", static_cast(QueryCache::Usage::Read)}, + {"Unknown", static_cast(QueryCache::Usage::Unknown)} + }); + auto low_cardinality_string = std::make_shared(std::make_shared()); auto array_low_cardinality_string = std::make_shared(low_cardinality_string); @@ -126,6 +135,8 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"transaction_id", getTransactionIDDataType()}, + {"query_cache_usage", std::move(query_cache_usage_datatype)}, + {"asynchronous_read_counters", std::make_shared(low_cardinality_string, std::make_shared())}, }; } @@ -277,6 +288,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id}); + columns[i++]->insert(query_cache_usage); + if (async_read_counters) async_read_counters->dumpToMapColumn(columns[i++].get()); else diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 570d1297239..5bc80280eac 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -4,8 +4,9 @@ #include #include #include -#include +#include #include +#include #include #include #include @@ -96,6 +97,8 @@ struct QueryLogElement TransactionID tid; + QueryCache::Usage query_cache_usage = QueryCache::Usage::Unknown; + static std::string name() { return "QueryLog"; } static NamesAndTypesList getNamesAndTypes(); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 688d3b9967d..578ca3b41f9 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -209,7 +209,7 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er } static void -addStatusInfoToQueryElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) +addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) { const auto time_now = std::chrono::system_clock::now(); UInt64 elapsed_microseconds = info.elapsed_microseconds; @@ -347,6 +347,7 @@ void logQueryFinish( const QueryPipeline & query_pipeline, bool pulling_pipeline, std::shared_ptr query_span, + QueryCache::Usage query_cache_usage, bool internal) { const Settings & settings = context->getSettingsRef(); @@ -364,7 +365,7 @@ void logQueryFinish( QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); elem.type = QueryLogElementType::QUERY_FINISH; - addStatusInfoToQueryElement(elem, info, query_ast, context); + addStatusInfoToQueryLogElement(elem, info, query_ast, context); if (pulling_pipeline) { @@ -399,6 +400,8 @@ void logQueryFinish( ReadableSize(elem.read_bytes / elapsed_seconds)); } + elem.query_cache_usage = query_cache_usage; + if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) { @@ -499,13 +502,15 @@ void logQueryException( if (process_list_elem) { QueryStatusInfo info = 
process_list_elem->getInfo(true, settings.log_profile_events, false); - addStatusInfoToQueryElement(elem, info, query_ast, context); + addStatusInfoToQueryLogElement(elem, info, query_ast, context); } else { elem.query_duration_ms = start_watch.elapsedMilliseconds(); } + elem.query_cache_usage = QueryCache::Usage::None; + if (settings.calculate_text_stack_trace && log_error) setExceptionStackTrace(elem); logException(context, elem, log_error); @@ -975,7 +980,7 @@ static std::tuple executeQueryImpl( QueryCachePtr query_cache = context->getQueryCache(); const bool can_use_query_cache = query_cache != nullptr && settings.use_query_cache && !internal && (ast->as() || ast->as()); - bool write_into_query_cache = false; + QueryCache::Usage query_cache_usage = QueryCache::Usage::None; if (!async_insert) { @@ -992,6 +997,7 @@ static std::tuple executeQueryImpl( QueryPipeline pipeline; pipeline.readFromQueryCache(reader.getSource(), reader.getSourceTotals(), reader.getSourceExtremes()); res.pipeline = std::move(pipeline); + query_cache_usage = QueryCache::Usage::Read; return true; } } @@ -1095,7 +1101,7 @@ static std::tuple executeQueryImpl( settings.query_cache_max_size_in_bytes, settings.query_cache_max_entries)); res.pipeline.writeResultIntoQueryCache(query_cache_writer); - write_into_query_cache = true; + query_cache_usage = QueryCache::Usage::Write; } } @@ -1147,19 +1153,19 @@ static std::tuple executeQueryImpl( auto finish_callback = [elem, context, ast, - write_into_query_cache, + query_cache_usage, internal, implicit_txn_control, execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { - if (write_into_query_cache) + if (query_cache_usage == QueryCache::Usage::Write) /// Trigger the actual write of the buffered query result into the query cache. This is done explicitly to prevent /// partial/garbage results in case of exceptions during query execution. 
query_pipeline.finalizeWriteInQueryCache(); - logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, internal); + logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, query_cache_usage, internal); if (*implicit_txn_control) execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 53624f8c812..f2a12bbef18 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -92,6 +92,7 @@ void logQueryFinish( const QueryPipeline & query_pipeline, bool pulling_pipeline, std::shared_ptr query_span, + QueryCache::Usage query_cache_usage, bool internal); void logQueryException( diff --git a/tests/queries/0_stateless/02494_query_cache_query_log.reference b/tests/queries/0_stateless/02494_query_cache_query_log.reference new file mode 100644 index 00000000000..9037909d121 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_query_log.reference @@ -0,0 +1,16 @@ +-- Run a query with query cache not enabled +124437993 +QueryStart SELECT 124437993; Unknown +QueryFinish SELECT 124437993; None +-- Run a query with query cache enabled +124437994 +QueryStart SELECT 124437994 SETTINGS use_query_cache = 1; Unknown +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Write +-- Run the same query with query cache enabled +124437994 +QueryStart SELECT 124437994 SETTINGS use_query_cache = 1; Unknown +QueryStart SELECT 124437994 SETTINGS use_query_cache = 1; Unknown +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Read +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Write +-- Throw exception with query cache enabled +SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1; None diff --git a/tests/queries/0_stateless/02494_query_cache_query_log.sql b/tests/queries/0_stateless/02494_query_cache_query_log.sql new file mode 100644 index 00000000000..79a8f4cb62b --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_query_log.sql @@ -0,0 +1,64 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +-- DROP TABLE system.query_log; -- debugging + + + +SELECT '-- Run a query with query cache not enabled'; +SELECT 124437993; + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'None' +SELECT type, query, query_cache_usage +FROM system.query_log +WHERE current_database = currentDatabase() + AND query = 'SELECT 124437993;' +ORDER BY type; + + + +SELECT '-- Run a query with query cache enabled'; +SELECT 124437994 SETTINGS use_query_cache = 1; + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'Write' +SELECT type, query, query_cache_usage +FROM system.query_log +WHERE current_database = currentDatabase() + AND query = 'SELECT 124437994 SETTINGS use_query_cache = 1;' +ORDER BY type; + + + +SELECT '-- Run the same query with query cache enabled'; +SELECT 124437994 SETTINGS use_query_cache = 1; + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'Read' +SELECT type, query, query_cache_usage +FROM system.query_log +WHERE current_database = currentDatabase() + AND query = 'SELECT 124437994 SETTINGS use_query_cache = 1;' +ORDER BY type; + + + +SELECT '-- Throw exception with query cache enabled'; +SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + +SYSTEM FLUSH LOGS; + +-- Field 'query_cache_usage' should be 'None' +SELECT query, query_cache_usage +FROM system.query_log 
+WHERE current_database = currentDatabase() + AND query = 'SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1;' + AND type = 'ExceptionWhileProcessing'; + +SYSTEM DROP QUERY CACHE; From 032956dd1eeca994d6fa5a66f974cfa10203c205 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 24 Jul 2023 18:42:02 +0200 Subject: [PATCH 365/478] fix --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- .../test.py | 27 ++++++++++--------- .../01111_create_drop_replicated_db_stress.sh | 2 +- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2393f45ebb6..e11913fc3d2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -576,7 +576,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper /// It's ok if replica became readonly due to connection loss after we got current zookeeper (in this case zookeeper must be expired). /// And it's ok if replica became readonly after shutdown. /// In other cases it's likely that someone called pullLogsToQueue(...) when queue is not initialized yet by RestartingThread. - bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_called; + bool not_completely_initialized = storage.is_readonly && !zookeeper->expired() && !storage.shutdown_prepared_called; if (not_completely_initialized) throw Exception(ErrorCodes::LOGICAL_ERROR, "Tried to pull logs to queue (reason: {}) on readonly replica {}, it's a bug", reason, storage.getStorageID().getNameForLogs()); diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index a2a4ec92cf7..20b6a6c977f 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -49,25 +49,28 @@ def test_shutdown_and_wait(start_cluster): node.stop_clickhouse(kill=False, stop_wait_sec=60) p = Pool(50) - pm = PartitionManager() - - pm.partition_instances(node1, node2) def insert(value): node1.query(f"INSERT INTO test_table VALUES ({value})") - p.map(insert, range(1, 50)) + with PartitionManager() as pm: + pm.partition_instances(node1, node2) + # iptables rules must be applied immediately, but looks like sometimes they are not... 
+        time.sleep(3)
 
-    # Start shutdown async
-    waiter = p.apply_async(soft_shutdown, (node1,))
-    # to be sure that shutdown started
-    time.sleep(5)
+        p.map(insert, range(1, 50))
 
-    # node 2 partitioned and don't see any data
-    assert node2.query("SELECT * FROM test_table") == "0\n"
+        # Start shutdown async
+        waiter = p.apply_async(soft_shutdown, (node1,))
+        # to be sure that shutdown started
+        time.sleep(5)
+
+        # node 2 partitioned and don't see any data
+        assert node2.query("SELECT * FROM test_table") == "0\n"
+
+        # Restore network
+        pm.heal_all()
 
-    # Restore network
-    pm.heal_all()
     # wait for shutdown to finish
     waiter.get()
 
diff --git a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh
index cc63af3676b..59899e1c14a 100755
--- a/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh
+++ b/tests/queries/0_stateless/01111_create_drop_replicated_db_stress.sh
@@ -56,7 +56,7 @@ function create_table()
         if [ -z "$database" ]; then continue; fi
         $CLICKHOUSE_CLIENT --distributed_ddl_task_timeout=0 -q \
             "create table $database.rmt_${RANDOM}_${RANDOM}_${RANDOM} (n int) engine=ReplicatedMergeTree order by tuple() -- suppress $CLICKHOUSE_TEST_ZOOKEEPER_PREFIX" \
-            2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE
+            2>&1| grep -Fa "Exception: " | grep -Fv "Macro 'uuid' and empty arguments" | grep -Fv "Cannot enqueue query" | grep -Fv "ZooKeeper session expired" | grep -Fv UNKNOWN_DATABASE | grep -Fv TABLE_IS_DROPPED
         sleep 0.$RANDOM
     done
 }

From 22a2fa097f3795cb2a483e899482b97f80aa8189 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 24 Jul 2023 19:40:02 +0200
Subject: [PATCH 366/478] Improve error messages

---
 src/Functions/GregorianDate.cpp | 2 +-
 src/Functions/parseDateTime.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Functions/GregorianDate.cpp b/src/Functions/GregorianDate.cpp
index aaaeeb7339d..f28194781c2 100644
--- a/src/Functions/GregorianDate.cpp
+++ b/src/Functions/GregorianDate.cpp
@@ -125,7 +125,7 @@ void GregorianDate::init(ReadBuffer & in)
     assertEOF(in);
 
     if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
-        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date");
+        throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).", year_, month_, day_of_month_);
 }
 
 bool GregorianDate::tryInit(ReadBuffer & in)
diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp
index c3fbc08c4a9..2381def9151 100644
--- a/src/Functions/parseDateTime.cpp
+++ b/src/Functions/parseDateTime.cpp
@@ -398,7 +398,7 @@ namespace
         static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
         {
             if (!isDayOfYearValid(year_, day_of_year_))
-                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_);
+                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_);
 
             Int32 res = daysSinceEpochFromDate(year_, 1, 1);
             res += day_of_year_ - 1;
@@ -408,7 +408,7 @@ namespace
         static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
        {
            if (!isDateValid(year_, month_, day_))
-                throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_);
+                throw 
Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_); Int32 res = cumulativeYearDays[year_ - 1970]; res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1]; From 654af41431423907fdffed93287e9160f78698b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 24 Jul 2023 19:45:55 +0200 Subject: [PATCH 367/478] Fix race --- src/Functions/transform.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 79168d82c54..e03701327b1 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -658,13 +658,13 @@ namespace std::unique_ptr table_string_to_idx; std::unique_ptr table_anything_to_idx; - bool is_empty = false; - ColumnPtr from_column; ColumnPtr to_column; ColumnPtr default_column; - std::atomic initialized{false}; + bool is_empty = false; + bool initialized = false; + std::mutex mutex; }; @@ -697,13 +697,12 @@ namespace /// Can be called from different threads. It works only on the first call. void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const { + std::lock_guard lock(cache.mutex); if (cache.initialized) return; const DataTypePtr & from_type = arguments[0].type; - std::lock_guard lock(cache.mutex); - if (from_type->onlyNull()) { cache.is_empty = true; From c35da36ff2b78dff5b964774673b8c713aa22e95 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 24 Jul 2023 19:50:53 +0200 Subject: [PATCH 368/478] Fix default value --- base/poco/Foundation/include/Poco/URI.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h index f4505147ced..eba8109253d 100644 --- a/base/poco/Foundation/include/Poco/URI.h +++ b/base/poco/Foundation/include/Poco/URI.h @@ -57,7 +57,7 @@ public: URI(); /// Creates an empty URI. - explicit URI(const std::string & uri, bool disable_url_encoding = true); + explicit URI(const std::string & uri, bool disable_url_encoding = false); /// Parses an URI from the given string. Throws a /// SyntaxException if the uri is not valid. 
@@ -362,7 +362,7 @@ private: std::string _query; std::string _fragment; - bool _disable_url_encoding = true; + bool _disable_url_encoding = false; }; From 2f99363db0356f146db427934b63e9158b7b9858 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 24 Jul 2023 20:51:53 +0300 Subject: [PATCH 369/478] Update 02136_scalar_subquery_metrics.sql --- tests/queries/0_stateless/02136_scalar_subquery_metrics.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql index 180610288aa..17ff367a58d 100644 --- a/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql +++ b/tests/queries/0_stateless/02136_scalar_subquery_metrics.sql @@ -6,7 +6,7 @@ SELECT '#02136_scalar_subquery_4', (SELECT max(number) FROM numbers(1000)) as n SYSTEM FLUSH LOGS; SELECT read_rows, query FROM system.query_log WHERE - event_date > yesterday() + event_date >= yesterday() AND type = 'QueryFinish' AND current_database == currentDatabase() AND query LIKE 'SELECT ''#02136_scalar_subquery_%' From ab086f15d09048deb30bef84d5d3e7e62fefd898 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 3 Jun 2023 21:09:25 +0200 Subject: [PATCH 370/478] try to push down more --- src/Processors/QueryPlan/JoinStep.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 2ff8f161e99..33fa7955e0d 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -54,7 +54,7 @@ QueryPipelineBuilderPtr JoinStep::updatePipeline(QueryPipelineBuilders pipelines bool JoinStep::allowPushDownToRight() const { - return join->pipelineType() == JoinPipelineType::YShaped; + return join->pipelineType() == JoinPipelineType::YShaped || join->pipelineType() == JoinPipelineType::FillRightFirst; } void JoinStep::describePipeline(FormatSettings & settings) const From b2acbe42b722f83c0ffde1c8697e5f19bb14747f Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sat, 3 Jun 2023 21:15:19 +0200 Subject: [PATCH 371/478] add perf test --- tests/performance/join_filter_pushdown.xml | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/performance/join_filter_pushdown.xml diff --git a/tests/performance/join_filter_pushdown.xml b/tests/performance/join_filter_pushdown.xml new file mode 100644 index 00000000000..3adbbb3029e --- /dev/null +++ b/tests/performance/join_filter_pushdown.xml @@ -0,0 +1,9 @@ + + create table t(a UInt64) engine=MergeTree order by tuple() + insert into t select * from numbers_mt(5e6) + + select * from t as t0 inner join t as t1 using(a) where t1.a = 100 + + drop table t + + From d0894532feff599d1e73acca1a9010a53a26b004 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 26 Jun 2023 21:17:34 +0200 Subject: [PATCH 372/478] fix --- src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 4 ++++ .../queries/0_stateless/02514_analyzer_drop_join_on.reference | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 4336de41b7b..af47b6ff4cd 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -341,6 +341,10 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (table_join.kind() != JoinKind::Inner && 
table_join.kind() != JoinKind::Cross && table_join.kind() != kind) return 0; + /// There is no ASOF Right join, so we're talking about pushing to the right side + if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) + return 0; + bool is_left = kind == JoinKind::Left; const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header; const auto & res_header = child->getOutputStream().header; diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index 0037ab85c07..1b177b84afa 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -107,7 +107,7 @@ Header: bx String bx_0 String c2_5 String c1_3 UInt64 - Filter (( + (JOIN actions + DROP unused columns after JOIN))) + Expression Header: a2_6 String bx_0 String c2_5 String @@ -139,7 +139,7 @@ Header: bx String ReadFromMemoryStorage Header: b1 UInt64 b2 String - Expression ((JOIN actions + Change column names to column identifiers)) + Filter (( + (JOIN actions + Change column names to column identifiers))) Header: c1_3 UInt64 c2_5 String ReadFromMemoryStorage From 104d3bbbae82309d7d55d3a46a28e6f791791fba Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 24 Jul 2023 21:10:00 +0200 Subject: [PATCH 373/478] add test --- .../0_stateless/01763_filter_push_down_bugs.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql index 9a5ef4727c5..8470b4a3379 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql @@ -66,3 +66,17 @@ EXPLAIN indexes=1 SELECT id, delete_time FROM t1 DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; + +-- expected to get row (1, 3, 1, 4) from JOIN and empty result from the query +SELECT * +FROM +( + SELECT * + FROM Values('id UInt64, t UInt64', (1, 3)) +) AS t1 +ASOF INNER JOIN +( + SELECT * + FROM Values('id UInt64, t UInt64', (1, 1), (1, 2), (1, 3), (1, 4), (1, 5)) +) AS t2 ON (t1.id = t2.id) AND (t1.t < t2.t) +WHERE t2.t != 4; From edc479bbf8c72a7076b092dd880fc7d8d2252e4d Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 24 Jul 2023 23:40:27 +0200 Subject: [PATCH 374/478] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 4 +++- src/Storages/StorageReplicatedMergeTree.h | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6cdcffab50a..e6431927805 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4861,6 +4861,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) LOG_TRACE(log, "Waiting for RestartingThread to startup table"); } + std::lock_guard lock{flush_and_shutdown_mutex}; if (shutdown_prepared_called.load() || shutdown_called.load()) throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); @@ -4906,6 +4907,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) void StorageReplicatedMergeTree::flushAndPrepareForShutdown() { + std::lock_guard lock{flush_and_shutdown_mutex}; if (shutdown_prepared_called.exchange(true)) return; @@ -4922,7 +4924,7 @@ void 
StorageReplicatedMergeTree::flushAndPrepareForShutdown() attach_thread->shutdown(); restarting_thread.shutdown(/* part_of_full_shutdown */true); - /// Explicetly set the event, because the restarting thread will not set it again + /// Explicitly set the event, because the restarting thread will not set it again startup_event.set(); shutdown_deadline.emplace(std::chrono::system_clock::now() + std::chrono::milliseconds(settings_ptr->wait_for_unique_parts_send_before_shutdown_ms.totalMilliseconds())); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 1c721e3724b..daa39536fa7 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -481,6 +481,9 @@ private: std::atomic shutdown_prepared_called {false}; std::optional shutdown_deadline; + /// We call flushAndPrepareForShutdown before acquiring DDLGuard, so we can shutdown a table that is being created right now + mutable std::mutex flush_and_shutdown_mutex; + mutable std::mutex last_sent_parts_mutex; std::condition_variable last_sent_parts_cv; From 21382afa2b2c686cde3ac0702b548d872373d3b1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 25 Jul 2023 06:10:04 +0200 Subject: [PATCH 375/478] Check for punctuation --- .../AggregateFunctionSimpleLinearRegression.cpp | 2 +- src/Common/ConcurrentBoundedQueue.h | 2 +- src/Common/parseRemoteDescription.cpp | 16 ++-------------- src/Common/parseRemoteDescription.h | 2 +- src/Common/tests/gtest_sensitive_data_masker.cpp | 6 +++--- src/Coordination/ZooKeeperDataReader.cpp | 2 +- src/Core/tests/gtest_settings.cpp | 3 +-- src/DataTypes/NumberTraits.h | 2 +- .../getDictionaryConfigurationFromAST.cpp | 2 +- src/Functions/FunctionsStringHash.cpp | 5 ++--- src/Functions/GatherUtils/sliceHasImplAnyAll.h | 4 ++-- src/IO/S3/PocoHTTPClient.cpp | 2 +- src/Interpreters/Aggregator.h | 2 +- src/Interpreters/DDLWorker.cpp | 2 +- src/Interpreters/InterpreterRenameQuery.cpp | 2 +- src/Interpreters/TransactionLog.cpp | 2 +- src/Parsers/Kusto/ParserKQLOperators.h | 16 ++++++++-------- src/Parsers/tests/gtest_Parser.cpp | 4 ++-- .../Formats/Impl/ArrowFieldIndexUtil.h | 2 +- .../Formats/Impl/JSONEachRowRowInputFormat.cpp | 4 ++-- .../QueryPlan/IntersectOrExceptStep.cpp | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 2 +- src/Server/HTTPHandler.cpp | 2 +- src/Storages/StorageFile.cpp | 4 ++-- src/Storages/StorageProxy.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/TableFunctions/TableFunctionFactory.cpp | 2 +- utils/check-style/check-style | 3 +++ 28 files changed, 45 insertions(+), 56 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 1ed6c83af7d..1489db55857 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -66,7 +66,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression( #undef FOR_LEASTSQR_TYPES #undef DISPATCH - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT , + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types ({}, {}) of arguments of aggregate function {}, must " "be Native Ints, Native UInts or Floats", x_arg->getName(), y_arg->getName(), name); } diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 01910c4caff..922607da813 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ 
b/src/Common/ConcurrentBoundedQueue.h @@ -110,7 +110,7 @@ public: /// Returns false if queue is finished [[nodiscard]] bool pushFront(const T & x) { - return emplaceImpl(/* timeout_milliseconds= */ std::nullopt , x); + return emplaceImpl(/* timeout_milliseconds= */ std::nullopt, x); } /// Returns false if queue is finished diff --git a/src/Common/parseRemoteDescription.cpp b/src/Common/parseRemoteDescription.cpp index 0bcd62d30c7..8ea3f4a0aa5 100644 --- a/src/Common/parseRemoteDescription.cpp +++ b/src/Common/parseRemoteDescription.cpp @@ -52,20 +52,8 @@ static bool parseNumber(const String & description, size_t l, size_t r, size_t & } -/* Parse a string that generates shards and replicas. Separator - one of two characters | or , - * depending on whether shards or replicas are generated. - * For example: - * host1,host2,... - generates set of shards from host1, host2, ... - * host1|host2|... - generates set of replicas from host1, host2, ... - * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. - * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. - * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. - * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. - * abc{1..9}de{f,g,h} - is a direct product, 27 shards. - * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. - */ -std::vector -parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name) +std::vector parseRemoteDescription( + const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name) { std::vector res; std::vector cur; diff --git a/src/Common/parseRemoteDescription.h b/src/Common/parseRemoteDescription.h index e3e4a3f523c..d97558c4728 100644 --- a/src/Common/parseRemoteDescription.h +++ b/src/Common/parseRemoteDescription.h @@ -3,7 +3,7 @@ #include namespace DB { -/* Parse a string that generates shards and replicas. Separator - one of two characters | or , +/* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ',' * depending on whether shards or replicas are generated. * For example: * host1,host2,... - generates set of shards from host1, host2, ... 
diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index 92c4edbac2a..f36c4154684 100644 --- a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -27,7 +27,7 @@ TEST(Common, SensitiveDataMasker) { Poco::AutoPtr empty_xml_config = new Poco::Util::XMLConfiguration(); - DB::SensitiveDataMasker masker(*empty_xml_config , ""); + DB::SensitiveDataMasker masker(*empty_xml_config, ""); masker.addMaskingRule("all a letters", "a+", "--a--"); masker.addMaskingRule("all b letters", "b+", "--b--"); masker.addMaskingRule("all d letters", "d+", "--d--"); @@ -45,7 +45,7 @@ TEST(Common, SensitiveDataMasker) masker.printStats(); #endif - DB::SensitiveDataMasker masker2(*empty_xml_config , ""); + DB::SensitiveDataMasker masker2(*empty_xml_config, ""); masker2.addMaskingRule("hide root password", "qwerty123", "******"); masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000"); masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "hidden@hidden.test"); @@ -58,7 +58,7 @@ TEST(Common, SensitiveDataMasker) "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', '******') WHERE " "ssn='000-00-0000' or email='hidden@hidden.test'"); - DB::SensitiveDataMasker maskerbad(*empty_xml_config , ""); + DB::SensitiveDataMasker maskerbad(*empty_xml_config, ""); // gtest has not good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 ) try diff --git a/src/Coordination/ZooKeeperDataReader.cpp b/src/Coordination/ZooKeeperDataReader.cpp index 94fc07bcc4a..79929c4e66e 100644 --- a/src/Coordination/ZooKeeperDataReader.cpp +++ b/src/Coordination/ZooKeeperDataReader.cpp @@ -40,7 +40,7 @@ void deserializeSnapshotMagic(ReadBuffer & in) Coordination::read(dbid, in); static constexpr int32_t SNP_HEADER = 1514885966; /// "ZKSN" if (magic_header != SNP_HEADER) - throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header); } int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in) diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp index cbeb84ef2e7..a6d8763bfb8 100644 --- a/src/Core/tests/gtest_settings.cpp +++ b/src/Core/tests/gtest_settings.cpp @@ -121,7 +121,7 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetString) ASSERT_EQ(Field("decimal,datetime64"), setting); // comma with spaces - setting = " datetime64 , decimal "; + setting = " datetime64 , decimal "; /// bad punctuation is ok here ASSERT_TRUE(setting.changed); ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); @@ -166,4 +166,3 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString) ASSERT_TRUE(setting.changed); ASSERT_EQ(0, setting.value.getValue()); } - diff --git a/src/DataTypes/NumberTraits.h b/src/DataTypes/NumberTraits.h index 6b068b0d8b1..cf283d3358c 100644 --- a/src/DataTypes/NumberTraits.h +++ b/src/DataTypes/NumberTraits.h @@ -174,7 +174,7 @@ template struct ResultOfBitNot * Float, [U]Int -> Float * Decimal, Decimal -> Decimal * UUID, UUID -> UUID - * UInt64 , Int -> Error + * UInt64, Int -> Error * Float, [U]Int64 -> Error */ template diff --git 
a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp index 0b7352e9cbb..b12ffc555d4 100644 --- a/src/Dictionaries/getDictionaryConfigurationFromAST.cpp +++ b/src/Dictionaries/getDictionaryConfigurationFromAST.cpp @@ -322,7 +322,7 @@ void buildSingleAttribute( /** Transforms - * PRIMARY KEY Attr1 ,..., AttrN + * PRIMARY KEY Attr1, ..., AttrN * to the next configuration * Attr1 * or diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index d6873d9490e..ff8ff2d2651 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -292,8 +292,8 @@ struct SimHashImpl continue; // we need to store the new word hash value to the oldest location. - // for example, N = 5, array |a0|a1|a2|a3|a4|, now , a0 is the oldest location, - // so we need to store new word hash into location of a0, then ,this array become + // for example, N = 5, array |a0|a1|a2|a3|a4|, now, a0 is the oldest location, + // so we need to store new word hash into location of a0, then this array become // |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new // word hash value into location of a1, then array become |a5|a6|a2|a3|a4| words[offset] = BytesRef{word_start, length}; @@ -793,4 +793,3 @@ REGISTER_FUNCTION(StringHash) factory.registerFunction(); } } - diff --git a/src/Functions/GatherUtils/sliceHasImplAnyAll.h b/src/Functions/GatherUtils/sliceHasImplAnyAll.h index 21c80b742fd..99bf1a7cc33 100644 --- a/src/Functions/GatherUtils/sliceHasImplAnyAll.h +++ b/src/Functions/GatherUtils/sliceHasImplAnyAll.h @@ -375,14 +375,14 @@ bool sliceHasImplAnyAllImplInt16( _mm256_or_si256( _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)), - _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))), + _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))), _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)), _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))), _mm256_or_si256( _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)), - _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))), + _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))), _mm256_andnot_si256( _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), 
_mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)), _mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)))))) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1a367a8199d..fd825720ac9 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -258,7 +258,7 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT void PocoHTTPClient::makeRequestInternal( Aws::Http::HttpRequest & request, std::shared_ptr & response, - Aws::Utils::RateLimits::RateLimiterInterface * readLimiter , + Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const { /// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session. diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 05b34e8460f..29096a38be6 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -292,7 +292,7 @@ struct AggregationMethodStringNoCache { } - using State = ColumnsHashing::HashMethodString; + using State = ColumnsHashing::HashMethodString; static const bool low_cardinality_optimization = false; static const bool one_key_nullable_optimization = nullable; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 193bb5b6ab0..92e6bcb326c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper) chassert(!task.completely_processed); /// Setup tracing context on current thread for current DDL - OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ , + OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__, task.entry.tracing_context, this->context->getOpenTelemetrySpanLog()); tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER; diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 75d43b541e1..ae79b3f932e 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable()); if (rename.exchange) { - required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable()); + required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable()); required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable()); } } diff --git a/src/Interpreters/TransactionLog.cpp b/src/Interpreters/TransactionLog.cpp index 6257e617d4a..2ef4f4d6218 100644 --- a/src/Interpreters/TransactionLog.cpp +++ b/src/Interpreters/TransactionLog.cpp @@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN bool removed = running_list.erase(txn->tid.getHash()); if (!removed) { - LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid); + LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", 
txn->tid.getHash(), txn->tid); abort(); } } diff --git a/src/Parsers/Kusto/ParserKQLOperators.h b/src/Parsers/Kusto/ParserKQLOperators.h index 9796ae10c07..72e25cc3cf9 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.h +++ b/src/Parsers/Kusto/ParserKQLOperators.h @@ -31,10 +31,10 @@ protected: not_endswith, endswith_cs, not_endswith_cs, - equal, //=~ - not_equal,//!~ - equal_cs, //= - not_equal_cs,//!= + equal, /// =~ + not_equal, /// !~ + equal_cs, /// = + not_equal_cs, /// != has, not_has, has_all, @@ -49,10 +49,10 @@ protected: not_hassuffix, hassuffix_cs, not_hassuffix_cs, - in_cs, //in - not_in_cs, //!in - in, //in~ - not_in ,//!in~ + in_cs, /// in + not_in_cs, /// !in + in, /// in~ + not_in, /// !in~ matches_regex, startswith, not_startswith, diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index d77ae8d3a27..18e91c533e0 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -359,11 +359,11 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserTest, "SELECT *\nFROM Customers\nORDER BY LastName DESC" }, { - "Customers | order by Age desc , FirstName asc ", + "Customers | order by Age desc, FirstName asc ", "SELECT *\nFROM Customers\nORDER BY\n Age DESC,\n FirstName ASC" }, { - "Customers | order by Age asc , FirstName desc", + "Customers | order by Age asc, FirstName desc", "SELECT *\nFROM Customers\nORDER BY\n Age ASC,\n FirstName DESC" }, { diff --git a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h index b7adaa35335..676ce50d04f 100644 --- a/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h +++ b/src/Processors/Formats/Impl/ArrowFieldIndexUtil.h @@ -35,7 +35,7 @@ public: /// - key: field name with full path. eg. a struct field's name is like a.x.i /// - value: a pair, first value refers to this field's start index, second value refers to how many /// indices this field take. eg. - /// For a parquet schema {x: int , y: {i: int, j: int}}, the return will be + /// For a parquet schema {x: int, y: {i: int, j: int}}, the return will be /// - x: (0, 1) /// - y: (1, 2) /// - y.i: (1, 1) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e5f52936021..b1b08cdf256 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -236,10 +236,10 @@ bool JSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtensi bool JSONEachRowRowInputFormat::checkEndOfData(bool is_first_row) { - /// We consume , or \n before scanning a new row, instead scanning to next row at the end. + /// We consume ',' or '\n' before scanning a new row, instead scanning to next row at the end. /// The reason is that if we want an exact number of rows read with LIMIT x /// from a streaming table engine with text data format, like File or Kafka - /// then seeking to next ;, or \n would trigger reading of an extra row at the end. + /// then seeking to next ';,' or '\n' would trigger reading of an extra row at the end. /// Semicolon is added for convenience as it could be used at end of INSERT query. 
if (!in->eof()) diff --git a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp index afdff44020f..b132d27670d 100644 --- a/src/Processors/QueryPlan/IntersectOrExceptStep.cpp +++ b/src/Processors/QueryPlan/IntersectOrExceptStep.cpp @@ -30,7 +30,7 @@ static Block checkHeaders(const DataStreams & input_streams_) } IntersectOrExceptStep::IntersectOrExceptStep( - DataStreams input_streams_ , Operator operator_ , size_t max_threads_) + DataStreams input_streams_, Operator operator_, size_t max_threads_) : header(checkHeaders(input_streams_)) , current_operator(operator_) , max_threads(max_threads_) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 7f7f9058f1b..1b20778877d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -72,7 +72,7 @@ struct ViewsData std::atomic_bool has_exception = false; std::exception_ptr first_exception; - ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_ , StoragePtr source_storage_) + ViewsData(ThreadStatusesHolderPtr thread_status_holder_, ContextPtr context_, StorageID source_storage_id_, StorageMetadataPtr source_metadata_snapshot_, StoragePtr source_storage_) : thread_status_holder(std::move(thread_status_holder_)) , context(std::move(context_)) , source_storage_id(std::move(source_storage_id_)) diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 069670c84a5..29b75fa6552 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -638,7 +638,7 @@ void HTTPHandler::processQuery( throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected MemoryWriteBuffer"); auto rdbuf = prev_memory_buffer->tryGetReadBuffer(); - copyData(*rdbuf , *next_buffer); + copyData(*rdbuf, *next_buffer); return next_buffer; }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index cbd32460f7e..3126d584964 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -242,8 +242,8 @@ void listFilesWithRegexpMatchingImpl( { if (recursive) { - listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "" , - looking_for_directory ? suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob , + listFilesWithRegexpMatchingImpl(fs::path(full_path).append(it->path().string()) / "", + looking_for_directory ? 
suffix_with_globs.substr(next_slash_after_glob_pos) : current_glob, total_bytes_to_read, result, recursive); } else if (looking_for_directory && re2::RE2::FullMatch(file_name, matcher)) diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 14b7fc15af2..582dc6f882d 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -149,7 +149,7 @@ public: return getNested()->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } - CheckResults checkData(const ASTPtr & query , ContextPtr context) override { return getNested()->checkData(query, context); } + CheckResults checkData(const ASTPtr & query, ContextPtr context) override { return getNested()->checkData(query, context); } void checkTableCanBeDropped() const override { getNested()->checkTableCanBeDropped(); } bool storesDataOnDisk() const override { return getNested()->storesDataOnDisk(); } Strings getDataPaths() const override { return getNested()->getDataPaths(); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 4e053c4598c..c3dedd69d0d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6569,7 +6569,7 @@ void StorageReplicatedMergeTree::fetchPartition( try { - /// part name , metadata, part_path , true, 0, zookeeper + /// part name, metadata, part_path, true, 0, zookeeper if (!fetchPart(part_name, metadata_snapshot, from_zookeeper_name, part_path, true, 0, zookeeper, /* try_fetch_shared = */ false)) throw Exception(ErrorCodes::UNFINISHED, "Failed to fetch part {} from {}", part_name, from_); } diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index 76108f1cdd4..ce3daff0785 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -41,7 +41,7 @@ TableFunctionPtr TableFunctionFactory::get( { auto hints = getHints(table_function->name); if (!hints.empty()) - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. Maybe you meant: {}", table_function->name , toString(hints)); + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}. Maybe you meant: {}", table_function->name, toString(hints)); else throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function {}", table_function->name); } diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 0b3b86b4772..c28ca1cfc8a 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -410,3 +410,6 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep # The stateful directory should only contain the tests that depend on the test dataset (hits or visits). find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep -v '00076_system_columns_bytes' | xargs -I{} bash -c 'grep -q -P "hits|visits" "{}" || echo "The test {} does not depend on the test dataset (hits or visits table) and should be located in the 0_stateless directory. You can also add an exception to the check-style script."' + +# Check for bad punctuation: whitespace before comma. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. 
You should write it like this: 'Hello, world!'" From 3e3adc7fecd5f6c409320727bec3a0291aa2430b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 10:29:59 +0200 Subject: [PATCH 376/478] tests: increase throttling for 01923_network_receive_time_metric_insert In debug builds launching the client can take a while, so let's increase the throttling to avoid flakiness CI: https://s3.amazonaws.com/clickhouse-test-reports/52490/9e2526a5f04861fcfac49c2ce85560d08c68af66/stateless_tests__debug__[1_5].html Signed-off-by: Azat Khuzhin --- .../0_stateless/01923_network_receive_time_metric_insert.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh index ec5aa141859..4d7e79fae52 100755 --- a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" # Rate limit is chosen for operation to spent more than one second. -seq 1 1000 | pv --quiet --rate-limit 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" +seq 1 1000 | pv --quiet --rate-limit 500 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" # We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. ${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; From b02e290d5507419e6166433b0a045eaeb3d124d9 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 10:37:02 +0200 Subject: [PATCH 377/478] tests: fix 01035_avg_weighted_long flakiness Use one clickhouse-client invocation instead of 300, in debug builds it is significant - each spawn is ~1 second Signed-off-by: Azat Khuzhin --- .../0_stateless/01035_avg_weighted_long.sh | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/queries/0_stateless/01035_avg_weighted_long.sh b/tests/queries/0_stateless/01035_avg_weighted_long.sh index 138aa03fbb3..8838b07a3d7 100755 --- a/tests/queries/0_stateless/01035_avg_weighted_long.sh +++ b/tests/queries/0_stateless/01035_avg_weighted_long.sh @@ -11,36 +11,36 @@ ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, y) FROM (select toDecimal256 ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, y) FROM (select toDecimal32(1, 0) x, toDecimal256(1, 1) y);" types=("Int8" "Int16" "Int32" "Int64" "UInt8" "UInt16" "UInt32" "UInt64" "Float32" "Float64") - -for left in "${types[@]}" -do - for right in "${types[@]}" - do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (4, 1), (1, 0), (10, 2))" - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (0, 0), (1, 0))" - done -done - exttypes=("Int128" "Int256" "UInt256") - -for left in "${exttypes[@]}" -do - for right in "${exttypes[@]}" - do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(to${left}(1), to${right}(2))" - done -done - # Decimal types dtypes=("32" "64" "128" "256") -for left in "${dtypes[@]}" -do - for right in "${dtypes[@]}" +( + for left in "${types[@]}" do - ${CLICKHOUSE_CLIENT} --query="SELECT avgWeighted(toDecimal${left}(2, 4), toDecimal${right}(1, 4))" + for right in "${types[@]}" + do + echo "SELECT avgWeighted(x, w) FROM values('x ${left}, w 
${right}', (4, 1), (1, 0), (10, 2));" + echo "SELECT avgWeighted(x, w) FROM values('x ${left}, w ${right}', (0, 0), (1, 0));" + done done -done + + for left in "${exttypes[@]}" + do + for right in "${exttypes[@]}" + do + echo "SELECT avgWeighted(to${left}(1), to${right}(2));" + done + done + + for left in "${dtypes[@]}" + do + for right in "${dtypes[@]}" + do + echo "SELECT avgWeighted(toDecimal${left}(2, 4), toDecimal${right}(1, 4));" + done + done +) | clickhouse-client -nm echo "$(${CLICKHOUSE_CLIENT} --server_logs_file=/dev/null --query="SELECT avgWeighted(['string'], toFloat64(0))" 2>&1)" \ | grep -c 'Code: 43. DB::Exception: .* DB::Exception:.* Types .* are non-conforming as arguments for aggregate function avgWeighted' From 2efbeab5afe50fbd734a6729e4cffa7ef12fff04 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 10:43:29 +0200 Subject: [PATCH 378/478] tests: fix 00719_parallel_ddl_table flakiness in debug builds In debug bulds each client invocation takes ~1 second, and on CI it can take more if the node is under some load, so let's decrease number of iterations. Anyway CI runs each test ~1K times daily, and if there will be something even this number of iterations should be enough. Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/00719_parallel_ddl_table.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00719_parallel_ddl_table.sh b/tests/queries/0_stateless/00719_parallel_ddl_table.sh index fdc994aec33..57a7e228341 100755 --- a/tests/queries/0_stateless/00719_parallel_ddl_table.sh +++ b/tests/queries/0_stateless/00719_parallel_ddl_table.sh @@ -10,7 +10,7 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS parallel_ddl" function query() { - for _ in {1..100}; do + for _ in {1..50}; do ${CLICKHOUSE_CLIENT} --query "CREATE TABLE IF NOT EXISTS parallel_ddl(a Int) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS parallel_ddl" done From d500e75569c59d1f91ae3de9c43f24f2be703e21 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 12:07:47 +0200 Subject: [PATCH 379/478] fix --- src/Functions/FunctionToDecimalString.h | 67 ++----------------- .../0_stateless/02676_to_decimal_string.sql | 6 ++ 2 files changed, 13 insertions(+), 60 deletions(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index 6ae007e6b66..68ad978632e 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -22,6 +22,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } class FunctionToDecimalString : public IFunction @@ -36,17 +37,14 @@ public: size_t getNumberOfArguments() const override { return 2; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isNumber(*arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal first argument for formatDecimal function: got {}, expected numeric type", - arguments[0]->getName()); + FunctionArgumentDescriptors mandatory_args = { + {"Value", nullptr, nullptr, nullptr}, + {"precision", &isNativeInteger, &isColumnConst, "const Integer [0-77]"} + }; - if (!isUInt8(*arguments[1])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal second argument for formatDecimal function: got {}, 
expected UInt8", - arguments[1]->getName()); + validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); return std::make_shared(); } @@ -98,29 +96,6 @@ private: buf_to.finalize(); } - template - void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets) const - { - size_t input_rows_count = vec_precision.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested, shall not be more than {}", max_digits); - format(value_from, buf_to, vec_precision[i]); - result_offsets[i] = buf_to.count(); - } - - buf_to.finalize(); - } - /// For operations with Decimal template void vectorConstant(const FirstArgVectorType & vec_from, UInt8 precision, @@ -168,29 +143,6 @@ private: buf_to.finalize(); } - template - void constantVector(const FirstArgType & value_from, const ColumnVector::Container & vec_precision, - ColumnString::Chars & vec_to, ColumnString::Offsets & result_offsets, UInt8 from_scale) const - { - size_t input_rows_count = vec_precision.size(); - result_offsets.resize(input_rows_count); - - WriteBufferFromVector buf_to(vec_to); - - constexpr size_t max_digits = std::numeric_limits::digits10; - - for (size_t i = 0; i < input_rows_count; ++i) - { - if (vec_precision[i] > max_digits) - throw DB::Exception(DB::ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, - "Too many fractional digits requested for Decimal, must not be more than {}", max_digits); - writeText(value_from, from_scale, buf_to, true, true, vec_precision[i]); - writeChar(0, buf_to); - result_offsets[i] = buf_to.count(); - } - buf_to.finalize(); - } - template static void format(T value, DB::WriteBuffer & out, UInt8 precision) { @@ -263,7 +215,6 @@ private: template ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const { - const auto * from_col_const = typeid_cast(arguments[0].column.get()); const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); const auto * precision_col_const = typeid_cast(arguments[1].column.get()); @@ -284,8 +235,6 @@ private: else vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); } - else if (from_col_const) - constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets, from_scale); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); } @@ -299,8 +248,6 @@ private: else vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); } - else if (from_col_const) - constantVector(from_col_const->template getValue(), precision_col->getData(), result_chars, result_offsets); else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); } diff --git a/tests/queries/0_stateless/02676_to_decimal_string.sql b/tests/queries/0_stateless/02676_to_decimal_string.sql index 563d60c62c7..1dae139deb1 100644 --- a/tests/queries/0_stateless/02676_to_decimal_string.sql +++ b/tests/queries/0_stateless/02676_to_decimal_string.sql @@ -33,3 +33,9 @@ SELECT 
toDecimalString('64.64'::Float64, 61); -- {serverError CANNOT_PRINT_FLOAT SELECT toDecimalString('88'::UInt8, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} SELECT toDecimalString('646464'::Int256, 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} SELECT toDecimalString('-128.789323123321329854641231237893231233213298546'::Decimal256(45), 78); -- {serverError CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER} + +-- wrong types: #52407 and similar +SELECT toDecimalString('256.256'::Decimal256(45), *); -- {serverError ILLEGAL_COLUMN} +SELECT toDecimalString('128.128'::Decimal128(30), 'str'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT toDecimalString('64.64'::Decimal64(10)); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT toDecimalString('64.64'::Decimal64(10), 3, 3); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} From 8184a289e5441208110bcd2f8f63b57e31ccde33 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Mon, 1 May 2023 01:53:20 +0000 Subject: [PATCH 380/478] Partially reimplement Parquet encoder to make it faster and parallelizable --- src/CMakeLists.txt | 4 + src/Common/CurrentMetrics.cpp | 6 +- src/Common/PODArray.cpp | 10 + src/Common/PODArray.h | 11 + src/Core/Settings.h | 4 + src/Formats/FormatFactory.cpp | 7 +- src/Formats/FormatSettings.h | 6 + .../Formats/Impl/CHColumnToArrowColumn.cpp | 5 +- .../Formats/Impl/Parquet/PrepareForWrite.cpp | 618 +++++++++++++ .../Formats/Impl/Parquet/ThriftUtil.cpp | 35 + .../Formats/Impl/Parquet/ThriftUtil.h | 17 + src/Processors/Formats/Impl/Parquet/Write.cpp | 816 ++++++++++++++++++ src/Processors/Formats/Impl/Parquet/Write.h | 135 +++ .../Formats/Impl/ParquetBlockOutputFormat.cpp | 467 +++++++++- .../Formats/Impl/ParquetBlockOutputFormat.h | 116 ++- .../02735_parquet_encoder.reference | 55 ++ .../0_stateless/02735_parquet_encoder.sql | 168 ++++ 17 files changed, 2425 insertions(+), 55 deletions(-) create mode 100644 src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/ThriftUtil.h create mode 100644 src/Processors/Formats/Impl/Parquet/Write.cpp create mode 100644 src/Processors/Formats/Impl/Parquet/Write.h create mode 100644 tests/queries/0_stateless/02735_parquet_encoder.reference create mode 100644 tests/queries/0_stateless/02735_parquet_encoder.sql diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 975bf9bb618..5c66c7e9495 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -267,6 +267,10 @@ add_object_library(clickhouse_processors_queryplan Processors/QueryPlan) add_object_library(clickhouse_processors_queryplan_optimizations Processors/QueryPlan/Optimizations) add_object_library(clickhouse_user_defined_functions Functions/UserDefined) +if (USE_PARQUET) + add_object_library(clickhouse_processors_formats_impl_parquet Processors/Formats/Impl/Parquet) +endif() + if (TARGET ch_contrib::nuraft) add_object_library(clickhouse_coordination Coordination) endif() diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 583b13cf79d..9a4ffb0577a 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -149,8 +149,10 @@ M(RestartReplicaThreadsActive, "Number of threads in the RESTART REPLICA thread pool running a task.") \ M(QueryPipelineExecutorThreads, "Number of threads in the PipelineExecutor thread pool.") \ M(QueryPipelineExecutorThreadsActive, "Number of threads in the PipelineExecutor thread pool running a task.") \ - 
M(ParquetDecoderThreads, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \ - M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool.") \ + M(ParquetDecoderThreads, "Number of threads in the ParquetBlockInputFormat thread pool.") \ + M(ParquetDecoderThreadsActive, "Number of threads in the ParquetBlockInputFormat thread pool running a task.") \ + M(ParquetEncoderThreads, "Number of threads in ParquetBlockOutputFormat thread pool.") \ + M(ParquetEncoderThreadsActive, "Number of threads in ParquetBlockOutputFormat thread pool running a task.") \ M(OutdatedPartsLoadingThreads, "Number of threads in the threadpool for loading Outdated data parts.") \ M(OutdatedPartsLoadingThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \ M(DistributedBytesToInsert, "Number of pending bytes to process for asynchronous insertion into Distributed tables. Number of bytes for every shard is summed.") \ diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index 07c3cf1af1a..d21dc40867d 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -15,4 +15,14 @@ template class PODArray, PADDING_FOR_SIMD - 1, PADD template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; + +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; + +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; +template class PODArray, 0, 0>; } diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index b126afd2a37..68c1e325f0c 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -783,4 +783,15 @@ extern template class PODArray, PADDING_FOR_SIMD - extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; extern template class PODArray, PADDING_FOR_SIMD - 1, PADDING_FOR_SIMD>; + +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; + +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; +extern template class PODArray, 0, 0>; + } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 97c64ba133c..98f7f212aa5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -953,6 +953,10 @@ class IColumn; M(ParquetVersion, output_format_parquet_version, "2.latest", "Parquet format version for output format. Supported versions: 1.0, 2.4, 2.6 and 2.latest (default)", 0) \ M(ParquetCompression, output_format_parquet_compression_method, "lz4", "Compression method for Parquet output format. Supported codecs: snappy, lz4, brotli, zstd, gzip, none (uncompressed)", 0) \ M(Bool, output_format_parquet_compliant_nested_types, true, "In parquet file schema, use name 'element' instead of 'item' for list elements. This is a historical artifact of Arrow library implementation. Generally increases compatibility, except perhaps with some old versions of Arrow.", 0) \ + M(Bool, output_format_parquet_use_custom_encoder, true, "Use experimental faster Parquet encoder implementation.", 0) \ + M(Bool, output_format_parquet_parallel_encoding, true, "Do Parquet encoding in multiple threads. 
Requires output_format_parquet_use_custom_encoder.", 0) \ + M(UInt64, output_format_parquet_data_page_size, 1024 * 1024, "Target page size in bytes, before compression.", 0) \ + M(UInt64, output_format_parquet_batch_size, 1024, "Check page size every this many rows. Consider decreasing if you have columns with average values size above a few KBs.", 0) \ M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(String, output_format_avro_string_column_pattern, "", "For Avro format: regexp of String columns to select as AVRO string.", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 6e3e086859b..663b7f1ba95 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -130,6 +130,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types; + format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder; + format_settings.parquet.parallel_encoding = settings.output_format_parquet_parallel_encoding; + format_settings.parquet.data_page_size = settings.output_format_parquet_data_page_size; + format_settings.parquet.write_batch_size = settings.output_format_parquet_batch_size; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width; @@ -434,7 +438,7 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( return format; } - return getOutputFormat(name, buf, sample, context, _format_settings); + return getOutputFormat(name, buf, sample, context, format_settings); } @@ -453,6 +457,7 @@ OutputFormatPtr FormatFactory::getOutputFormat( context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); + format_settings.max_threads = context->getSettingsRef().max_threads; /** TODO: Materialization is needed, because formats can use the functions `IDataType`, * which only work with full columns. 
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index e321e5264ca..3259c46e5ff 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -100,6 +100,8 @@ struct FormatSettings UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH; + size_t max_threads = 1; + enum class ArrowCompression { NONE, @@ -233,10 +235,14 @@ struct FormatSettings bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; bool preserve_order = false; + bool use_custom_encoder = true; + bool parallel_encoding = true; UInt64 max_block_size = 8192; ParquetVersion output_version; ParquetCompression output_compression_method = ParquetCompression::SNAPPY; bool output_compliant_nested_types = true; + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; } parquet; struct Pretty diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index f688efa3290..e2383d1bfab 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -684,9 +684,6 @@ namespace DB bool output_fixed_string_as_fixed_byte_array, std::unordered_map & dictionary_values) { - const String column_type_name = column_type->getFamilyName(); - WhichDataType which(column_type); - switch (column_type->getTypeId()) { case TypeIndex::Nullable: @@ -796,7 +793,7 @@ namespace DB FOR_INTERNAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH default: - throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type_name, column_name, format_name); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type->getFamilyName(), column_name, format_name); } } diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp new file mode 100644 index 00000000000..a70b6fcfc81 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -0,0 +1,618 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/// This file deals with schema conversion and with repetition and definition levels. + +/// Schema conversion is pretty straightforward. + +/// "Repetition and definition levels" are a somewhat tricky way of encoding information about +/// optional fields and lists. +/// +/// If you don't want to learn how these work, feel free to skip the updateRepDefLevels* functions. +/// All you need to know is: +/// * values for nulls are not encoded, so we have to filter nullable columns, +/// * information about all array lengths and nulls is encoded in the arrays `def` and `rep`, +/// which need to be encoded next to the data, +/// * `def` and `rep` arrays can be longer than `primitive_column`, because they include nulls and +/// empty arrays; the values in primitive_column correspond to positions where def[i] == max_def. +/// +/// If you do want to learn it, dremel paper: https://research.google/pubs/pub36632/ +/// Instead of reading the whole paper, try staring at figures 2-3 for a while - it might be enough. +/// (Why does Parquet do all this instead of just storing array lengths and null masks? I'm not +/// really sure.) 
+/// +/// We calculate the levels recursively, from inner to outer columns. +/// This means scanning the whole array for each Array/Nullable nesting level, which is probably not +/// the most efficient way to do it. But there's usually at most one nesting level, so it's fine. +/// +/// Most of this is moot because ClickHouse doesn't support nullable arrays or tuples right now, so +/// almost none of the tricky cases can happen. We implement it in full generality anyway (mostly +/// because I only learned the previous sentence after writing most of the code). + + +namespace DB::ErrorCodes +{ + extern const int UNKNOWN_TYPE; + extern const int TOO_DEEP_RECURSION; // I'm 14 and this is deep + extern const int UNKNOWN_COMPRESSION_METHOD; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +/// Thrift structs that Parquet uses for various metadata inside the parquet file. +namespace parq = parquet::format; + +namespace +{ + +void assertNoDefOverflow(ColumnChunkWriteState & s) +{ + if (s.max_def == UINT8_MAX) + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, + "Column has more than 255 levels of nested Array/Nullable. Impressive! Unfortunately, " + "this is not supported by this Parquet encoder (but is supported by Parquet, if you " + "really need this for some reason)."); +} + +void updateRepDefLevelsAndFilterColumnForNullable(ColumnChunkWriteState & s, const NullMap & null_map) +{ + /// Increment definition levels for non-nulls. + /// Filter the column to contain only non-null values. + + assertNoDefOverflow(s); + ++s.max_def; + + /// Normal case: no arrays or nullables inside this nullable. + if (s.max_def == 1) + { + chassert(s.def.empty()); + s.def.resize(null_map.size()); + for (size_t i = 0; i < s.def.size(); ++i) + s.def[i] = !null_map[i]; + + /// We could be more efficient with this: + /// * Instead of doing the filter() here, we could defer it to writeColumnChunkBody(), at + /// least in the simple case of Nullable(Primitive). Then it'll parallelize if the table + /// consists of one big tuple. + /// * Instead of filtering explicitly, we could build filtering into the data encoder. + /// * Instead of filling out the `def` values above, we could point to null_map and build + /// the '!' into the encoder. + /// None of these seem worth the complexity right now. + s.primitive_column = s.primitive_column->filter(s.def, /*result_size_hint*/ -1); + + return; + } + + /// Weird general case: Nullable(Array), Nullable(Nullable), or any arbitrary nesting like that. + /// This is currently not allowed in ClickHouse, but let's support it anyway just in case. + + IColumn::Filter filter; + size_t row_idx = static_cast(-1); + for (size_t i = 0; i < s.def.size(); ++i) + { + row_idx += s.max_rep == 0 || s.rep[i] == 0; + if (s.def[i] == s.max_def - 1) + filter.push_back(!null_map[row_idx]); + s.def[i] += !null_map[row_idx]; + } + s.primitive_column = s.primitive_column->filter(filter, /*result_size_hint*/ -1); +} + +void updateRepDefLevelsForArray(ColumnChunkWriteState & s, const IColumn::Offsets & offsets) +{ + /// Increment all definition levels. + /// For non-first elements of arrays, increment repetition levels. + /// For empty arrays, insert a zero into repetition and definition levels arrays. + + assertNoDefOverflow(s); + ++s.max_def; + ++s.max_rep; + + /// Common case: no arrays or nullables inside this array. 
+ if (s.max_rep == 1 && s.max_def == 1) + { + s.def.resize_fill(s.primitive_column->size(), 1); + s.rep.resize_fill(s.primitive_column->size(), 1); + size_t i = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + s.rep[i] = 0; + i += n; + } + else + { + s.def.push_back(1); + s.rep.push_back(1); + s.def[i] = 0; + s.rep[i] = 0; + i += 1; + } + } + return; + } + + /// General case: Array(Array), Array(Nullable), or any arbitrary nesting like that. + + for (auto & x : s.def) + ++x; + + if (s.max_rep == 1) + s.rep.resize_fill(s.def.size(), 1); + else + for (auto & x : s.rep) + ++x; + + PaddedPODArray mask(s.def.size(), 1); // for inserting zeroes to rep and def + size_t i = 0; // in the input (s.def/s.rep) + size_t empty_arrays = 0; + for (ssize_t row = 0; row < static_cast(offsets.size()); ++row) + { + size_t n = offsets[row] - offsets[row - 1]; + if (n) + { + /// Un-increment the first rep of the array. + /// Skip n "items" in the nested column; first element of each item has rep = 1 + /// (we incremented it above). + chassert(s.rep[i] == 1); + --s.rep[i]; + do + { + ++i; + if (i == s.rep.size()) + { + --n; + chassert(n == 0); + break; + } + n -= s.rep[i] == 1; + } while (n); + } + else + { + mask.push_back(1); + mask[i + empty_arrays] = 0; + ++empty_arrays; + } + } + + if (empty_arrays != 0) + { + expandDataByMask(s.def, mask, false); + expandDataByMask(s.rep, mask, false); + } +} + +parq::CompressionCodec::type compressionMethodToParquet(CompressionMethod c) +{ + switch (c) + { + case CompressionMethod::None: return parq::CompressionCodec::UNCOMPRESSED; + case CompressionMethod::Snappy: return parq::CompressionCodec::SNAPPY; + case CompressionMethod::Gzip: return parq::CompressionCodec::GZIP; + case CompressionMethod::Brotli: return parq::CompressionCodec::BROTLI; + case CompressionMethod::Lz4: return parq::CompressionCodec::LZ4_RAW; + case CompressionMethod::Zstd: return parq::CompressionCodec::ZSTD; + + default: + throw Exception(ErrorCodes::UNKNOWN_COMPRESSION_METHOD, "Compression method {} is not supported by Parquet", toContentEncodingName(c)); + } +} + +/// Depth-first traversal of the schema tree for this column. +void prepareColumnRecursive( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas); + +void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::string & name, + const WriteOptions & options, ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + /// Add physical column info. + auto & state = states.emplace_back(); + state.primitive_column = column; + state.compression = options.compression; + + state.column_chunk.__isset.meta_data = true; + state.column_chunk.meta_data.__set_path_in_schema({name}); + state.column_chunk.meta_data.__set_codec(compressionMethodToParquet(state.compression)); + + /// Add logical schema leaf. + auto & schema = schemas.emplace_back(); + schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED); + schema.__set_name(name); + + /// Convert the type enums. 
+ + using T = parq::Type; + using C = parq::ConvertedType; + + auto types = [&](T::type type_, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(type_); + schema.__set_type(type_); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto int_type = [](Int8 bits, bool signed_) + { + parq::LogicalType t; + t.__isset.INTEGER = true; + t.INTEGER.__set_bitWidth(bits); + t.INTEGER.__set_isSigned(signed_); + return t; + }; + + auto fixed_string = [&](size_t size, std::optional converted = std::nullopt, std::optional logical = std::nullopt) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(static_cast(size)); + if (converted) + schema.__set_converted_type(*converted); + if (logical) + schema.__set_logicalType(*logical); + }; + + auto decimal = [&](Int32 bytes, UInt32 precision, UInt32 scale) + { + state.column_chunk.meta_data.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type(parq::Type::FIXED_LEN_BYTE_ARRAY); + schema.__set_type_length(bytes); + schema.__set_scale(static_cast(scale)); + schema.__set_precision(static_cast(precision)); + schema.__set_converted_type(parq::ConvertedType::DECIMAL); + parq::DecimalType d; + d.__set_scale(static_cast(scale)); + d.__set_precision(static_cast(precision)); + parq::LogicalType t; + t.__set_DECIMAL(d); + schema.__set_logicalType(t); + }; + + switch (type->getTypeId()) + { + case TypeIndex::UInt8: types(T::INT32, C::UINT_8 , int_type(8 , false)); break; + case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; + case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; + case TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; + case TypeIndex::Int8: types(T::INT32, C::INT_8 , int_type(8 , true)); break; + case TypeIndex::Int16: types(T::INT32, C::INT_16 , int_type(16, true)); break; + case TypeIndex::Int32: types(T::INT32); break; + case TypeIndex::Int64: types(T::INT64); break; + case TypeIndex::Float32: types(T::FLOAT); break; + case TypeIndex::Float64: types(T::DOUBLE); break; + + /// These don't have suitable parquet logical types, so we write them as plain numbers. + /// (Parquet has "enums" but they're just strings, with nowhere to declare all possible enum + /// values in advance as part of the data type.) 
+ case TypeIndex::Enum8: types(T::INT32, C::INT_8 , int_type(8 , true)); break; // Int8 + case TypeIndex::Enum16: types(T::INT32, C::INT_16 , int_type(16, true)); break; // Int16 + case TypeIndex::IPv4: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + case TypeIndex::Date: types(T::INT32, C::UINT_16, int_type(16, false)); break; // UInt16 + case TypeIndex::DateTime: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 + + case TypeIndex::Date32: + { + parq::LogicalType t; + t.__set_DATE({}); + types(T::INT32, C::DATE, t); + break; + } + + case TypeIndex::DateTime64: + { + std::optional converted; + std::optional unit; + switch (assert_cast(*type).getScale()) + { + case 3: + converted = parq::ConvertedType::TIMESTAMP_MILLIS; + unit.emplace().__set_MILLIS({}); + break; + case 6: + converted = parq::ConvertedType::TIMESTAMP_MICROS; + unit.emplace().__set_MICROS({}); + break; + case 9: + unit.emplace().__set_NANOS({}); + break; + } + + std::optional t; + if (unit) + { + parq::TimestampType tt; + tt.__set_isAdjustedToUTC(true); + tt.__set_unit(*unit); + t.emplace().__set_TIMESTAMP(tt); + } + types(T::INT64, converted, t); + break; + } + + case TypeIndex::String: + case TypeIndex::FixedString: + { + if (options.output_fixed_string_as_fixed_byte_array && + type->getTypeId() == TypeIndex::FixedString) + { + fixed_string(assert_cast(*type).getN()); + } + else if (options.output_string_as_string) + { + parq::LogicalType t; + t.__set_STRING({}); + types(T::BYTE_ARRAY, C::UTF8, t); + } + else + { + types(T::BYTE_ARRAY); + } + break; + } + + /// Parquet doesn't have logical types for these. + case TypeIndex::UInt128: fixed_string(16); break; + case TypeIndex::UInt256: fixed_string(32); break; + case TypeIndex::Int128: fixed_string(16); break; + case TypeIndex::Int256: fixed_string(32); break; + case TypeIndex::IPv6: fixed_string(16); break; + + case TypeIndex::Decimal32: decimal(4 , getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal64: decimal(8 , getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal128: decimal(16, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal256: decimal(32, getDecimalPrecision(*type), getDecimalScale(*type)); break; + + default: + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Internal type '{}' of column '{}' is not supported for conversion into Parquet data format.", type->getFamilyName(), name); + } +} + +void prepareColumnNullable( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates & states, SchemaElements & schemas) +{ + const ColumnNullable * column_nullable = assert_cast(column.get()); + ColumnPtr nested_column = column_nullable->getNestedColumnPtr(); + DataTypePtr nested_type = assert_cast(type.get())->getNestedType(); + const NullMap & null_map = column_nullable->getNullMapData(); + + size_t child_states_begin = states.size(); + size_t child_schema_idx = schemas.size(); + + prepareColumnRecursive(nested_column, nested_type, name, options, states, schemas); + + if (schemas[child_schema_idx].repetition_type == parq::FieldRepetitionType::REQUIRED) + { + /// Normal case: we just slap a FieldRepetitionType::OPTIONAL onto the nested column. + schemas[child_schema_idx].repetition_type = parq::FieldRepetitionType::OPTIONAL; + } + else + { + /// Weird case: Nullable(Nullable(...)). Or Nullable(Tuple(Nullable(...))), etc. 
+        /// This is probably not allowed in ClickHouse, but let's support it just in case.
+        auto & schema = *schemas.insert(schemas.begin() + child_schema_idx, {});
+        schema.__set_repetition_type(parq::FieldRepetitionType::OPTIONAL);
+        schema.__set_name("nullable");
+        schema.__set_num_children(1);
+        for (size_t i = child_states_begin; i < states.size(); ++i)
+        {
+            Strings & path = states[i].column_chunk.meta_data.path_in_schema;
+            path.insert(path.begin(), schema.name + ".");
+        }
+    }
+
+    for (size_t i = child_states_begin; i < states.size(); ++i)
+    {
+        auto & s = states[i];
+        updateRepDefLevelsAndFilterColumnForNullable(s, null_map);
+    }
+}
+
+void prepareColumnTuple(
+    ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options,
+    ColumnChunkWriteStates & states, SchemaElements & schemas)
+{
+    const auto * column_tuple = assert_cast<const ColumnTuple *>(column.get());
+    const auto * type_tuple = assert_cast<const DataTypeTuple *>(type.get());
+
+    auto & tuple_schema = schemas.emplace_back();
+    tuple_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED);
+    tuple_schema.__set_name(name);
+    tuple_schema.__set_num_children(static_cast<Int32>(type_tuple->getElements().size()));
+
+    size_t child_states_begin = states.size();
+
+    for (size_t i = 0; i < type_tuple->getElements().size(); ++i)
+        prepareColumnRecursive(column_tuple->getColumnPtr(i), type_tuple->getElement(i), type_tuple->getNameByPosition(i + 1), options, states, schemas);
+
+    for (size_t i = child_states_begin; i < states.size(); ++i)
+    {
+        Strings & path = states[i].column_chunk.meta_data.path_in_schema;
+        /// O(nesting_depth^2), but who cares.
+        path.insert(path.begin(), name);
+    }
+}
+
+void prepareColumnArray(
+    ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options,
+    ColumnChunkWriteStates & states, SchemaElements & schemas)
+{
+    const auto * column_array = assert_cast<const ColumnArray *>(column.get());
+    ColumnPtr nested_column = column_array->getDataPtr();
+    DataTypePtr nested_type = assert_cast<const DataTypeArray *>(type.get())->getNestedType();
+    const auto & offsets = column_array->getOffsets();
+
+    /// Schema for lists https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists
+    ///
+    /// required group `name` (List):
+    ///   repeated group "list":
+    ///     "element"
+
+    /// Add the groups schema.
+
+    schemas.emplace_back();
+    schemas.emplace_back();
+    auto & list_schema = schemas[schemas.size() - 2];
+    auto & item_schema = schemas[schemas.size() - 1];
+
+    list_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED);
+    list_schema.__set_name(name);
+    list_schema.__set_num_children(1);
+    list_schema.__set_converted_type(parq::ConvertedType::LIST);
+    list_schema.__isset.logicalType = true;
+    list_schema.logicalType.__set_LIST({});
+
+    item_schema.__set_repetition_type(parq::FieldRepetitionType::REPEATED);
+    item_schema.__set_name("list");
+    item_schema.__set_num_children(1);
+
+    std::array<std::string, 2> path_prefix = {list_schema.name, item_schema.name};
+    size_t child_states_begin = states.size();
+
+    /// Recurse.
+    prepareColumnRecursive(nested_column, nested_type, "element", options, states, schemas);
+
+    /// Update repetition+definition levels and fully-qualified column names (x -> myarray.list.x).
+    for (size_t i = child_states_begin; i < states.size(); ++i)
+    {
+        Strings & path = states[i].column_chunk.meta_data.path_in_schema;
+        path.insert(path.begin(), path_prefix.begin(), path_prefix.end());
+
+        updateRepDefLevelsForArray(states[i], offsets);
+    }
+}
+
+void prepareColumnMap(
+    ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options,
+    ColumnChunkWriteStates & states, SchemaElements & schemas)
+{
+    const auto * column_map = assert_cast<const ColumnMap *>(column.get());
+    const auto * column_array = &column_map->getNestedColumn();
+    const auto & offsets = column_array->getOffsets();
+    ColumnPtr column_tuple = column_array->getDataPtr();
+
+    const auto * map_type = assert_cast<const DataTypeMap *>(type.get());
+    DataTypePtr tuple_type = std::make_shared<DataTypeTuple>(map_type->getKeyValueTypes(), Strings{"key", "value"});
+
+    /// Map is an array of tuples
+    /// https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps
+    ///
+    /// required group `name` (Map):
+    ///   repeated group "key_value":
+    ///     required <...> "key"
+    ///     <...> "value"
+
+    auto & map_schema = schemas.emplace_back();
+    map_schema.__set_repetition_type(parq::FieldRepetitionType::REQUIRED);
+    map_schema.__set_name(name);
+    map_schema.__set_num_children(1);
+    map_schema.__set_converted_type(parq::ConvertedType::MAP);
+    map_schema.__set_logicalType({});
+    map_schema.logicalType.__set_MAP({});
+
+    size_t tuple_schema_idx = schemas.size();
+    size_t child_states_begin = states.size();
+
+    prepareColumnTuple(column_tuple, tuple_type, "key_value", options, states, schemas);
+
+    schemas[tuple_schema_idx].__set_repetition_type(parq::FieldRepetitionType::REPEATED);
+    schemas[tuple_schema_idx].__set_converted_type(parq::ConvertedType::MAP_KEY_VALUE);
+
+    for (size_t i = child_states_begin; i < states.size(); ++i)
+    {
+        Strings & path = states[i].column_chunk.meta_data.path_in_schema;
+        path.insert(path.begin(), name);
+
+        updateRepDefLevelsForArray(states[i], offsets);
+    }
+}
+
+void prepareColumnRecursive(
+    ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options,
+    ColumnChunkWriteStates & states, SchemaElements & schemas)
+{
+    switch (type->getTypeId())
+    {
+        case TypeIndex::Nullable: prepareColumnNullable(column, type, name, options, states, schemas); break;
+        case TypeIndex::Array: prepareColumnArray(column, type, name, options, states, schemas); break;
+        case TypeIndex::Tuple: prepareColumnTuple(column, type, name, options, states, schemas); break;
+        case TypeIndex::Map: prepareColumnMap(column, type, name, options, states, schemas); break;
+        case TypeIndex::LowCardinality:
+        {
+            auto nested_type = assert_cast<const DataTypeLowCardinality &>(*type).getDictionaryType();
+            if (nested_type->isNullable())
+                prepareColumnNullable(
+                    column->convertToFullColumnIfLowCardinality(), nested_type, name, options, states, schemas);
+            else
+                /// Use nested data type, but keep ColumnLowCardinality. The encoder can deal with it.
+ preparePrimitiveColumn(column, nested_type, name, options, states, schemas); + break; + } + default: + preparePrimitiveColumn(column, type, name, options, states, schemas); + break; + } +} + +} + +SchemaElements convertSchema(const Block & sample, const WriteOptions & options) +{ + SchemaElements schema; + auto & root = schema.emplace_back(); + root.__set_name("schema"); + root.__set_num_children(static_cast(sample.columns())); + + for (auto & c : sample) + prepareColumnForWrite(c.column, c.type, c.name, options, nullptr, &schema); + + return schema; +} + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema) +{ + if (column->size() == 0 && out_columns_to_write != nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column passed to Parquet encoder"); + + ColumnChunkWriteStates states; + SchemaElements schemas; + prepareColumnRecursive(column, type, name, options, states, schemas); + + if (out_columns_to_write) + for (auto & s : states) + out_columns_to_write->push_back(std::move(s)); + if (out_schema) + out_schema->insert(out_schema->end(), schemas.begin(), schemas.end()); + + if (column->empty()) + states.clear(); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp new file mode 100644 index 00000000000..2a99b028ae0 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.cpp @@ -0,0 +1,35 @@ +#include +#include + +namespace DB::Parquet +{ + +class WriteBufferTransport : public apache::thrift::transport::TTransport +{ +public: + WriteBuffer & out; + size_t bytes = 0; + + explicit WriteBufferTransport(WriteBuffer & out_) : out(out_) {} + + void write(const uint8_t* buf, uint32_t len) + { + out.write(reinterpret_cast(buf), len); + bytes += len; + } +}; + +template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out) +{ + auto trans = std::make_shared(out); + auto proto = apache::thrift::protocol::TCompactProtocolFactoryT().getProtocol(trans); + obj.write(proto.get()); + return trans->bytes; +} + +template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/ThriftUtil.h b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h new file mode 100644 index 00000000000..1efbe0002d4 --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/ThriftUtil.h @@ -0,0 +1,17 @@ +#pragma once + +#include // in contrib/arrow/cpp/src/ , generated from parquet.thrift +#include + +namespace DB::Parquet +{ + +/// Returns number of bytes written. 
+template +size_t serializeThriftStruct(const T & obj, WriteBuffer & out); + +extern template size_t serializeThriftStruct(const parquet::format::PageHeader &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::ColumnChunk &, WriteBuffer & out); +extern template size_t serializeThriftStruct(const parquet::format::FileMetaData &, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp new file mode 100644 index 00000000000..a29bb81f8dc --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -0,0 +1,816 @@ +#include "Processors/Formats/Impl/Parquet/Write.h" +#include "Processors/Formats/Impl/Parquet/ThriftUtil.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "config_version.h" + +namespace DB::ErrorCodes +{ + extern const int CANNOT_COMPRESS; + extern const int LIMIT_EXCEEDED; + extern const int LOGICAL_ERROR; +} + +namespace DB::Parquet +{ + +namespace parq = parquet::format; + +namespace +{ + +template +struct StatisticsNumeric +{ + T min = std::numeric_limits::max(); + T max = std::numeric_limits::min(); + + void add(SourceType x) + { + min = std::min(min, static_cast(x)); + max = std::max(max, static_cast(x)); + } + + void merge(const StatisticsNumeric & s) + { + min = std::min(min, s.min); + max = std::max(max, s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions &) + { + parq::Statistics s; + s.__isset.min_value = s.__isset.max_value = true; + s.min_value.resize(sizeof(T)); + s.max_value.resize(sizeof(T)); + memcpy(s.min_value.data(), &min, sizeof(T)); + memcpy(s.max_value.data(), &max, sizeof(T)); + + if constexpr (std::is_signed::value) + { + s.__set_min(s.min_value); + s.__set_max(s.max_value); + } + return s; + } +}; + +struct StatisticsFixedString +{ + size_t fixed_string_size = UINT64_MAX; + const uint8_t * min = nullptr; + const uint8_t * max = nullptr; + + void add(parquet::FixedLenByteArray a) + { + chassert(fixed_string_size != UINT64_MAX); + addMin(a.ptr); + addMax(a.ptr); + } + + void merge(const StatisticsFixedString & s) + { + chassert(fixed_string_size == UINT64_MAX || fixed_string_size == s.fixed_string_size); + fixed_string_size = s.fixed_string_size; + if (s.min == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { min = max = nullptr; } + + parq::Statistics get(const WriteOptions & options) + { + parq::Statistics s; + if (min == nullptr || fixed_string_size > options.max_statistics_size) + return s; + s.__set_min_value(std::string(reinterpret_cast(min), fixed_string_size)); + s.__set_max_value(std::string(reinterpret_cast(max), fixed_string_size)); + return s; + } + + void addMin(const uint8_t * p) + { + if (min == nullptr || memcmp(p, min, fixed_string_size) < 0) + min = p; + } + void addMax(const uint8_t * p) + { + if (max == nullptr || memcmp(p, max, fixed_string_size) > 0) + max = p; + } +}; + +struct StatisticsString +{ + parquet::ByteArray min; + parquet::ByteArray max; + + void add(parquet::ByteArray x) + { + addMin(x); + addMax(x); + } + + void merge(const StatisticsString & s) + { + if (s.min.ptr == nullptr) + return; + addMin(s.min); + addMax(s.max); + } + + void clear() { *this = {}; } + + parq::Statistics get(const WriteOptions & options) + { + parq::Statistics s; + if (min.ptr == nullptr) + return s; + if (static_cast(min.len) <= options.max_statistics_size) + 
s.__set_min_value(std::string(reinterpret_cast(min.ptr), static_cast(min.len))); + if (static_cast(max.len) <= options.max_statistics_size) + s.__set_max_value(std::string(reinterpret_cast(max.ptr), static_cast(max.len))); + return s; + } + + void addMin(parquet::ByteArray x) + { + if (min.ptr == nullptr || compare(x, min) < 0) + min = x; + } + + void addMax(parquet::ByteArray x) + { + if (max.ptr == nullptr || compare(x, max) > 0) + max = x; + } + + static int compare(parquet::ByteArray a, parquet::ByteArray b) + { + int t = memcmp(a.ptr, b.ptr, std::min(a.len, b.len)); + if (t != 0) + return t; + return a.len - b.len; + } +}; + +/// The column usually needs to be converted to one of Parquet physical types, e.g. UInt16 -> Int32 +/// or [element of ColumnString] -> std::string_view. +/// We do this conversion in small batches rather than all at once, just before encoding the batch, +/// in hopes of getting better performance through cache locality. +/// The Coverter* structs below are responsible for that. +/// When conversion is not needed, getBatch() will just return pointer into original data. + +template ::value, + To, + typename std::make_unsigned::type>::type> +struct ConverterNumeric +{ + using Statistics = StatisticsNumeric; + + const Col & column; + PODArray buf; + + explicit ConverterNumeric(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const To * getBatch(size_t offset, size_t count) + { + if constexpr (sizeof(*column.getData().data()) == sizeof(To)) + return reinterpret_cast(column.getData().data() + offset); + else + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i] = static_cast(column.getData()[offset + i]); + return buf.data(); + } + } +}; + +struct ConverterString +{ + using Statistics = StatisticsString; + + const ColumnString & column; + PODArray buf; + + explicit ConverterString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::ByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + { + StringRef s = column.getDataAt(offset + i); + buf[i] = parquet::ByteArray(static_cast(s.size), reinterpret_cast(s.data)); + } + return buf.data(); + } +}; + +struct ConverterFixedString +{ + using Statistics = StatisticsFixedString; + + const ColumnFixedString & column; + PODArray buf; + + explicit ConverterFixedString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getChars().data() + (offset + i) * column.getN()); + return buf.data(); + } + + size_t fixedStringSize() { return column.getN(); } +}; + +struct ConverterFixedStringAsString +{ + using Statistics = StatisticsString; + + const ColumnFixedString & column; + PODArray buf; + + explicit ConverterFixedStringAsString(const ColumnPtr & c) : column(assert_cast(*c)) {} + + const parquet::ByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i] = parquet::ByteArray(static_cast(column.getN()), reinterpret_cast(column.getChars().data() + (offset + i) * column.getN())); + return buf.data(); + } +}; + +template +struct ConverterNumberAsFixedString +{ + /// Calculate min/max statistics for little-endian fixed strings, not numbers, because parquet + /// doesn't know it's numbers. 
+ using Statistics = StatisticsFixedString; + + const ColumnVector & column; + PODArray buf; + + explicit ConverterNumberAsFixedString(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + buf.resize(count); + for (size_t i = 0; i < count; ++i) + buf[i].ptr = reinterpret_cast(column.getData().data() + offset + i); + return buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Like ConverterNumberAsFixedString, but converts to big-endian. Because that's the byte order +/// Parquet uses for decimal types and literally nothing else, for some reason. +template +struct ConverterDecimal +{ + using Statistics = StatisticsFixedString; + + const ColumnDecimal & column; + PODArray data_buf; + PODArray ptr_buf; + + explicit ConverterDecimal(const ColumnPtr & c) : column(assert_cast &>(*c)) {} + + const parquet::FixedLenByteArray * getBatch(size_t offset, size_t count) + { + data_buf.resize(count * sizeof(T)); + ptr_buf.resize(count); + memcpy(data_buf.data(), reinterpret_cast(column.getData().data() + offset), count * sizeof(T)); + for (size_t i = 0; i < count; ++i) + { + std::reverse(data_buf.data() + i * sizeof(T), data_buf.data() + (i + 1) * sizeof(T)); + ptr_buf[i].ptr = data_buf.data() + i * sizeof(T); + } + return ptr_buf.data(); + } + + size_t fixedStringSize() { return sizeof(T); } +}; + +/// Returns either `source` or `scratch`. +PODArray & compress(PODArray & source, PODArray & scratch, CompressionMethod method) +{ + /// We could use wrapWriteBufferWithCompressionMethod() for everything, but I worry about the + /// overhead of creating a bunch of WriteBuffers on each page (thousands of values). + switch (method) + { + case CompressionMethod::None: + return source; + + case CompressionMethod::Lz4: + { + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wold-style-cast" + + size_t max_dest_size = LZ4_COMPRESSBOUND(source.size()); + + #pragma clang diagnostic pop + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + int compressed_size = LZ4_compress_default( + source.data(), + scratch.data(), + static_cast(source.size()), + static_cast(max_dest_size)); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } + + default: + { + auto dest_buf = std::make_unique>>(scratch); + auto compressed_buf = wrapWriteBufferWithCompressionMethod( + std::move(dest_buf), + method, + /*level*/ 3, + source.size(), + /*existing_memory*/ source.data()); + chassert(compressed_buf->position() == source.data()); + chassert(compressed_buf->available() == source.size()); + compressed_buf->position() += source.size(); + compressed_buf->finalize(); + return scratch; + } + } +} + +void encodeRepDefLevelsRLE(const UInt8 * data, size_t size, UInt8 max_level, PODArray & out) +{ + using arrow::util::RleEncoder; + + chassert(max_level > 0); + size_t offset = out.size(); + size_t prefix_size = sizeof(Int32); + + int bit_width = bitScanReverse(max_level) + 1; + int max_rle_size = RleEncoder::MaxBufferSize(bit_width, static_cast(size)) + + RleEncoder::MinBufferSize(bit_width); + + out.resize(offset + prefix_size + max_rle_size); + + RleEncoder encoder(reinterpret_cast(out.data() + offset + prefix_size), max_rle_size, bit_width); + for (size_t i = 0; i < size; ++i) + encoder.Put(data[i]); + encoder.Flush(); + Int32 len = 
encoder.len(); + + memcpy(out.data() + offset, &len, prefix_size); + out.resize(offset + prefix_size + len); +} + +void addToEncodingsUsed(ColumnChunkWriteState & s, parq::Encoding::type e) +{ + if (!std::count(s.column_chunk.meta_data.encodings.begin(), s.column_chunk.meta_data.encodings.end(), e)) + s.column_chunk.meta_data.encodings.push_back(e); +} + +void writePage(const parq::PageHeader & header, const PODArray & compressed, ColumnChunkWriteState & s, WriteBuffer & out) +{ + size_t header_size = serializeThriftStruct(header, out); + out.write(compressed.data(), compressed.size()); + + /// Remember first data page and first dictionary page. + if (header.__isset.data_page_header && s.column_chunk.meta_data.data_page_offset == -1) + s.column_chunk.meta_data.__set_data_page_offset(s.column_chunk.meta_data.total_compressed_size); + if (header.__isset.dictionary_page_header && !s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.__set_dictionary_page_offset(s.column_chunk.meta_data.total_compressed_size); + + s.column_chunk.meta_data.total_uncompressed_size += header.uncompressed_page_size + header_size; + s.column_chunk.meta_data.total_compressed_size += header.compressed_page_size + header_size; +} + +template +void writeColumnImpl( + ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out, Converter && converter) +{ + size_t num_values = s.max_def > 0 ? s.def.size() : s.primitive_column->size(); + auto encoding = options.encoding; + + typename Converter::Statistics page_statistics; + typename Converter::Statistics total_statistics; + + /// We start with dictionary encoding, then switch to `encoding` (non-dictionary) if the + /// dictionary gets too big. That's how arrow does it too. + bool initially_used_dictionary = options.use_dictionary_encoding; + bool currently_using_dictionary = initially_used_dictionary; + + std::optional fixed_string_descr; + if constexpr (std::is_same::value) + { + /// This just communicates one number to MakeTypedEncoder(): the fixed string length. + fixed_string_descr.emplace(parquet::schema::PrimitiveNode::Make( + "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, + parquet::ConvertedType::NONE, static_cast(converter.fixedStringSize())), 0, 0); + + page_statistics.fixed_string_size = converter.fixedStringSize(); + } + + /// Could use an arena here (by passing a custom MemoryPool), to reuse memory across pages. + /// Alternatively, we could avoid using arrow's dictionary encoding code and leverage + /// ColumnLowCardinality instead. It would work basically the same way as what this function + /// currently does: add values to the ColumnRowCardinality (instead of `encoder`) in batches, + /// checking dictionary size after each batch; if it gets big, flush the dictionary and the + /// indices and switch to non-dictionary encoding. Feels like it could even be slightly less code. + auto encoder = parquet::MakeTypedEncoder( + // ignored if using dictionary + static_cast(encoding), + currently_using_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); + + struct PageData + { + parq::PageHeader header; + PODArray data; + }; + std::vector dict_encoded_pages; // can't write them out until we have full dictionary + + /// Reused across pages to reduce number of allocations and improve locality. + PODArray encoded; + PODArray compressed_maybe; + + /// Start of current page. 
+ size_t def_offset = 0; // index in def and rep + size_t data_offset = 0; // index in primitive_column + + auto flush_page = [&](size_t def_count, size_t data_count) + { + encoded.clear(); + + /// Concatenate encoded rep, def, and data. + + if (s.max_rep > 0) + encodeRepDefLevelsRLE(s.rep.data() + def_offset, def_count, s.max_rep, encoded); + if (s.max_def > 0) + encodeRepDefLevelsRLE(s.def.data() + def_offset, def_count, s.max_def, encoded); + + std::shared_ptr values = encoder->FlushValues(); // resets it for next page + + encoded.resize(encoded.size() + values->size()); + memcpy(encoded.data() + encoded.size() - values->size(), values->data(), values->size()); + values.reset(); + + if (encoded.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Uncompressed page is too big: {}", encoded.size()); + + size_t uncompressed_size = encoded.size(); + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DATA_PAGE); + header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.data_page_header = true; + auto & d = header.data_page_header; + d.__set_num_values(static_cast(def_count)); + d.__set_encoding(currently_using_dictionary ? parq::Encoding::RLE_DICTIONARY : encoding); + d.__set_definition_level_encoding(parq::Encoding::RLE); + d.__set_repetition_level_encoding(parq::Encoding::RLE); + /// We could also put checksum in `header.crc`, but apparently no one uses it: + /// https://issues.apache.org/jira/browse/PARQUET-594 + + if (options.write_page_statistics) + { + d.__set_statistics(page_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + d.statistics.__set_null_count(static_cast(def_count - data_count)); + } + + total_statistics.merge(page_statistics); + page_statistics.clear(); + + if (currently_using_dictionary) + { + dict_encoded_pages.push_back({.header = std::move(header)}); + std::swap(dict_encoded_pages.back().data, compressed); + } + else + { + writePage(header, compressed, s, out); + } + + def_offset += def_count; + data_offset += data_count; + }; + + auto flush_dict = [&] -> bool + { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = dict_encoder->dict_encoded_size(); + + encoded.resize(static_cast(dict_size)); + dict_encoder->WriteDict(reinterpret_cast(encoded.data())); + + auto & compressed = compress(encoded, compressed_maybe, s.compression); + + if (compressed.size() > INT32_MAX) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Compressed dictionary page is too big: {}", compressed.size()); + + parq::PageHeader header; + header.__set_type(parq::PageType::DICTIONARY_PAGE); + header.__set_uncompressed_page_size(dict_size); + header.__set_compressed_page_size(static_cast(compressed.size())); + header.__isset.dictionary_page_header = true; + header.dictionary_page_header.__set_num_values(dict_encoder->num_entries()); + header.dictionary_page_header.__set_encoding(parq::Encoding::PLAIN); + + writePage(header, compressed, s, out); + + for (auto & p : dict_encoded_pages) + writePage(p.header, p.data, s, out); + + dict_encoded_pages.clear(); + encoder.reset(); + + return true; + }; + + auto is_dict_too_big = [&] { + auto * dict_encoder = dynamic_cast *>(encoder.get()); + int dict_size = 
dict_encoder->dict_encoded_size(); + return static_cast(dict_size) >= options.dictionary_size_limit; + }; + + while (def_offset < num_values) + { + /// Pick enough data for a page. + size_t next_def_offset = def_offset; + size_t next_data_offset = data_offset; + while (true) + { + /// Bite off a batch of defs and corresponding data values. + size_t def_count = std::min(options.write_batch_size, num_values - next_def_offset); + size_t data_count = 0; + if (s.max_def == 0) + data_count = def_count; + else + for (size_t i = 0; i < def_count; ++i) + data_count += s.def[next_def_offset + i] == s.max_def; + + /// Encode the data (but not the levels yet), so that we can estimate its encoded size. + const typename ParquetDType::c_type * converted = converter.getBatch(next_data_offset, data_count); + + if (options.write_page_statistics || options.write_column_chunk_statistics) + for (size_t i = 0; i < data_count; ++i) + page_statistics.add(converted[i]); + + encoder->Put(converted, static_cast(data_count)); + + next_def_offset += def_count; + next_data_offset += data_count; + + if (currently_using_dictionary && is_dict_too_big()) + { + /// Fallback to non-dictionary encoding. + flush_page(next_def_offset - def_offset, next_data_offset - data_offset); + flush_dict(); + + currently_using_dictionary = false; + encoder = parquet::MakeTypedEncoder( + static_cast(encoding)); + break; + } + + if (next_def_offset == num_values || + static_cast(encoder->EstimatedDataEncodedSize()) >= options.data_page_size) + { + flush_page(next_def_offset - def_offset, next_data_offset - data_offset); + break; + } + } + } + + if (currently_using_dictionary) + flush_dict(); + + chassert(data_offset == s.primitive_column->size()); + + if (options.write_column_chunk_statistics) + { + s.column_chunk.meta_data.__set_statistics(total_statistics.get(options)); + + if (s.max_def == 1 && s.max_rep == 0) + s.column_chunk.meta_data.statistics.__set_null_count(static_cast(def_offset - data_offset)); + } + + /// Report which encodings we've used. + if (s.max_rep > 0 || s.max_def > 0) + addToEncodingsUsed(s, parq::Encoding::RLE); // levels + if (!currently_using_dictionary) + addToEncodingsUsed(s, encoding); // non-dictionary encoding + if (initially_used_dictionary) + { + addToEncodingsUsed(s, parq::Encoding::PLAIN); // dictionary itself + addToEncodingsUsed(s, parq::Encoding::RLE_DICTIONARY); // ids + } +} + +} + +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out) +{ + s.column_chunk.meta_data.__set_num_values(s.max_def > 0 ? s.def.size() : s.primitive_column->size()); + + /// We'll be updating these as we go. + s.column_chunk.meta_data.__set_encodings({}); + s.column_chunk.meta_data.__set_total_compressed_size(0); + s.column_chunk.meta_data.__set_total_uncompressed_size(0); + s.column_chunk.meta_data.__set_data_page_offset(-1); + + s.primitive_column = s.primitive_column->convertToFullColumnIfLowCardinality(); + + switch (s.primitive_column->getDataType()) + { + /// Numeric conversion to Int32 or Int64. 
+ #define N(source_type, parquet_dtype) \ + writeColumnImpl(s, options, out, \ + ConverterNumeric, parquet::parquet_dtype::c_type>( \ + s.primitive_column)) + + case TypeIndex::UInt8 : N(UInt8 , Int32Type); break; + case TypeIndex::UInt16 : N(UInt16, Int32Type); break; + case TypeIndex::UInt32 : N(UInt32, Int32Type); break; + case TypeIndex::UInt64 : N(UInt64, Int64Type); break; + case TypeIndex::Int8 : N(Int8 , Int32Type); break; + case TypeIndex::Int16 : N(Int16 , Int32Type); break; + case TypeIndex::Int32 : N(Int32 , Int32Type); break; + case TypeIndex::Int64 : N(Int64 , Int64Type); break; + + case TypeIndex::Enum8: N(Int8 , Int32Type); break; + case TypeIndex::Enum16: N(Int16 , Int32Type); break; + case TypeIndex::Date: N(UInt16, Int32Type); break; + case TypeIndex::Date32: N(Int32 , Int32Type); break; + case TypeIndex::DateTime: N(UInt32, Int32Type); break; + + #undef N + + case TypeIndex::Float32: + writeColumnImpl( + s, options, out, ConverterNumeric, Float32, Float32>( + s.primitive_column)); + break; + + case TypeIndex::Float64: + writeColumnImpl( + s, options, out, ConverterNumeric, Float64, Float64>( + s.primitive_column)); + break; + + case TypeIndex::DateTime64: + writeColumnImpl( + s, options, out, ConverterNumeric, Int64, Int64>( + s.primitive_column)); + break; + + case TypeIndex::IPv4: + writeColumnImpl( + s, options, out, ConverterNumeric, Int32, UInt32>( + s.primitive_column)); + break; + + case TypeIndex::String: + writeColumnImpl( + s, options, out, ConverterString(s.primitive_column)); + break; + + case TypeIndex::FixedString: + if (options.output_fixed_string_as_fixed_byte_array) + writeColumnImpl( + s, options, out, ConverterFixedString(s.primitive_column)); + else + writeColumnImpl( + s, options, out, ConverterFixedStringAsString(s.primitive_column)); + break; + + #define F(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterNumberAsFixedString(s.primitive_column)) + case TypeIndex::UInt128: F(UInt128); break; + case TypeIndex::UInt256: F(UInt256); break; + case TypeIndex::Int128: F(Int128); break; + case TypeIndex::Int256: F(Int256); break; + case TypeIndex::IPv6: F(IPv6); break; + #undef F + + #define D(source_type) \ + writeColumnImpl( \ + s, options, out, ConverterDecimal(s.primitive_column)) + case TypeIndex::Decimal32: D(Decimal32); break; + case TypeIndex::Decimal64: D(Decimal64); break; + case TypeIndex::Decimal128: D(Decimal128); break; + case TypeIndex::Decimal256: D(Decimal256); break; + #undef D + + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column type: {}", s.primitive_column->getFamilyName()); + } + + /// Free some memory. + s.primitive_column = {}; + s.def = {}; + s.rep = {}; +} + +void writeFileHeader(WriteBuffer & out) +{ + /// Write the magic bytes. We're a wizard now. 
+ out.write("PAR1", 4); +} + +parq::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions &, WriteBuffer & out) +{ + if (s.column_chunk.meta_data.data_page_offset != -1) + s.column_chunk.meta_data.data_page_offset += offset_in_file; + if (s.column_chunk.meta_data.__isset.dictionary_page_offset) + s.column_chunk.meta_data.dictionary_page_offset += offset_in_file; + s.column_chunk.file_offset = offset_in_file + s.column_chunk.meta_data.total_compressed_size; + + serializeThriftStruct(s.column_chunk, out); + + return std::move(s.column_chunk); +} + +parq::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows) +{ + parq::RowGroup r; + r.__set_num_rows(num_rows); + r.__set_columns(std::move(column_chunks)); + r.__set_total_compressed_size(0); + for (auto & c : r.columns) + { + r.total_byte_size += c.meta_data.total_uncompressed_size; + r.total_compressed_size += c.meta_data.total_compressed_size; + } + if (!r.columns.empty()) + { + auto & m = r.columns[0].meta_data; + r.__set_file_offset(m.__isset.dictionary_page_offset ? m.dictionary_page_offset : m.data_page_offset); + } + return r; +} + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out) +{ + parq::FileMetaData meta; + meta.version = 2; + meta.schema = std::move(schema); + meta.row_groups = std::move(row_groups); + for (auto & r : meta.row_groups) + meta.num_rows += r.num_rows; + meta.__set_created_by(VERSION_NAME " " VERSION_DESCRIBE); + + if (options.write_page_statistics || options.write_column_chunk_statistics) + { + meta.__set_column_orders({}); + for (auto & s : meta.schema) + if (!s.__isset.num_children) + meta.column_orders.emplace_back(); + for (auto & c : meta.column_orders) + c.__set_TYPE_ORDER({}); + } + + size_t footer_size = serializeThriftStruct(meta, out); + + if (footer_size > INT32_MAX) + throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Parquet file metadata too big: {}", footer_size); + + writeIntBinary(static_cast(footer_size), out); + out.write("PAR1", 4); +} + +} diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h new file mode 100644 index 00000000000..333a32e191f --- /dev/null +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -0,0 +1,135 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB::Parquet +{ + +/// A good resource for learning how Parquet format works is +/// contrib/arrow/cpp/src/parquet/parquet.thrift + +struct WriteOptions +{ + bool output_string_as_string = false; + bool output_fixed_string_as_fixed_byte_array = true; + + CompressionMethod compression = CompressionMethod::Lz4; + + size_t data_page_size = 1024 * 1024; + size_t write_batch_size = 1024; + + bool use_dictionary_encoding = true; + size_t dictionary_size_limit = 1024 * 1024; + /// If using dictionary, this encoding is used as a fallback when dictionary gets too big. + /// Otherwise, this is used for everything. + parquet::format::Encoding::type encoding = parquet::format::Encoding::PLAIN; + + bool write_page_statistics = true; + bool write_column_chunk_statistics = true; + size_t max_statistics_size = 4096; +}; + +/// Information about a primitive column (leaf of the schema tree) to write to Parquet file. +struct ColumnChunkWriteState +{ + /// After writeColumnChunkBody(), offsets in this struct are relative to the start of column chunk. + /// Then finalizeColumnChunkAndWriteFooter() fixes them up before writing to file. 
+ parquet::format::ColumnChunk column_chunk; + + ColumnPtr primitive_column; + CompressionMethod compression; // must match what's inside column_chunk + + /// Repetition and definition levels. Produced by prepareColumnForWrite(). + /// def is empty iff max_def == 0, which means no arrays or nullables. + /// rep is empty iff max_rep == 0, which means no arrays. + PaddedPODArray def; // definition levels + PaddedPODArray rep; // repetition levels + /// Max possible levels, according to schema. Actual max in def/rep may be smaller. + UInt8 max_def = 0; + UInt8 max_rep = 0; + + ColumnChunkWriteState() = default; + /// Prevent accidental copying. + ColumnChunkWriteState(ColumnChunkWriteState &&) = default; + ColumnChunkWriteState & operator=(ColumnChunkWriteState &&) = default; + + /// Estimated memory usage. + size_t allocatedBytes() const + { + size_t r = def.allocated_bytes() + rep.allocated_bytes(); + if (primitive_column) + r += primitive_column->allocatedBytes(); + return r; + } +}; + +using SchemaElements = std::vector; +using ColumnChunkWriteStates = std::vector; + +/// Parquet file consists of row groups, which consist of column chunks. +/// +/// Column chunks can be encoded mostly independently of each other, in parallel. +/// But there are two small complications: +/// 1. One ClickHouse column can translate to multiple leaf columns in parquet. +/// E.g. tuples and maps. +/// If all primitive columns are in one big tuple, we'd like to encode them in parallel too, +/// even though they're one top-level ClickHouse column. +/// 2. At the end of each encoded column chunk there's a footer (struct ColumnMetaData) that +/// contains some absolute offsets in the file. We can't encode it until we know the exact +/// position in the file where the column chunk will go. So these footers have to be serialized +/// sequentially, after we know sizes of all previous column chunks. +/// +/// With that in mind, here's how to write a parquet file: +/// +/// (1) writeFileHeader() +/// (2) For each row group: +/// | (3) For each ClickHouse column: +/// | (4) Call prepareColumnForWrite(). +/// | It'll produce one or more ColumnChunkWriteStates, corresponding to primitive columns that +/// | we need to write. +/// | It'll also produce SchemaElements as a byproduct, describing the logical types and +/// | groupings of the physical columns (e.g. tuples, arrays, maps). +/// | (5) For each ColumnChunkWriteState: +/// | (6) Call writeColumnChunkBody() to write the actual data to the given WriteBuffer. +/// | (7) Call finalizeColumnChunkAndWriteFooter() to write the footer of the column chunk. +/// | (8) Call makeRowGroup() using the ColumnChunk metadata structs from previous step. +/// (9) Call writeFileFooter() using the row groups from previous step and SchemaElements from +/// convertSchema(). +/// +/// Steps (4) and (6) can be parallelized, both within and across row groups. + +/// Parquet schema is a tree of SchemaElements, flattened into a list in depth-first order. +/// Leaf nodes correspond to physical columns of primitive types. Inner nodes describe logical +/// groupings of those columns, e.g. tuples or structs. +SchemaElements convertSchema(const Block & sample, const WriteOptions & options); + +void prepareColumnForWrite( + ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, + ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema = nullptr); + +void writeFileHeader(WriteBuffer & out); + +/// Encodes a column chunk, without the footer. 
+/// The ColumnChunkWriteState-s should then passed to finalizeColumnChunkAndWriteFooter(). +void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & options, WriteBuffer & out); + +/// Unlike most of the column chunk data, the footer (`ColumnMetaData`) needs to know its absolute +/// offset in the file. So we encode it separately, after all previous row groups and column chunks +/// have been encoded. +/// (If you're wondering if the 8-byte offset values can be patched inside the encoded blob - no, +/// they're varint-encoded and can't be padded to a fixed length.) +/// `offset_in_file` is the absolute position in the file where the writeColumnChunkBody()'s output +/// starts. +/// Returns a ColumnChunk to add to the RowGroup. +parquet::format::ColumnChunk finalizeColumnChunkAndWriteFooter( + size_t offset_in_file, ColumnChunkWriteState s, const WriteOptions & options, WriteBuffer & out); + +parquet::format::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows); + +void writeFileFooter(std::vector row_groups, SchemaElements schema, const WriteOptions & options, WriteBuffer & out); + +} diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 91840cd2c50..9a2d9072860 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -3,14 +3,23 @@ #if USE_PARQUET #include +#include #include #include "ArrowBufferedStreams.h" #include "CHColumnToArrowColumn.h" +namespace CurrentMetrics +{ + extern const Metric ParquetEncoderThreads; + extern const Metric ParquetEncoderThreadsActive; +} + namespace DB { +using namespace Parquet; + namespace ErrorCodes { extern const int UNKNOWN_EXCEPTION; @@ -67,11 +76,219 @@ namespace ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) : IOutputFormat(header_, out_), format_settings{format_settings_} { + if (format_settings.parquet.use_custom_encoder) + { + if (format_settings.parquet.parallel_encoding && format_settings.max_threads > 1) + pool = std::make_unique( + CurrentMetrics::ParquetEncoderThreads, CurrentMetrics::ParquetEncoderThreadsActive, + format_settings.max_threads); + + using C = FormatSettings::ParquetCompression; + switch (format_settings.parquet.output_compression_method) + { + case C::NONE: options.compression = CompressionMethod::None; break; + case C::SNAPPY: options.compression = CompressionMethod::Snappy; break; + case C::ZSTD: options.compression = CompressionMethod::Zstd; break; + case C::LZ4: options.compression = CompressionMethod::Lz4; break; + case C::GZIP: options.compression = CompressionMethod::Gzip; break; + case C::BROTLI: options.compression = CompressionMethod::Brotli; break; + } + options.output_string_as_string = format_settings.parquet.output_string_as_string; + options.output_fixed_string_as_fixed_byte_array = format_settings.parquet.output_fixed_string_as_fixed_byte_array; + options.data_page_size = format_settings.parquet.data_page_size; + options.write_batch_size = format_settings.parquet.write_batch_size; + + schema = convertSchema(header_, options); + } } -void ParquetBlockOutputFormat::consumeStaged() +ParquetBlockOutputFormat::~ParquetBlockOutputFormat() { - const size_t columns_num = staging_chunks.at(0).getNumColumns(); + if (pool) + { + is_stopped = true; + pool->wait(); + } +} + +void ParquetBlockOutputFormat::consume(Chunk chunk) +{ + /// Poll background 
tasks. + if (pool) + { + std::unique_lock lock(mutex); + while (true) + { + /// If some row groups are ready to be written to the file, write them. + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + /// If there's too much work in flight, wait for some of it to complete. + if (row_groups.size() < 2) + break; + if (bytes_in_flight <= format_settings.parquet.row_group_bytes * 4 && + task_queue.size() <= format_settings.max_threads * 4) + break; + + condvar.wait(lock); + } + } + + /// Do something like SquashingTransform to produce big enough row groups. + /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. + /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more + /// convenient to do the squashing here. It's also parallelized here. + + if (chunk.getNumRows() != 0) + { + staging_rows += chunk.getNumRows(); + staging_bytes += chunk.bytes(); + staging_chunks.push_back(std::move(chunk)); + } + + const size_t target_rows = std::max(static_cast(1), format_settings.parquet.row_group_rows); + + if (staging_rows < target_rows && + staging_bytes < format_settings.parquet.row_group_bytes) + return; + + /// In the rare case that more than `row_group_rows` rows arrived in one chunk, split the + /// staging chunk into multiple row groups. + if (staging_rows >= target_rows * 2) + { + /// Increase row group size slightly (by < 2x) to avoid a small row group at the end. + size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); + size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up + + Chunk concatenated = std::move(staging_chunks[0]); + for (size_t i = 1; i < staging_chunks.size(); ++i) + concatenated.append(staging_chunks[i]); + staging_chunks.clear(); + + for (size_t offset = 0; offset < staging_rows; offset += row_group_size) + { + size_t count = std::min(row_group_size, staging_rows - offset); + MutableColumns columns = concatenated.cloneEmptyColumns(); + for (size_t i = 0; i < columns.size(); ++i) + columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); + + Chunks piece; + piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + writeRowGroup(std::move(piece)); + } + } + else + { + writeRowGroup(std::move(staging_chunks)); + } + + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::finalizeImpl() +{ + if (!staging_chunks.empty()) + writeRowGroup(std::move(staging_chunks)); + + if (format_settings.parquet.use_custom_encoder) + { + if (pool) + { + std::unique_lock lock(mutex); + + /// Wait for background work to complete. 
+ while (true) + { + reapCompletedRowGroups(lock); + + if (background_exception) + std::rethrow_exception(background_exception); + + if (is_stopped) + return; + + if (row_groups.empty()) + break; + + condvar.wait(lock); + } + } + + if (row_groups_complete.empty()) + writeFileHeader(out); + writeFileFooter(std::move(row_groups_complete), schema, options, out); + } + else + { + if (!file_writer) + { + Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + std::vector chunks; + chunks.push_back(Chunk(header.getColumns(), 0)); + writeRowGroup(std::move(chunks)); + } + + if (file_writer) + { + auto status = file_writer->Close(); + if (!status.ok()) + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); + } + } +} + +void ParquetBlockOutputFormat::resetFormatterImpl() +{ + if (pool) + { + is_stopped = true; + pool->wait(); + is_stopped = false; + } + + background_exception = nullptr; + threads_running = 0; + task_queue.clear(); + row_groups.clear(); + file_writer.reset(); + row_groups_complete.clear(); + staging_chunks.clear(); + staging_rows = 0; + staging_bytes = 0; +} + +void ParquetBlockOutputFormat::onCancel() +{ + is_stopped = true; +} + +void ParquetBlockOutputFormat::writeRowGroup(std::vector chunks) +{ + if (pool) + writeRowGroupInParallel(std::move(chunks)); + else if (!format_settings.parquet.use_custom_encoder) + writeUsingArrow(std::move(chunks)); + else + { + Chunk concatenated = std::move(chunks[0]); + for (size_t i = 1; i < chunks.size(); ++i) + concatenated.append(chunks[i]); + chunks.clear(); + + writeRowGroupInOneThread(std::move(concatenated)); + } +} + +void ParquetBlockOutputFormat::writeUsingArrow(std::vector chunks) +{ + const size_t columns_num = chunks.at(0).getNumColumns(); std::shared_ptr arrow_table; if (!ch_column_to_arrow_column) @@ -85,7 +302,7 @@ void ParquetBlockOutputFormat::consumeStaged() format_settings.parquet.output_fixed_string_as_fixed_byte_array); } - ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, staging_chunks, columns_num); + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, chunks, columns_num); if (!file_writer) { @@ -112,64 +329,228 @@ void ParquetBlockOutputFormat::consumeStaged() file_writer = std::move(result.ValueOrDie()); } - // TODO: calculate row_group_size depending on a number of rows and table size - - // allow slightly bigger than row_group_size to avoid a very small tail row group - auto status = file_writer->WriteTable(*arrow_table, std::max(format_settings.parquet.row_group_rows, staging_rows)); + auto status = file_writer->WriteTable(*arrow_table, INT64_MAX); if (!status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while writing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::consume(Chunk chunk) +void ParquetBlockOutputFormat::writeRowGroupInOneThread(Chunk chunk) { - /// Do something like SquashingTransform to produce big enough row groups. - /// Because the real SquashingTransform is only used for INSERT, not for SELECT ... INTO OUTFILE. - /// The latter doesn't even have a pipeline where a transform could be inserted, so it's more - /// convenient to do the squashing here. 
- staging_rows += chunk.getNumRows(); - staging_bytes += chunk.bytes(); - staging_chunks.push_back(std::move(chunk)); - chassert(staging_chunks.back().getNumColumns() == staging_chunks.front().getNumColumns()); - if (staging_rows < format_settings.parquet.row_group_rows && - staging_bytes < format_settings.parquet.row_group_bytes) - { + if (chunk.getNumRows() == 0) return; - } - else + + const Block & header = getPort(PortKind::Main).getHeader(); + Parquet::ColumnChunkWriteStates columns_to_write; + chassert(header.columns() == chunk.getNumColumns()); + for (size_t i = 0; i < header.columns(); ++i) + prepareColumnForWrite( + chunk.getColumns()[i], header.getByPosition(i).type, header.getByPosition(i).name, + options, &columns_to_write); + + if (row_groups_complete.empty()) + writeFileHeader(out); + + std::vector column_chunks; + for (auto & s : columns_to_write) { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; + size_t offset = out.count(); + writeColumnChunkBody(s, options, out); + auto c = finalizeColumnChunkAndWriteFooter(offset, std::move(s), options, out); + column_chunks.push_back(std::move(c)); + } + + auto r = makeRowGroup(std::move(column_chunks), chunk.getNumRows()); + row_groups_complete.push_back(std::move(r)); +} + +void ParquetBlockOutputFormat::writeRowGroupInParallel(std::vector chunks) +{ + std::unique_lock lock(mutex); + + const Block & header = getPort(PortKind::Main).getHeader(); + + RowGroupState & r = row_groups.emplace_back(); + r.column_chunks.resize(header.columns()); + r.tasks_in_flight = r.column_chunks.size(); + + std::vector columnses; + for (auto & chunk : chunks) + { + chassert(header.columns() == chunk.getNumColumns()); + r.num_rows += chunk.getNumRows(); + columnses.push_back(chunk.detachColumns()); + } + + for (size_t i = 0; i < header.columns(); ++i) + { + Task & t = task_queue.emplace_back(&r, i, this); + t.column_type = header.getByPosition(i).type; + t.column_name = header.getByPosition(i).name; + + /// Defer concatenating the columns to the threads. + size_t bytes = 0; + for (size_t j = 0; j < chunks.size(); ++j) + { + auto & col = columnses[j][i]; + bytes += col->allocatedBytes(); + t.column_pieces.push_back(std::move(col)); + } + t.mem.set(bytes); + } + + startMoreThreadsIfNeeded(lock); +} + +void ParquetBlockOutputFormat::reapCompletedRowGroups(std::unique_lock & lock) +{ + while (!row_groups.empty() && row_groups.front().tasks_in_flight == 0 && !is_stopped) + { + RowGroupState & r = row_groups.front(); + + /// Write to the file. + + lock.unlock(); + + if (row_groups_complete.empty()) + writeFileHeader(out); + + std::vector metadata; + for (auto & cols : r.column_chunks) + { + for (ColumnChunk & col : cols) + { + size_t offset = out.count(); + + out.write(col.serialized.data(), col.serialized.size()); + auto m = finalizeColumnChunkAndWriteFooter(offset, std::move(col.state), options, out); + + metadata.push_back(std::move(m)); + } + } + + row_groups_complete.push_back(makeRowGroup(std::move(metadata), r.num_rows)); + + lock.lock(); + + row_groups.pop_front(); } } -void ParquetBlockOutputFormat::finalizeImpl() +void ParquetBlockOutputFormat::startMoreThreadsIfNeeded(const std::unique_lock &) { - if (!file_writer && staging_chunks.empty()) + /// Speculate that all current are already working on tasks. 
+ size_t to_add = std::min(task_queue.size(), format_settings.max_threads - threads_running); + for (size_t i = 0; i < to_add; ++i) { - Block header = materializeBlock(getPort(PortKind::Main).getHeader()); + auto job = [this, thread_group = CurrentThread::getGroup()]() + { + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached();); - consume(Chunk(header.getColumns(), 0)); // this will make staging_chunks non-empty + try + { + setThreadName("ParquetEncoder"); + + threadFunction(); + } + catch (...) + { + std::lock_guard lock(mutex); + background_exception = std::current_exception(); + condvar.notify_all(); + --threads_running; + } + }; + + if (threads_running == 0) + { + /// First thread. We need it to succeed; otherwise we may get stuck. + pool->scheduleOrThrowOnError(job); + ++threads_running; + } + else + { + /// More threads. This may be called from inside the thread pool, so avoid waiting; + /// otherwise it may deadlock. + if (!pool->trySchedule(job)) + break; + } } - - if (!staging_chunks.empty()) - { - consumeStaged(); - staging_chunks.clear(); - staging_rows = 0; - staging_bytes = 0; - } - - auto status = file_writer->Close(); - if (!status.ok()) - throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); } -void ParquetBlockOutputFormat::resetFormatterImpl() +void ParquetBlockOutputFormat::threadFunction() { - file_writer.reset(); + std::unique_lock lock(mutex); + + while (true) + { + if (task_queue.empty() || is_stopped) + { + /// The check and the decrement need to be in the same critical section, to make sure + /// we never get stuck with tasks but no threads. + --threads_running; + return; + } + + auto task = std::move(task_queue.front()); + task_queue.pop_front(); + + if (task.column_type) + { + lock.unlock(); + + IColumn::MutablePtr concatenated = IColumn::mutate(std::move(task.column_pieces[0])); + for (size_t i = 1; i < task.column_pieces.size(); ++i) + { + auto & c = task.column_pieces[i]; + concatenated->insertRangeFrom(*c, 0, c->size()); + c.reset(); + } + task.column_pieces.clear(); + + std::vector subcolumns; + prepareColumnForWrite( + std::move(concatenated), task.column_type, task.column_name, options, &subcolumns); + + lock.lock(); + + for (size_t i = 0; i < subcolumns.size(); ++i) + { + task.row_group->column_chunks[task.column_idx].emplace_back(this); + task.row_group->tasks_in_flight += 1; + + auto & t = task_queue.emplace_back(task.row_group, task.column_idx, this); + t.subcolumn_idx = i; + t.state = std::move(subcolumns[i]); + t.mem.set(t.state.allocatedBytes()); + } + + startMoreThreadsIfNeeded(lock); + } + else + { + lock.unlock(); + + PODArray serialized; + { + WriteBufferFromVector buf(serialized); + writeColumnChunkBody(task.state, options, buf); + } + + lock.lock(); + + auto & c = task.row_group->column_chunks[task.column_idx][task.subcolumn_idx]; + c.state = std::move(task.state); + c.serialized = std::move(serialized); + c.mem.set(c.serialized.size() + c.state.allocatedBytes()); + } + + --task.row_group->tasks_in_flight; + + condvar.notify_all(); + } } void registerOutputFormatParquet(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index 482c778bc52..4c73de007fe 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -2,8 +2,11 @@ 
#include "config.h" #if USE_PARQUET -# include -# include + +#include +#include +#include +#include namespace arrow { @@ -28,25 +31,128 @@ class ParquetBlockOutputFormat : public IOutputFormat { public: ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + ~ParquetBlockOutputFormat() override; String getName() const override { return "ParquetBlockOutputFormat"; } String getContentType() const override { return "application/octet-stream"; } private: - void consumeStaged(); + struct MemoryToken + { + ParquetBlockOutputFormat * parent; + size_t bytes = 0; + + explicit MemoryToken(ParquetBlockOutputFormat * p, size_t b = 0) : parent(p) + { + set(b); + } + + MemoryToken(MemoryToken && t) + : parent(std::exchange(t.parent, nullptr)), bytes(std::exchange(t.bytes, 0)) {} + + MemoryToken & operator=(MemoryToken && t) + { + parent = std::exchange(t.parent, nullptr); + bytes = std::exchange(t.bytes, 0); + return *this; + } + + ~MemoryToken() + { + set(0); + } + + void set(size_t new_size) + { + if (new_size == bytes) + return; + parent->bytes_in_flight += new_size - bytes; // overflow is fine + bytes = new_size; + } + }; + + struct ColumnChunk + { + Parquet::ColumnChunkWriteState state; + PODArray serialized; + + MemoryToken mem; + + ColumnChunk(ParquetBlockOutputFormat * p) : mem(p) {} + }; + + struct RowGroupState + { + size_t tasks_in_flight = 0; + std::vector> column_chunks; + size_t num_rows = 0; + }; + + struct Task + { + RowGroupState * row_group; + size_t column_idx; + size_t subcolumn_idx = 0; + + MemoryToken mem; + + /// If not null, we need to call prepareColumnForWrite(). + /// Otherwise we need to call writeColumnChunkBody(). + DataTypePtr column_type; + std::string column_name; + std::vector column_pieces; + + Parquet::ColumnChunkWriteState state; + + Task(RowGroupState * rg, size_t ci, ParquetBlockOutputFormat * p) + : row_group(rg), column_idx(ci), mem(p) {} + }; + void consume(Chunk) override; void finalizeImpl() override; void resetFormatterImpl() override; + void onCancel() override; + void writeRowGroup(std::vector chunks); + void writeUsingArrow(std::vector chunks); + void writeRowGroupInOneThread(Chunk chunk); + void writeRowGroupInParallel(std::vector chunks); + + void threadFunction(); + void startMoreThreadsIfNeeded(const std::unique_lock & lock); + + /// Called in single-threaded fashion. Writes to the file. + void reapCompletedRowGroups(std::unique_lock & lock); + + const FormatSettings format_settings; + + /// Chunks to squash together to form a row group. std::vector staging_chunks; size_t staging_rows = 0; size_t staging_bytes = 0; - const FormatSettings format_settings; - std::unique_ptr file_writer; std::unique_ptr ch_column_to_arrow_column; + + Parquet::WriteOptions options; + Parquet::SchemaElements schema; + std::vector row_groups_complete; + + + std::mutex mutex; + std::condition_variable condvar; // wakes up consume() + std::unique_ptr pool; + + std::atomic_bool is_stopped{false}; + std::exception_ptr background_exception = nullptr; + + /// Invariant: if there's at least one task then there's at least one thread. 
+ size_t threads_running = 0; + std::atomic bytes_in_flight{0}; + + std::deque task_queue; + std::deque row_groups; }; } diff --git a/tests/queries/0_stateless/02735_parquet_encoder.reference b/tests/queries/0_stateless/02735_parquet_encoder.reference new file mode 100644 index 00000000000..c7d79392d85 --- /dev/null +++ b/tests/queries/0_stateless/02735_parquet_encoder.reference @@ -0,0 +1,55 @@ +u8 Nullable(UInt8) +u16 Nullable(UInt16) +u32 Nullable(UInt32) +u64 Nullable(UInt64) +i8 Nullable(Int8) +i16 Nullable(Int16) +i32 Nullable(Int32) +i64 Nullable(Int64) +date Nullable(UInt16) +date32 Nullable(Date32) +datetime Nullable(UInt32) +datetime64 Nullable(DateTime64(3, \'UTC\')) +enum8 Nullable(Int8) +enum16 Nullable(Int16) +float32 Nullable(Float32) +float64 Nullable(Float64) +str Nullable(String) +fstr Nullable(FixedString(12)) +u128 Nullable(FixedString(16)) +u256 Nullable(FixedString(32)) +i128 Nullable(FixedString(16)) +i256 Nullable(FixedString(32)) +decimal32 Nullable(Decimal(9, 3)) +decimal64 Nullable(Decimal(18, 10)) +decimal128 Nullable(Decimal(38, 20)) +decimal256 Nullable(Decimal(76, 40)) +ipv4 Nullable(UInt32) +ipv6 Nullable(FixedString(16)) +0 +0 +0 +0 +1 2 1 +1 2 2 +1 3 3 +1 1000000 1 +3914219105369203805 +4 1000000 1 +(1000000,0,NULL,'100','299') +(1000000,0,NULL,'0','-1294970296') +(1000000,0,NULL,'-2147483296','2147481000') +(100000,900000,NULL,'100009','999999') +[(2,0,NULL,'','[]')] +1 1 +0 1 +16159458007063698496 +16159458007063698496 +BYTE_ARRAY String +FIXED_LEN_BYTE_ARRAY None +BYTE_ARRAY None +BYTE_ARRAY None +BYTE_ARRAY String +never gonna +give you +up diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql new file mode 100644 index 00000000000..d8d52a13218 --- /dev/null +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -0,0 +1,168 @@ +-- Tags: no-fasttest + +set output_format_parquet_use_custom_encoder = 1; +set output_format_parquet_row_group_size = 1000; +set output_format_parquet_data_page_size = 800; +set output_format_parquet_batch_size = 100; +set output_format_parquet_row_group_size_bytes = 1000000000; +set engine_file_truncate_on_insert=1; + +-- Write random data to parquet file, then read from it and check that it matches what we wrote. +-- Do this for all kinds of data types: primitive, Nullable(primitive), Array(primitive), +-- Array(Nullable(primitive)), Array(Array(primitive)), Map(primitive, primitive), etc. 
+ +drop table if exists basic_types_02735; +create temporary table basic_types_02735 as select * from generateRandom(' + u8 UInt8, + u16 UInt16, + u32 UInt32, + u64 UInt64, + i8 Int8, + i16 Int16, + i32 Int32, + i64 Int64, + date Date, + date32 Date32, + datetime DateTime, + datetime64 DateTime64, + enum8 Enum8(''x'' = 1, ''y'' = 2, ''z'' = 3), + enum16 Enum16(''xx'' = 1000, ''yy'' = 2000, ''zz'' = 3000), + float32 Float32, + float64 Float64, + str String, + fstr FixedString(12), + u128 UInt128, + u256 UInt256, + i128 Int128, + i256 Int256, + decimal32 Decimal32(3), + decimal64 Decimal64(10), + decimal128 Decimal128(20), + decimal256 Decimal256(40), + ipv4 IPv4, + ipv6 IPv6') limit 10101; +insert into function file(basic_types_02735.parquet) select * from basic_types_02735; +desc file(basic_types_02735.parquet); +select (select sum(cityHash64(*)) from basic_types_02735) - (select sum(cityHash64(*)) from file(basic_types_02735.parquet)); +drop table basic_types_02735; + + +drop table if exists nullables_02735; +create temporary table nullables_02735 as select * from generateRandom(' + u16 Nullable(UInt16), + i64 Nullable(Int64), + datetime64 Nullable(DateTime64), + enum8 Nullable(Enum8(''x'' = 1, ''y'' = 2, ''z'' = 3)), + float64 Nullable(Float64), + str Nullable(String), + fstr Nullable(FixedString(12)), + i256 Nullable(Int256), + decimal256 Nullable(Decimal256(40)), + ipv6 Nullable(IPv6)') limit 10000; +insert into function file(nullables_02735.parquet) select * from nullables_02735; +select (select sum(cityHash64(*)) from nullables_02735) - (select sum(cityHash64(*)) from file(nullables_02735.parquet)); +drop table nullables_02735; + + +-- TODO: When cityHash64() fully supports Nullable: https://github.com/ClickHouse/ClickHouse/pull/48625 +-- the next two blocks can be simplified: arrays_out_02735 intermediate table is not needed, +-- a.csv and b.csv are not needed. 
+ +drop table if exists arrays_02735; +drop table if exists arrays_out_02735; +create table arrays_02735 engine = Memory as select * from generateRandom(' + u32 Array(UInt32), + i8 Array(Int8), + datetime Array(DateTime), + enum16 Array(Enum16(''xx'' = 1000, ''yy'' = 2000, ''zz'' = 3000)), + float32 Array(Float32), + str Array(String), + fstr Array(FixedString(12)), + u128 Array(UInt128), + decimal64 Array(Decimal64(10)), + ipv4 Array(IPv4), + msi Map(String, Int16), + tup Tuple(FixedString(3), Array(String), Map(Int8, Date))') limit 10000; +insert into function file(arrays_02735.parquet) select * from arrays_02735; +create temporary table arrays_out_02735 as arrays_02735; +insert into arrays_out_02735 select * from file(arrays_02735.parquet); +select (select sum(cityHash64(*)) from arrays_02735) - (select sum(cityHash64(*)) from arrays_out_02735); +--select (select sum(cityHash64(*)) from arrays_02735) - +-- (select sum(cityHash64(u32, i8, datetime, enum16, float32, str, fstr, arrayMap(x->reinterpret(x, 'UInt128'), u128), decimal64, ipv4, msi, tup)) from file(arrays_02735.parquet)); +drop table arrays_02735; +drop table arrays_out_02735; + + +drop table if exists madness_02735; +create temporary table madness_02735 as select * from generateRandom(' + aa Array(Array(UInt32)), + aaa Array(Array(Array(UInt32))), + an Array(Nullable(String)), + aan Array(Array(Nullable(FixedString(10)))), + l LowCardinality(String), + ln LowCardinality(Nullable(FixedString(11))), + al Array(LowCardinality(UInt128)), + aaln Array(Array(LowCardinality(Nullable(String)))), + mln Map(LowCardinality(String), Nullable(Int8)), + t Tuple(Map(FixedString(5), Tuple(Array(UInt16), Nullable(UInt16), Array(Tuple(Int8, Decimal64(10))))), Tuple(kitchen UInt64, sink String)), + n Nested(hello UInt64, world Tuple(first String, second FixedString(1))) + ') limit 10000; +insert into function file(madness_02735.parquet) select * from madness_02735; +insert into function file(a.csv) select * from madness_02735 order by tuple(*); +insert into function file(b.csv) select aa, aaa, an, aan, l, ln, arrayMap(x->reinterpret(x, 'UInt128'), al) as al_, aaln, mln, t, n.hello, n.world from file(madness_02735.parquet) order by tuple(aa, aaa, an, aan, l, ln, al_, aaln, mln, t, n.hello, n.world); +select (select sum(cityHash64(*)) from file(a.csv, LineAsString)) - (select sum(cityHash64(*)) from file(b.csv, LineAsString)); +--select (select sum(cityHash64(*)) from madness_02735) - +-- (select sum(cityHash64(aa, aaa, an, aan, l, ln, map(x->reinterpret(x, 'UInt128'), al), aaln, mln, t, n.hello, n.world)) from file(madness_02735.parquet)); +drop table madness_02735; + + +-- Merging input blocks into bigger row groups. +insert into function file(squash_02735.parquet) select '012345' union all select '543210' settings max_block_size = 1; +select num_columns, num_rows, num_row_groups from file(squash_02735.parquet, ParquetMetadata); + +-- Row group size limit in bytes. +insert into function file(row_group_bytes_02735.parquet) select '012345' union all select '543210' settings max_block_size = 1, output_format_parquet_row_group_size_bytes = 5; +select num_columns, num_rows, num_row_groups from file(row_group_bytes_02735.parquet, ParquetMetadata); + +-- Row group size limit in rows. 
+insert into function file(tiny_row_groups_02735.parquet) select * from numbers(3) settings output_format_parquet_row_group_size = 1; +select num_columns, num_rows, num_row_groups from file(tiny_row_groups_02735.parquet, ParquetMetadata); + +-- 1M unique 8-byte values should exceed dictionary_size_limit (1 MB). +insert into function file(big_column_chunk_02735.parquet) select number from numbers(1000000) settings output_format_parquet_row_group_size = 1000000; +select num_columns, num_rows, num_row_groups from file(big_column_chunk_02735.parquet, ParquetMetadata); +select sum(cityHash64(number)) from file(big_column_chunk_02735.parquet); + +-- Check statistics: signed vs unsigned, null count. Use enough rows to produce multiple pages. +insert into function file(statistics_02735.parquet) select 100 + number%200 as a, toUInt32(number * 3000) as u, toInt32(number * 3000) as i, if(number % 10 == 9, toString(number), null) as s from numbers(1000000) settings output_format_parquet_row_group_size = 1000000; +select num_columns, num_rows, num_row_groups from file(statistics_02735.parquet, ParquetMetadata); +select tupleElement(c, 'statistics') from file(statistics_02735.parquet, ParquetMetadata) array join tupleElement(row_groups[1], 'columns') as c; + +-- Statistics string length limit (max_statistics_size). +insert into function file(long_string_02735.parquet) select toString(range(number * 2000)) from numbers(2); +select tupleElement(tupleElement(row_groups[1], 'columns'), 'statistics') from file(long_string_02735.parquet, ParquetMetadata); + +-- Compression setting. +insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='zstd'; +select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); +insert into function file(compressed_02735.parquet) select concat('aaaaaaaaaaaaaaaa', toString(number)) as s from numbers(1000) settings output_format_parquet_row_group_size = 10000, output_format_parquet_compression_method='none'; +select total_compressed_size < 10000, total_uncompressed_size > 15000 from file(compressed_02735.parquet, ParquetMetadata); + +-- Single-threaded encoding and Arrow encoder. +drop table if exists other_encoders_02735; +create temporary table other_encoders_02735 as select number, number*2 from numbers(10000); +insert into function file(single_thread_02735.parquet) select * from other_encoders_02735 settings max_threads = 1; +select sum(cityHash64(*)) from file(single_thread_02735.parquet); +insert into function file(arrow_02735.parquet) select * from other_encoders_02735 settings output_format_parquet_use_custom_encoder = 0; +select sum(cityHash64(*)) from file(arrow_02735.parquet); + +-- String -> binary vs string; FixedString -> fixed-length-binary vs binary vs string. 
+insert into function file(strings1_02735.parquet) select 'never', toFixedString('gonna', 5) settings output_format_parquet_string_as_string = 1, output_format_parquet_fixed_string_as_fixed_byte_array = 1; +select columns.5, columns.6 from file(strings1_02735.parquet, ParquetMetadata) array join columns; +insert into function file(strings2_02735.parquet) select 'give', toFixedString('you', 3) settings output_format_parquet_string_as_string = 0, output_format_parquet_fixed_string_as_fixed_byte_array = 0; +select columns.5, columns.6 from file(strings2_02735.parquet, ParquetMetadata) array join columns; +insert into function file(strings3_02735.parquet) select toFixedString('up', 2) settings output_format_parquet_string_as_string = 1, output_format_parquet_fixed_string_as_fixed_byte_array = 0; +select columns.5, columns.6 from file(strings3_02735.parquet, ParquetMetadata) array join columns; +select * from file(strings1_02735.parquet); +select * from file(strings2_02735.parquet); +select * from file(strings3_02735.parquet); From db5cb960508fc20ff7127aa092b89e6002c9f503 Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Wed, 17 May 2023 01:56:00 +0000 Subject: [PATCH 381/478] Start over when falling back to non-dictionary encoding --- src/Processors/Formats/Impl/Parquet/Write.cpp | 41 +++++++++++-------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index a29bb81f8dc..ba67f075a0d 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -421,10 +421,7 @@ void writeColumnImpl( typename Converter::Statistics page_statistics; typename Converter::Statistics total_statistics; - /// We start with dictionary encoding, then switch to `encoding` (non-dictionary) if the - /// dictionary gets too big. That's how arrow does it too. - bool initially_used_dictionary = options.use_dictionary_encoding; - bool currently_using_dictionary = initially_used_dictionary; + bool use_dictionary = options.use_dictionary_encoding; std::optional fixed_string_descr; if constexpr (std::is_same::value) @@ -441,12 +438,11 @@ void writeColumnImpl( /// Alternatively, we could avoid using arrow's dictionary encoding code and leverage /// ColumnLowCardinality instead. It would work basically the same way as what this function /// currently does: add values to the ColumnRowCardinality (instead of `encoder`) in batches, - /// checking dictionary size after each batch; if it gets big, flush the dictionary and the - /// indices and switch to non-dictionary encoding. Feels like it could even be slightly less code. + /// checking dictionary size after each batch. That might be faster. auto encoder = parquet::MakeTypedEncoder( // ignored if using dictionary static_cast(encoding), - currently_using_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); + use_dictionary, fixed_string_descr ? &*fixed_string_descr : nullptr); struct PageData { @@ -496,7 +492,7 @@ void writeColumnImpl( header.__isset.data_page_header = true; auto & d = header.data_page_header; d.__set_num_values(static_cast(def_count)); - d.__set_encoding(currently_using_dictionary ? parq::Encoding::RLE_DICTIONARY : encoding); + d.__set_encoding(use_dictionary ? 
parq::Encoding::RLE_DICTIONARY : encoding); d.__set_definition_level_encoding(parq::Encoding::RLE); d.__set_repetition_level_encoding(parq::Encoding::RLE); /// We could also put checksum in `header.crc`, but apparently no one uses it: @@ -513,7 +509,7 @@ void writeColumnImpl( total_statistics.merge(page_statistics); page_statistics.clear(); - if (currently_using_dictionary) + if (use_dictionary) { dict_encoded_pages.push_back({.header = std::move(header)}); std::swap(dict_encoded_pages.back().data, compressed); @@ -593,13 +589,22 @@ void writeColumnImpl( next_def_offset += def_count; next_data_offset += data_count; - if (currently_using_dictionary && is_dict_too_big()) + if (use_dictionary && is_dict_too_big()) { /// Fallback to non-dictionary encoding. - flush_page(next_def_offset - def_offset, next_data_offset - data_offset); - flush_dict(); + /// + /// Discard encoded data and start over. + /// This is different from what arrow does: arrow writes out the dictionary-encoded + /// data, then uses non-dictionary encoding for later pages. + /// Starting over seems better: it produces slightly smaller files (I saw 1-4%) in + /// exchange for slight decrease in speed (I saw < 5%). This seems like a good + /// trade because encoding speed is much less important than decoding (as evidenced + /// by arrow not supporting parallel encoding, even though it's easy to support). - currently_using_dictionary = false; + def_offset = 0; + data_offset = 0; + dict_encoded_pages.clear(); + use_dictionary = false; encoder = parquet::MakeTypedEncoder( static_cast(encoding)); break; @@ -614,7 +619,7 @@ void writeColumnImpl( } } - if (currently_using_dictionary) + if (use_dictionary) flush_dict(); chassert(data_offset == s.primitive_column->size()); @@ -630,13 +635,15 @@ void writeColumnImpl( /// Report which encodings we've used. 
if (s.max_rep > 0 || s.max_def > 0) addToEncodingsUsed(s, parq::Encoding::RLE); // levels - if (!currently_using_dictionary) - addToEncodingsUsed(s, encoding); // non-dictionary encoding - if (initially_used_dictionary) + if (use_dictionary) { addToEncodingsUsed(s, parq::Encoding::PLAIN); // dictionary itself addToEncodingsUsed(s, parq::Encoding::RLE_DICTIONARY); // ids } + else + { + addToEncodingsUsed(s, encoding); + } } } From dfdf5de972b0b8ee37fd0e89cfeaa8c3f5ea79cf Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 30 May 2023 01:28:16 +0000 Subject: [PATCH 382/478] Fixes --- contrib/arrow-cmake/CMakeLists.txt | 5 +- programs/client/Client.cpp | 5 + .../Formats/Impl/Parquet/PrepareForWrite.cpp | 16 ++- src/Processors/Formats/Impl/Parquet/Write.cpp | 120 +++++++++++++++--- src/Processors/Formats/Impl/Parquet/Write.h | 1 + .../Formats/Impl/ParquetBlockInputFormat.cpp | 7 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 18 ++- .../Formats/Impl/ParquetBlockOutputFormat.h | 1 + .../02581_parquet_arrow_orc_compressions.sh | 2 + .../0_stateless/02735_parquet_encoder.sql | 2 +- 10 files changed, 147 insertions(+), 30 deletions(-) diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46b86cb4ddb..e3ea0381595 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -502,9 +502,10 @@ target_include_directories(_parquet SYSTEM BEFORE "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src") target_link_libraries(_parquet - PUBLIC _arrow - PRIVATE + PUBLIC + _arrow ch_contrib::thrift + PRIVATE boost::headers_only boost::regex OpenSSL::Crypto OpenSSL::SSL) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index e1a33231592..e73f77819ad 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -812,6 +812,11 @@ bool Client::processWithFuzzing(const String & full_query) } catch (...) { + if (!ast_to_process) + fmt::print(stderr, + "Error while forming new query: {}\n", + getCurrentExceptionMessage(true)); + // Some functions (e.g. protocol parsers) don't throw, but // set last_exception instead, so we'll also do it here for // uniformity. 
diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp index a70b6fcfc81..0700fc8491c 100644 --- a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -295,7 +295,17 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin switch (type->getTypeId()) { - case TypeIndex::UInt8: types(T::INT32, C::UINT_8 , int_type(8 , false)); break; + case TypeIndex::UInt8: + if (isBool(type)) + { + types(T::BOOLEAN); + state.is_bool = true; + } + else + { + types(T::INT32, C::UINT_8 , int_type(8 , false)); + } + break; case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; case TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; @@ -588,7 +598,7 @@ SchemaElements convertSchema(const Block & sample, const WriteOptions & options) root.__set_name("schema"); root.__set_num_children(static_cast(sample.columns())); - for (auto & c : sample) + for (const auto & c : sample) prepareColumnForWrite(c.column, c.type, c.name, options, nullptr, &schema); return schema; @@ -598,7 +608,7 @@ void prepareColumnForWrite( ColumnPtr column, DataTypePtr type, const std::string & name, const WriteOptions & options, ColumnChunkWriteStates * out_columns_to_write, SchemaElements * out_schema) { - if (column->size() == 0 && out_columns_to_write != nullptr) + if (column->empty() && out_columns_to_write != nullptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty column passed to Parquet encoder"); ColumnChunkWriteStates states; diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index ba67f075a0d..9664d173f29 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -15,6 +15,10 @@ #include #include "config_version.h" +#if USE_SNAPPY +#include +#endif + namespace DB::ErrorCodes { extern const int CANNOT_COMPRESS; @@ -68,7 +72,7 @@ struct StatisticsNumeric } }; -struct StatisticsFixedString +struct StatisticsFixedStringRef { size_t fixed_string_size = UINT64_MAX; const uint8_t * min = nullptr; @@ -81,7 +85,7 @@ struct StatisticsFixedString addMax(a.ptr); } - void merge(const StatisticsFixedString & s) + void merge(const StatisticsFixedStringRef & s) { chassert(fixed_string_size == UINT64_MAX || fixed_string_size == s.fixed_string_size); fixed_string_size = s.fixed_string_size; @@ -93,7 +97,7 @@ struct StatisticsFixedString void clear() { min = max = nullptr; } - parq::Statistics get(const WriteOptions & options) + parq::Statistics get(const WriteOptions & options) const { parq::Statistics s; if (min == nullptr || fixed_string_size > options.max_statistics_size) @@ -115,7 +119,54 @@ struct StatisticsFixedString } }; -struct StatisticsString +template +struct StatisticsFixedStringCopy +{ + bool empty = true; + std::array min {}; + std::array max {}; + + void add(parquet::FixedLenByteArray a) + { + addMin(a.ptr); + addMax(a.ptr); + empty = false; + } + + void merge(const StatisticsFixedStringCopy & s) + { + if (s.empty) + return; + addMin(&s.min[0]); + addMax(&s.max[0]); + empty = false; + } + + void clear() { empty = true; } + + parq::Statistics get(const WriteOptions &) const + { + parq::Statistics s; + if (empty) + return s; + s.__set_min_value(std::string(reinterpret_cast(min.data()), S)); + 
s.__set_max_value(std::string(reinterpret_cast(max.data()), S)); + return s; + } + + void addMin(const uint8_t * p) + { + if (empty || memcmp(p, min.data(), S) < 0) + memcpy(min.data(), p, S); + } + void addMax(const uint8_t * p) + { + if (empty || memcmp(p, max.data(), S) > 0) + memcpy(max.data(), p, S); + } +}; + +struct StatisticsStringRef { parquet::ByteArray min; parquet::ByteArray max; @@ -126,7 +177,7 @@ struct StatisticsString addMax(x); } - void merge(const StatisticsString & s) + void merge(const StatisticsStringRef & s) { if (s.min.ptr == nullptr) return; @@ -136,7 +187,7 @@ struct StatisticsString void clear() { *this = {}; } - parq::Statistics get(const WriteOptions & options) + parq::Statistics get(const WriteOptions & options) const { parq::Statistics s; if (min.ptr == nullptr) @@ -197,7 +248,7 @@ struct ConverterNumeric { buf.resize(count); for (size_t i = 0; i < count; ++i) - buf[i] = static_cast(column.getData()[offset + i]); + buf[i] = static_cast(column.getData()[offset + i]); // NOLINT return buf.data(); } } @@ -205,7 +256,7 @@ struct ConverterNumeric struct ConverterString { - using Statistics = StatisticsString; + using Statistics = StatisticsStringRef; const ColumnString & column; PODArray buf; @@ -226,7 +277,7 @@ struct ConverterString struct ConverterFixedString { - using Statistics = StatisticsFixedString; + using Statistics = StatisticsFixedStringRef; const ColumnFixedString & column; PODArray buf; @@ -246,7 +297,7 @@ struct ConverterFixedString struct ConverterFixedStringAsString { - using Statistics = StatisticsString; + using Statistics = StatisticsStringRef; const ColumnFixedString & column; PODArray buf; @@ -267,7 +318,7 @@ struct ConverterNumberAsFixedString { /// Calculate min/max statistics for little-endian fixed strings, not numbers, because parquet /// doesn't know it's numbers. 
- using Statistics = StatisticsFixedString; + using Statistics = StatisticsFixedStringCopy; const ColumnVector & column; PODArray buf; @@ -290,7 +341,7 @@ struct ConverterNumberAsFixedString template struct ConverterDecimal { - using Statistics = StatisticsFixedString; + using Statistics = StatisticsFixedStringCopy; const ColumnDecimal & column; PODArray data_buf; @@ -348,6 +399,24 @@ PODArray & compress(PODArray & source, PODArray & scratch, Com return scratch; } +#if USE_SNAPPY + case CompressionMethod::Snappy: + { + size_t max_dest_size = snappy::MaxCompressedLength(source.size()); + + if (max_dest_size > std::numeric_limits::max()) + throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(source.size())); + + scratch.resize(max_dest_size); + + size_t compressed_size; + snappy::RawCompress(source.data(), source.size(), scratch.data(), &compressed_size); + + scratch.resize(static_cast(compressed_size)); + return scratch; + } +#endif + default: { auto dest_buf = std::make_unique>>(scratch); @@ -421,7 +490,7 @@ void writeColumnImpl( typename Converter::Statistics page_statistics; typename Converter::Statistics total_statistics; - bool use_dictionary = options.use_dictionary_encoding; + bool use_dictionary = options.use_dictionary_encoding && !s.is_bool; std::optional fixed_string_descr; if constexpr (std::is_same::value) @@ -431,7 +500,8 @@ void writeColumnImpl( "", parquet::Repetition::REQUIRED, parquet::Type::FIXED_LEN_BYTE_ARRAY, parquet::ConvertedType::NONE, static_cast(converter.fixedStringSize())), 0, 0); - page_statistics.fixed_string_size = converter.fixedStringSize(); + if constexpr (std::is_same::value) + page_statistics.fixed_string_size = converter.fixedStringSize(); } /// Could use an arena here (by passing a custom MemoryPool), to reuse memory across pages. @@ -605,8 +675,16 @@ void writeColumnImpl( data_offset = 0; dict_encoded_pages.clear(); use_dictionary = false; + +#ifndef NDEBUG + /// Arrow's DictEncoderImpl destructor asserts that FlushValues() was called, so we + /// call it even though we don't need its output. + encoder->FlushValues(); +#endif + encoder = parquet::MakeTypedEncoder( - static_cast(encoding)); + static_cast(encoding), /* use_dictionary */ false, + fixed_string_descr ? 
&*fixed_string_descr : nullptr); break; } @@ -668,7 +746,13 @@ void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & option ConverterNumeric, parquet::parquet_dtype::c_type>( \ s.primitive_column)) - case TypeIndex::UInt8 : N(UInt8 , Int32Type); break; + case TypeIndex::UInt8: + if (s.is_bool) + writeColumnImpl(s, options, out, + ConverterNumeric, bool, bool>(s.primitive_column)); + else + N(UInt8 , Int32Type); + break; case TypeIndex::UInt16 : N(UInt16, Int32Type); break; case TypeIndex::UInt32 : N(UInt32, Int32Type); break; case TypeIndex::UInt64 : N(UInt64, Int64Type); break; @@ -769,14 +853,14 @@ parq::ColumnChunk finalizeColumnChunkAndWriteFooter( serializeThriftStruct(s.column_chunk, out); - return std::move(s.column_chunk); + return s.column_chunk; } parq::RowGroup makeRowGroup(std::vector column_chunks, size_t num_rows) { parq::RowGroup r; r.__set_num_rows(num_rows); - r.__set_columns(std::move(column_chunks)); + r.__set_columns(column_chunks); r.__set_total_compressed_size(0); for (auto & c : r.columns) { diff --git a/src/Processors/Formats/Impl/Parquet/Write.h b/src/Processors/Formats/Impl/Parquet/Write.h index 333a32e191f..9197eae5384 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.h +++ b/src/Processors/Formats/Impl/Parquet/Write.h @@ -42,6 +42,7 @@ struct ColumnChunkWriteState ColumnPtr primitive_column; CompressionMethod compression; // must match what's inside column_chunk + bool is_bool = false; /// Repetition and definition levels. Produced by prepareColumnForWrite(). /// def is empty iff max_def == 0, which means no arrays or nullables. diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3dde8ad6a6c..be9c600f9bd 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -59,7 +59,12 @@ ParquetBlockInputFormat::ParquetBlockInputFormat( pool = std::make_unique(CurrentMetrics::ParquetDecoderThreads, CurrentMetrics::ParquetDecoderThreadsActive, max_decoding_threads); } -ParquetBlockInputFormat::~ParquetBlockInputFormat() = default; +ParquetBlockInputFormat::~ParquetBlockInputFormat() +{ + is_stopped = true; + if (pool) + pool->wait(); +} void ParquetBlockInputFormat::initializeIfNeeded() { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 9a2d9072860..fbf8b3a7c87 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -68,9 +68,8 @@ namespace if (method == FormatSettings::ParquetCompression::GZIP) return parquet::Compression::type::GZIP; - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported parquet compression method"); } - } ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) @@ -162,7 +161,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) if (staging_rows >= target_rows * 2) { /// Increase row group size slightly (by < 2x) to avoid a small row group at the end. 
- size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); + size_t num_row_groups = std::max(static_cast(1), staging_rows / target_rows); size_t row_group_size = (staging_rows - 1) / num_row_groups + 1; // round up Chunk concatenated = std::move(staging_chunks[0]); @@ -222,7 +221,10 @@ void ParquetBlockOutputFormat::finalizeImpl() } if (row_groups_complete.empty()) + { + base_offset = out.count(); writeFileHeader(out); + } writeFileFooter(std::move(row_groups_complete), schema, options, out); } else @@ -349,12 +351,15 @@ void ParquetBlockOutputFormat::writeRowGroupInOneThread(Chunk chunk) options, &columns_to_write); if (row_groups_complete.empty()) + { + base_offset = out.count(); writeFileHeader(out); + } std::vector column_chunks; for (auto & s : columns_to_write) { - size_t offset = out.count(); + size_t offset = out.count() - base_offset; writeColumnChunkBody(s, options, out); auto c = finalizeColumnChunkAndWriteFooter(offset, std::move(s), options, out); column_chunks.push_back(std::move(c)); @@ -413,14 +418,17 @@ void ParquetBlockOutputFormat::reapCompletedRowGroups(std::unique_lock metadata; for (auto & cols : r.column_chunks) { for (ColumnChunk & col : cols) { - size_t offset = out.count(); + size_t offset = out.count() - base_offset; out.write(col.serialized.data(), col.serialized.size()); auto m = finalizeColumnChunkAndWriteFooter(offset, std::move(col.state), options, out); diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index 4c73de007fe..aededc39dc4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -138,6 +138,7 @@ private: Parquet::WriteOptions options; Parquet::SchemaElements schema; std::vector row_groups_complete; + size_t base_offset = 0; std::mutex mutex; diff --git a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh index 89b5147f026..d00026d516a 100755 --- a/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh +++ b/tests/queries/0_stateless/02581_parquet_arrow_orc_compressions.sh @@ -5,6 +5,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +set -o pipefail + $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='none'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='lz4'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" $CLICKHOUSE_LOCAL -q "select * from numbers(10) format Parquet settings output_format_parquet_compression_method='snappy'" | $CLICKHOUSE_LOCAL --input-format=Parquet -q "select count() from table" diff --git a/tests/queries/0_stateless/02735_parquet_encoder.sql b/tests/queries/0_stateless/02735_parquet_encoder.sql index d8d52a13218..3701c685120 100644 --- a/tests/queries/0_stateless/02735_parquet_encoder.sql +++ b/tests/queries/0_stateless/02735_parquet_encoder.sql @@ -1,4 +1,4 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel set output_format_parquet_use_custom_encoder = 1; set output_format_parquet_row_group_size = 1000; From 6b8752f2931fed6483d9221b6f5388e302245f31 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 12:19:35 +0200 Subject: [PATCH 383/478] fix error message --- src/Functions/FunctionToDecimalString.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index 68ad978632e..c16a72115d6 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -41,7 +41,7 @@ public: { FunctionArgumentDescriptors mandatory_args = { {"Value", nullptr, nullptr, nullptr}, - {"precision", &isNativeInteger, &isColumnConst, "const Integer [0-77]"} + {"precision", &isNativeInteger, &isColumnConst, "const Integer"} }; validateFunctionArgumentTypes(*this, arguments, mandatory_args, {}); From 5ee71bd643caf26b9f533dab1e369f9dc306296b Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Tue, 25 Jul 2023 10:26:26 +0000 Subject: [PATCH 384/478] Work around the clang bug --- src/Processors/Formats/Impl/Parquet/Write.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 9664d173f29..47ef0c53ab5 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -651,6 +651,10 @@ void writeColumnImpl( const typename ParquetDType::c_type * converted = converter.getBatch(next_data_offset, data_count); if (options.write_page_statistics || options.write_column_chunk_statistics) +/// Workaround for clang bug: https://github.com/llvm/llvm-project/issues/63630 +#ifdef MEMORY_SANITIZER +#pragma clang loop vectorize(disable) +#endif for (size_t i = 0; i < data_count; ++i) page_statistics.add(converted[i]); From 155b90c780733a7712956982367088d856ec139b Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Tue, 25 Jul 2023 13:47:59 +0300 Subject: [PATCH 385/478] oops --- src/Functions/FunctionToDecimalString.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index c16a72115d6..a965e2c2c90 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -19,10 +19,8 @@ namespace DB namespace ErrorCodes { - extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int 
CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } class FunctionToDecimalString : public IFunction From f8c90d5964a4c27dc119fd4417c23785a40b9c5e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 25 Jul 2023 13:36:57 +0200 Subject: [PATCH 386/478] Make better --- docs/en/sql-reference/transactions.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 1ca2db44b13..cb89a091d68 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -5,7 +5,7 @@ slug: /en/guides/developer/transactional ## Case 1: INSERT into one partition, of one table, of the MergeTree* family -This is transactional (ACID) if the number of rows inserted is less than or equal to `max_insert_block_size rows`, and in the case of data in TSV, TKSV, CSV, or JSONEachRow format if the number of bytes is less than `min_chunk_bytes_for_parallel_parsing`: +This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes): - Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted. - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted. - Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen @@ -33,14 +33,16 @@ Same as Case 1 above, with this detail: - atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured. 
## Notes
-- `max_insert_block_size` is 1 000 000 by default and can be adjusted as needed
-- `min_chunk_bytes_for_parallel_parsing` is 1 000 000 by default and can be adjusted as needed
+- rows inserted from the client in some data format are packed into a single block when:
+ - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (enabled by default)
+ - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
+- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
- ClickHouse is using MVCC with snapshot isolation internally
- all ACID properties are valid even in the case of server kill/crash
- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
-- this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback).
+- this explanation does not cover a new transactions feature that allows having full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)
## Transactions, Commit, and Rollback
From 93e5d7f51c561af4d9236ef7e146b94754bc8fd8 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Tue, 25 Jul 2023 11:42:22 +0000
Subject: [PATCH 387/478] Fix flaky 00995_exception_while_insert
---
tests/queries/0_stateless/00995_exception_while_insert.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/queries/0_stateless/00995_exception_while_insert.sh b/tests/queries/0_stateless/00995_exception_while_insert.sh
index 927ac6a54e5..732dba6c6f1 100755
--- a/tests/queries/0_stateless/00995_exception_while_insert.sh
+++ b/tests/queries/0_stateless/00995_exception_while_insert.sh
@@ -7,8 +7,8 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;"
-$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x > 1500000)) ENGINE = Memory;"
+$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64, y UInt64 DEFAULT throwIf(x = 1500000)) ENGINE = Memory;"
-seq 1 2000000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero." && echo 'OK' || echo 'FAIL' ||:
+seq 1 1500000 | $CLICKHOUSE_CLIENT --query="INSERT INTO check(x) FORMAT TSV" 2>&1 | grep -q "Value passed to 'throwIf' function is non-zero."
&& echo 'OK' || echo 'FAIL' ||: $CLICKHOUSE_CLIENT --query="DROP TABLE check;" From 22fec136c132de820c07c32d0508e6c67af51050 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 25 Jul 2023 14:04:55 +0200 Subject: [PATCH 388/478] Create new StorageView with substituted parameters for every SELECT query of a parameterized view --- src/Interpreters/Context.cpp | 18 +++++++- src/Interpreters/InterpreterSelectQuery.cpp | 24 +--------- .../TranslateQualifiedNamesVisitor.cpp | 7 --- .../TranslateQualifiedNamesVisitor.h | 6 +-- src/Interpreters/TreeRewriter.cpp | 21 ++------- src/Interpreters/TreeRewriter.h | 5 +-- src/Storages/SelectQueryInfo.h | 1 - src/Storages/StorageSnapshot.cpp | 8 +--- src/Storages/StorageSnapshot.h | 2 +- src/Storages/StorageView.cpp | 44 ++----------------- src/Storages/StorageView.h | 11 +---- 11 files changed, 33 insertions(+), 114 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cc77e0fe723..123c2ab8f85 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -114,7 +114,10 @@ #include #include #include +#include +#include #include +#include #if USE_ROCKSDB #include @@ -1576,8 +1579,21 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const { if (table.get()->isView() && table->as() && table->as()->isParameterizedView()) { + auto query = table->getInMemoryMetadataPtr()->getSelectQuery().inner_query->clone(); + NameToNameMap parameterized_view_values = analyzeFunctionParamValues(table_expression); + StorageView::replaceQueryParametersIfParametrizedView(query,parameterized_view_values); + + ASTCreateQuery create; + create.select = query->as(); + auto sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(query, getQueryContext()); + auto res = std::make_shared(StorageID(database_name, table_name), + create, + ColumnsDescription(sample_block.getNamesAndTypesList()), + /* comment */ "", + /* is_parameterized_view */ true); + res->startup(); function->prefer_subquery_to_function_formatting = true; - return table; + return res; } } auto hash = table_expression->getTreeHash(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d07a6521544..23a879a9426 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -610,27 +610,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; - NameToNameMap parameter_types; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - /// We need to fetch the parameters set for SELECT ... FROM parameterized_view() before the query is replaced. - /// replaceWithSubquery replaces the function child and adds the subquery in its place. 
- /// the parameters are children of function child, if function (which corresponds to parametrised view and has - /// parameters in its arguments: `parametrised_view()`) is replaced the parameters are also gone from tree - /// So we need to get the parameters before they are removed from the tree - /// and after query is replaced, we use these parameters to substitute in the parameterized view query - if (query_info.is_parameterized_view) - { - query_info.parameterized_view_values = analyzeFunctionParamValues(query_ptr); - parameter_types = view->getParameterTypes(); - } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (query_info.is_parameterized_view) - { - view->replaceQueryParametersIfParametrizedView(query_ptr, query_info.parameterized_view_values); - } - } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( @@ -639,10 +622,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( options, joined_tables.tablesWithColumns(), required_result_column_names, - table_join, - query_info.is_parameterized_view, - query_info.parameterized_view_values, - parameter_types); + table_join); query_info.syntax_analyzer_result = syntax_analyzer_result; @@ -793,7 +773,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(parallel_replicas_custom_filter_ast); } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns, query_info.parameterized_view_values); + source_header = storage_snapshot->getSampleBlockForColumns(required_columns); } /// Calculate structure of the result. diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index aeb912ddfbb..130ce2194fd 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -249,13 +249,6 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (first_table || !data.join_using_columns.contains(column.name)) { std::string column_name = column.name; - - /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - if (!data.parameter_values.empty()) - column_name - = StorageView::replaceQueryParameterWithValue(column_name, data.parameter_values, data.parameter_types); - addIdentifier(columns, table.table, column_name); } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 6c804ad6c90..73e45fc7ea0 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -28,15 +28,11 @@ public: const TablesWithColumns & tables; std::unordered_set join_using_columns; bool has_columns; - NameToNameMap parameter_values; - NameToNameMap parameter_types; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}, const NameToNameMap & parameter_types_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) - , parameter_values(parameter_values_) - , parameter_types(parameter_types_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp 
index 65b5d950975..d44d6cc6ac8 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -299,11 +299,10 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitor table_join, - bool is_parameterized_view, - const NameToNameMap parameter_values, - const NameToNameMap parameter_types) const + std::shared_ptr table_join) const { auto * select_query = query->as(); if (!select_query) @@ -1201,7 +1197,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values, parameter_types); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); /// Optimizes logical expressions. LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1259,15 +1255,6 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); - /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - /// the replaced column names will be used in the next steps - if (is_parameterized_view) - { - for (auto & column : result.source_columns) - column.name = StorageView::replaceQueryParameterWithValue(column.name, parameter_values, parameter_types); - } - result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames(); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index ea16c432d0f..a171133cd08 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -128,10 +128,7 @@ public: const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, - std::shared_ptr table_join = {}, - bool is_parameterized_view = false, - const NameToNameMap parameter_values = {}, - const NameToNameMap parameter_types = {}) const; + std::shared_ptr table_join = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 13d6909fd52..0f75562e0c1 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -258,7 +258,6 @@ struct SelectQueryInfo bool parallel_replicas_disabled = false; bool is_parameterized_view = false; - NameToNameMap parameterized_view_values; // If limit is not 0, that means it's a trivial limit query. 
UInt64 limit = 0; diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 31770c9a32b..6abca59268f 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -113,7 +113,7 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const { Block res; @@ -121,12 +121,6 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, cons for (const auto & column_name : column_names) { std::string substituted_column_name = column_name; - - /// substituted_column_name is used for parameterized view (which are created using query parameters - /// and SELECT is used with substitution of these query parameters ) - if (!parameter_values.empty()) - substituted_column_name = StorageView::replaceValueWithQueryParameter(column_name, parameter_values); - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); if (column && !object_column) diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index 946e8a98cf2..a69f9b95955 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -71,7 +71,7 @@ struct StorageSnapshot NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; /// Block with ordinary + materialized + aliases + virtuals + subcolumns. - Block getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values = {}) const; + Block getSampleBlockForColumns(const Names & column_names) const; ColumnsDescription getDescriptionForColumns(const Names & column_names) const; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 470def7e197..f0f9b9540de 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -107,7 +107,8 @@ StorageView::StorageView( const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - const String & comment) + const String & comment, + const bool is_parameterized_view_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -123,8 +124,7 @@ StorageView::StorageView( NormalizeSelectWithUnionQueryVisitor::Data data{SetOperationMode::Unspecified}; NormalizeSelectWithUnionQueryVisitor{data}.visit(description.inner_query); - is_parameterized_view = query.isParameterizedView(); - view_parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); + is_parameterized_view = is_parameterized_view_ || query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,7 +173,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. 
- const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names, query_info.parameterized_view_values); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); @@ -258,42 +258,6 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ child = view_query; } -String StorageView::replaceQueryParameterWithValue(const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types) -{ - std::string name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = parameter_types.find(parameter.first); - size_t parameter_end = pos + parameter.first.size(); - if (parameter_datatype_iterator != parameter_types.end() && name.size() >= parameter_end && (name[parameter_end] == ',' || name[parameter_end] == ')')) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } - return name; -} - -String StorageView::replaceValueWithQueryParameter(const String & column_name, const NameToNameMap & parameter_values) -{ - String name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = name.find("_CAST(" + parameter.second)) != std::string::npos) - { - name = name.substr(0,pos) + parameter.first + ")"; - break; - } - } - return name; -} - ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(select_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index bebecb79ec0..b8bf5585c0f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -15,7 +15,8 @@ public: const StorageID & table_id_, const ASTCreateQuery & query, const ColumnsDescription & columns_, - const String & comment); + const String & comment, + const bool is_parameterized_view_=false); std::string getName() const override { return "View"; } bool isView() const override { return true; } @@ -44,17 +45,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); - static String replaceQueryParameterWithValue (const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types); - static String replaceValueWithQueryParameter (const String & column_name, const NameToNameMap & parameter_values); - - const NameToNameMap & getParameterTypes() const - { - return view_parameter_types; - } protected: bool is_parameterized_view; - NameToNameMap view_parameter_types; }; } From ed5393ef035f37ab3acd193c69333ff7a0084be7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 25 Jul 2023 12:09:13 +0000 Subject: [PATCH 389/478] Stabilize tests --- .../0_stateless/02494_query_cache_query_log.reference | 6 +----- .../queries/0_stateless/02494_query_cache_query_log.sql | 9 ++++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_query_log.reference 
b/tests/queries/0_stateless/02494_query_cache_query_log.reference index 9037909d121..f9429064456 100644 --- a/tests/queries/0_stateless/02494_query_cache_query_log.reference +++ b/tests/queries/0_stateless/02494_query_cache_query_log.reference @@ -1,16 +1,12 @@ -- Run a query with query cache not enabled 124437993 -QueryStart SELECT 124437993; Unknown QueryFinish SELECT 124437993; None -- Run a query with query cache enabled 124437994 -QueryStart SELECT 124437994 SETTINGS use_query_cache = 1; Unknown QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Write -- Run the same query with query cache enabled 124437994 -QueryStart SELECT 124437994 SETTINGS use_query_cache = 1; Unknown -QueryStart SELECT 124437994 SETTINGS use_query_cache = 1; Unknown -QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Read QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Write +QueryFinish SELECT 124437994 SETTINGS use_query_cache = 1; Read -- Throw exception with query cache enabled SELECT 124437995, throwIf(1) SETTINGS use_query_cache = 1; None diff --git a/tests/queries/0_stateless/02494_query_cache_query_log.sql b/tests/queries/0_stateless/02494_query_cache_query_log.sql index 79a8f4cb62b..aedc39c4486 100644 --- a/tests/queries/0_stateless/02494_query_cache_query_log.sql +++ b/tests/queries/0_stateless/02494_query_cache_query_log.sql @@ -17,7 +17,8 @@ SELECT type, query, query_cache_usage FROM system.query_log WHERE current_database = currentDatabase() AND query = 'SELECT 124437993;' -ORDER BY type; + AND type = 'QueryFinish' +ORDER BY type, query_cache_usage; @@ -31,7 +32,8 @@ SELECT type, query, query_cache_usage FROM system.query_log WHERE current_database = currentDatabase() AND query = 'SELECT 124437994 SETTINGS use_query_cache = 1;' -ORDER BY type; + AND type = 'QueryFinish' +ORDER BY type, query_cache_usage; @@ -45,7 +47,8 @@ SELECT type, query, query_cache_usage FROM system.query_log WHERE current_database = currentDatabase() AND query = 'SELECT 124437994 SETTINGS use_query_cache = 1;' -ORDER BY type; + AND type = 'QueryFinish' +ORDER BY type, query_cache_usage; From 568afbbec318d5c38c8281f8ef4bd5873fa76c42 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 24 Jul 2023 09:56:29 +0000 Subject: [PATCH 390/478] added field with refcount to system.remote_data_paths table --- src/Disks/IDisk.h | 5 ++- .../ObjectStorages/DiskObjectStorage.cpp | 2 +- .../System/StorageSystemRemoteDataPaths.cpp | 8 +++- .../02791_remote_paths_refcount.reference | 28 ++++++++++++ .../02791_remote_paths_refcount.sql | 43 +++++++++++++++++++ 5 files changed, 82 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02791_remote_paths_refcount.reference create mode 100644 tests/queries/0_stateless/02791_remote_paths_refcount.sql diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 2b0ca369a96..a2c5e59237f 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -303,10 +303,11 @@ public: std::string local_path; std::string common_prefix_for_objects; StoredObjects objects; + size_t refcount; LocalPathWithObjectStoragePaths( - const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) - : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} + const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_, size_t refcount_) + : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), 
objects(std::move(objects_)), refcount(refcount_) {} }; virtual void getRemotePathsRecursive(const String &, std::vector &) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 762151b3808..001cff4cefe 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -82,7 +82,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: { try { - paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path)); + paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path), metadata_storage->getHardlinkCount(local_path)); } catch (const Exception & e) { diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index eb514d3b3f4..820b1cf3823 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -25,6 +25,7 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab {"local_path", std::make_shared()}, {"remote_path", std::make_shared()}, {"size", std::make_shared()}, + {"refcount", std::make_shared()}, {"common_prefix_for_blobs", std::make_shared()}, {"cache_paths", std::make_shared(std::make_shared())}, })); @@ -48,6 +49,7 @@ Pipe StorageSystemRemoteDataPaths::read( MutableColumnPtr col_local_path = ColumnString::create(); MutableColumnPtr col_remote_path = ColumnString::create(); MutableColumnPtr col_size = ColumnUInt64::create(); + MutableColumnPtr col_refcount = ColumnUInt64::create(); MutableColumnPtr col_namespace = ColumnString::create(); MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create()); @@ -65,19 +67,22 @@ Pipe StorageSystemRemoteDataPaths::read( if (disk->supportsCache()) cache = FileCacheFactory::instance().getByName(disk->getCacheName()).cache; - for (const auto & [local_path, common_prefox_for_objects, storage_objects] : remote_paths_by_local_path) + for (const auto & [local_path, common_prefox_for_objects, storage_objects, refcount] : remote_paths_by_local_path) { for (const auto & object : storage_objects) { col_disk_name->insert(disk_name); col_base_path->insert(disk->getPath()); + if (cache) col_cache_base_path->insert(cache->getBasePath()); else col_cache_base_path->insertDefault(); + col_local_path->insert(local_path); col_remote_path->insert(object.remote_path); col_size->insert(object.bytes_size); + col_refcount->insert(refcount); col_namespace->insert(common_prefox_for_objects); if (cache) @@ -101,6 +106,7 @@ Pipe StorageSystemRemoteDataPaths::read( res_columns.emplace_back(std::move(col_local_path)); res_columns.emplace_back(std::move(col_remote_path)); res_columns.emplace_back(std::move(col_size)); + res_columns.emplace_back(std::move(col_refcount)); res_columns.emplace_back(std::move(col_namespace)); res_columns.emplace_back(std::move(col_cache_paths)); diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.reference b/tests/queries/0_stateless/02791_remote_paths_refcount.reference new file mode 100644 index 00000000000..56fb1536205 --- /dev/null +++ b/tests/queries/0_stateless/02791_remote_paths_refcount.reference @@ -0,0 +1,28 @@ +0_0_0_0 0 +0_0_0_0_1 1 +1_0_0_0 0 +1_0_0_0_1 1 +0_0_0_0_1 checksums.txt 0 +0_0_0_0_1 columns.txt 1 +0_0_0_0_1 count.txt 1 +0_0_0_0_1 default_compression_codec.txt 1 +0_0_0_0_1 id.bin 1 +0_0_0_0_1 id.cmrk2 1 
+0_0_0_0_1 metadata_version.txt 1 +0_0_0_0_1 minmax_id.idx 1 +0_0_0_0_1 partition.dat 1 +0_0_0_0_1 primary.cidx 1 +0_0_0_0_1 v.bin 1 +0_0_0_0_1 v.cmrk2 1 +1_0_0_0_1 checksums.txt 0 +1_0_0_0_1 columns.txt 0 +1_0_0_0_1 count.txt 1 +1_0_0_0_1 default_compression_codec.txt 0 +1_0_0_0_1 id.bin 1 +1_0_0_0_1 id.cmrk2 1 +1_0_0_0_1 metadata_version.txt 0 +1_0_0_0_1 minmax_id.idx 1 +1_0_0_0_1 partition.dat 1 +1_0_0_0_1 primary.cidx 1 +1_0_0_0_1 v.bin 0 +1_0_0_0_1 v.cmrk2 0 diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.sql b/tests/queries/0_stateless/02791_remote_paths_refcount.sql new file mode 100644 index 00000000000..e64df599d32 --- /dev/null +++ b/tests/queries/0_stateless/02791_remote_paths_refcount.sql @@ -0,0 +1,43 @@ +-- Tags: no-fasttest + +DROP TABLE IF EXISTS t_refcount SYNC; + +CREATE TABLE t_refcount (id UInt64, v UInt64) +ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t_refcount', '1') +ORDER BY id PARTITION BY id % 2 +SETTINGS + storage_policy = 's3_cache', + allow_remote_fs_zero_copy_replication = 1, + min_bytes_for_wide_part = 0, + compress_marks = 1, + compress_primary_key = 1, + ratio_of_defaults_for_sparse_serialization = 1.0; + +INSERT INTO t_refcount VALUES (1, 10), (2, 20); + +SET mutations_sync = 2; +ALTER TABLE t_refcount UPDATE v = v * 10 WHERE id % 2 = 1; + +SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 't_refcount' ORDER BY name; + +WITH splitByChar('/', full_path) AS path_parts +SELECT path_parts[-2] AS part_name, path_parts[-1] AS file_name, refcount +FROM +( + SELECT + path || local_path AS full_path, + substring(full_path, 1, length(full_path) - position(reverse(full_path), '/') + 1) AS part_path, + refcount + FROM system.remote_data_paths + WHERE disk_name = 's3_cache' +) AS paths +INNER JOIN +( + SELECT path + FROM system.parts + WHERE database = currentDatabase() AND table = 't_refcount' AND active +) AS parts +ON paths.part_path = parts.path +ORDER BY part_name, file_name; + +DROP TABLE IF EXISTS t_refcount SYNC; From 328d0a5269407eef6899907d6b9869307a56dfa4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 25 Jul 2023 14:50:27 +0200 Subject: [PATCH 391/478] fix --- src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++++--- .../test.py | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e6431927805..9e4a63f6ba9 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4861,9 +4861,13 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) LOG_TRACE(log, "Waiting for RestartingThread to startup table"); } - std::lock_guard lock{flush_and_shutdown_mutex}; - if (shutdown_prepared_called.load() || shutdown_called.load()) - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); + auto lock = std::unique_lock(flush_and_shutdown_mutex, std::defer_lock); + do + { + if (shutdown_prepared_called.load() || shutdown_called.load()) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Cannot startup table because it is dropped"); + } + while (!lock.try_lock()); /// And this is just a callback session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index 
20b6a6c977f..d971e4ec658 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -3,6 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry from multiprocessing.dummy import Pool import time @@ -54,9 +55,10 @@ def test_shutdown_and_wait(start_cluster): node1.query(f"INSERT INTO test_table VALUES ({value})") with PartitionManager() as pm: + assert node2.query("SELECT * FROM test_table") == "0\n" pm.partition_instances(node1, node2) # iptables rules must be applied immediately, but looks like sometimes they are not... - time.sleep(3) + assert_eq_with_retry(node1, "select count() from remote('node1,node2', 'system.one')", "1\n", settings={"skip_unavailable_shards": 1}) p.map(insert, range(1, 50)) From d7de8bf797a7444927e80c7c88d9b7c5a4040e01 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 25 Jul 2023 13:03:12 +0000 Subject: [PATCH 392/478] Automatic style fix --- .../test_replicated_merge_tree_wait_on_shutdown/test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index d971e4ec658..d1373d44d0f 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -58,7 +58,12 @@ def test_shutdown_and_wait(start_cluster): assert node2.query("SELECT * FROM test_table") == "0\n" pm.partition_instances(node1, node2) # iptables rules must be applied immediately, but looks like sometimes they are not... 
- assert_eq_with_retry(node1, "select count() from remote('node1,node2', 'system.one')", "1\n", settings={"skip_unavailable_shards": 1}) + assert_eq_with_retry( + node1, + "select count() from remote('node1,node2', 'system.one')", + "1\n", + settings={"skip_unavailable_shards": 1}, + ) p.map(insert, range(1, 50)) From b91852de3a311cd03ef571e4470deba3deeba25b Mon Sep 17 00:00:00 2001 From: Julian Maicher Date: Tue, 25 Jul 2023 16:01:19 +0200 Subject: [PATCH 393/478] fix(docs): Document correct MODIFY COLUMN REMOVE syntax --- docs/en/sql-reference/statements/alter/column.md | 2 +- docs/ru/sql-reference/statements/alter/column.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index dae2c7dd1d3..6ceb9b5849e 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC Syntax: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Example** diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index a8ace213075..92be30b101a 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Синтаксис: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Пример** From bd09ad6736bac2b9e986993e75f1f8f61b1508a6 Mon Sep 17 00:00:00 2001 From: Val Doroshchuk Date: Tue, 25 Jul 2023 16:19:44 +0200 Subject: [PATCH 394/478] MaterializedMySQL: Fix typos in tests --- .../materialized_with_ddl.py | 27 +++++++++++++------ .../test_materialized_mysql_database/test.py | 9 ++++--- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py index 8b2943c2b73..389d430622d 100644 --- a/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialized_with_ddl.py @@ -13,25 +13,36 @@ from multiprocessing.dummy import Pool from helpers.test_tools import assert_eq_with_retry -def check_query(clickhouse_node, query, result_set, retry_count=10, interval_seconds=3): - lastest_result = "" +def check_query( + clickhouse_node, + query, + result_set, + retry_count=30, + interval_seconds=1, + on_failure=None, +): + latest_result = "" + if "/* expect: " not in query: + query = "/* expect: " + result_set.rstrip("\n") + "*/ " + query for i in range(retry_count): try: - lastest_result = clickhouse_node.query(query) - if result_set == lastest_result: + latest_result = clickhouse_node.query(query) + if result_set == latest_result: return - logging.debug(f"latest_result {lastest_result}") + logging.debug(f"latest_result {latest_result}") time.sleep(interval_seconds) except Exception as e: logging.debug(f"check_query retry {i+1} exception {e}") time.sleep(interval_seconds) else: - result_got = clickhouse_node.query(query) + latest_result = clickhouse_node.query(query) + if on_failure is not None and latest_result != result_set: + on_failure(latest_result, result_set) assert ( - 
result_got == result_set - ), f"Got result {result_got}, while expected result {result_set}" + latest_result == result_set + ), f"Got result '{latest_result}', expected result '{result_set}'" def dml_with_materialized_mysql_database(clickhouse_node, mysql_node, service_name): diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index c21e04af8db..1fd09f733f0 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -52,6 +52,7 @@ def started_cluster(): cluster.start() yield cluster finally: + node_db.stop_clickhouse() # ensures that coverage report is written to disk, even if cluster.shutdown() times out. cluster.shutdown() @@ -86,7 +87,7 @@ class MySQLConnection: else: self.mysql_connection.ping(reconnect=True) logging.debug( - "MySQL Connection establised: {}:{}".format( + "MySQL Connection established: {}:{}".format( self.ip_address, self.port ) ) @@ -94,7 +95,7 @@ class MySQLConnection: except Exception as e: errors += [str(e)] time.sleep(1) - raise Exception("Connection not establised, {}".format(errors)) + raise Exception("Connection not established, {}".format(errors)) def query(self, execution_query): with self.alloc_connection().cursor() as cursor: @@ -118,9 +119,9 @@ class MySQLConnection: if result is not None: print(cursor.fetchall()) - def query_and_get_data(self, executio_query): + def query_and_get_data(self, execution_query): with self.alloc_connection().cursor() as cursor: - cursor.execute(executio_query) + cursor.execute(execution_query) return cursor.fetchall() def close(self): From 2c7c38950d54c009e5268d371dabe8035b817283 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 25 Jul 2023 14:21:12 +0000 Subject: [PATCH 395/478] better check for lightweight deletes --- src/Storages/MergeTree/MergeTreeData.cpp | 7 +++++-- .../02792_drop_projection_lwd.reference | 2 +- .../0_stateless/02792_drop_projection_lwd.sql | 16 +++++----------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 06a9b62d9de..6179c70ca57 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5693,8 +5693,11 @@ bool MergeTreeData::supportsLightweightDelete() const auto lock = lockParts(); for (const auto & part : data_parts_by_info) { - if (part->getState() == MergeTreeDataPartState::Active - && !part->supportLightweightDeleteMutate()) + if (part->getState() == MergeTreeDataPartState::Outdated + || part->getState() == MergeTreeDataPartState::Deleting) + continue; + + if (!part->supportLightweightDeleteMutate()) return false; } return true; diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.reference b/tests/queries/0_stateless/02792_drop_projection_lwd.reference index 6529ff889b0..3ad5abd03ae 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.reference +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.reference @@ -1 +1 @@ -98 +99 diff --git a/tests/queries/0_stateless/02792_drop_projection_lwd.sql b/tests/queries/0_stateless/02792_drop_projection_lwd.sql index fd446a8efe8..a1d8a9c90f3 100644 --- a/tests/queries/0_stateless/02792_drop_projection_lwd.sql +++ b/tests/queries/0_stateless/02792_drop_projection_lwd.sql @@ -1,23 +1,17 @@ +SET mutations_sync = 2; + DROP TABLE IF EXISTS t_projections_lwd; -CREATE TABLE t_projections_lwd (a UInt32, b UInt32) ENGINE = 
MergeTree ORDER BY a; +CREATE TABLE t_projections_lwd (a UInt32, b UInt32, PROJECTION p (SELECT * ORDER BY b)) ENGINE = MergeTree ORDER BY a; INSERT INTO t_projections_lwd SELECT number, number FROM numbers(100); --- LWD works -DELETE FROM t_projections_lwd WHERE a = 0; - --- add projection -ALTER TABLE t_projections_lwd ADD PROJECTION p_t_projections_lwd (SELECT * ORDER BY b); -ALTER TABLE t_projections_lwd MATERIALIZE PROJECTION p_t_projections_lwd; - -- LWD does not work, as expected -DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError UNFINISHED } +DELETE FROM t_projections_lwd WHERE a = 1; -- { serverError BAD_ARGUMENTS } KILL MUTATION WHERE database = currentDatabase() AND table = 't_projections_lwd' SYNC FORMAT Null; -- drop projection -SET mutations_sync = 2; -ALTER TABLE t_projections_lwd DROP projection p_t_projections_lwd; +ALTER TABLE t_projections_lwd DROP projection p; DELETE FROM t_projections_lwd WHERE a = 2; From 79d0343becaa001dca587ee1932a8520e086d0ce Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 16:34:40 +0200 Subject: [PATCH 396/478] tests: fix 01821_join_table_race_long flakiness (#52559) By grouping multiple queries into one clickhouse-client invocation, since each execve of the binary can take ~1 second in debug builds. But this slightly changes the logic, so be aware. Signed-off-by: Azat Khuzhin Co-authored-by: Alexander Tokmakov --- tests/queries/0_stateless/01821_join_table_race_long.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01821_join_table_race_long.sh b/tests/queries/0_stateless/01821_join_table_race_long.sh index e02fe788653..561b856841b 100755 --- a/tests/queries/0_stateless/01821_join_table_race_long.sh +++ b/tests/queries/0_stateless/01821_join_table_race_long.sh @@ -9,13 +9,13 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS join_table_race" $CLICKHOUSE_CLIENT -q "CREATE TABLE join_table_race(id Int32, name String) ENGINE = Join(ANY, LEFT, id)" -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "INSERT INTO join_table_race VALUES ($RANDOM, '$RANDOM')" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "INSERT INTO join_table_race VALUES ($RANDOM, '$RANDOM');"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..200}; do $CLICKHOUSE_CLIENT -q "SELECT count() FROM join_table_race FORMAT Null" > /dev/null 2> /dev/null; done & +for _ in {0..200}; do echo "SELECT count() FROM join_table_race FORMAT Null;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "TRUNCATE TABLE join_table_race" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "TRUNCATE TABLE join_table_race;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & -for _ in {0..100}; do $CLICKHOUSE_CLIENT -q "ALTER TABLE join_table_race DELETE WHERE id % 2 = 0" > /dev/null 2> /dev/null; done & +for _ in {0..100}; do echo "ALTER TABLE join_table_race DELETE WHERE id % 2 = 0;"; done | $CLICKHOUSE_CLIENT --ignore-error -nm > /dev/null 2> /dev/null & wait From 85082ad8f8ee0d1023273d8db888e143e59bd828 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 25 Jul 2023 16:35:01 +0200 Subject: [PATCH 397/478] Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() (#52490) CI founds [1]: Exception: Sanitizer assert found for instance ================== WARNING: ThreadSanitizer: data race (pid=348) Write of 
size 8 at 0x7b58000044a0 by main thread: 2 DB::DatabaseReplicated::startupTables(ThreadPoolImpl>&, DB::LoadingStrictnessLevel) build_docker/./src/Databases/DatabaseReplicated.cpp:526:16 (clickhouse+0x1ec45092) 3 DB::TablesLoader::startupTables() build_docker/./src/Databases/TablesLoader.cpp:87:26 (clickhouse+0x1f9258ab) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 4 DB::loadMetadata(std::__1::shared_ptr, std::__1::basic_string, std::__1::allocator> const&) build_docker/./src/Interpreters/loadMetadata.cpp:234:12 (clickhouse+0x1fff3834) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 5 DB::Server::main() build_docker/./programs/server/Server.cpp:1615:9 (clickhouse+0x163e7f78) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 6 Poco::Util::Application::run() build_docker/./base/poco/Util/src/Application.cpp:315:8 (clickhouse+0x257608fe) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 7 DB::Server::run() build_docker/./programs/server/Server.cpp:391:25 (clickhouse+0x163d7d7c) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 8 Poco::Util::ServerApplication::run(int, char**) build_docker/./base/poco/Util/src/ServerApplication.cpp:131:9 (clickhouse+0x25780114) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 9 mainEntryClickHouseServer(int, char**) build_docker/./programs/server/Server.cpp:196:20 (clickhouse+0x163d4c23) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 10 main build_docker/./programs/main.cpp:487:12 (clickhouse+0xdf8c877) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) Previous read of size 8 at 0x7b58000044a0 by thread T27 (mutexes: write M0, write M1): 1 DB::DatabaseReplicated::canExecuteReplicatedMetadataAlter() const build_docker/./src/Databases/DatabaseReplicated.cpp:1303:12 (clickhouse+0x1ec5c5bd) 2 DB::ReplicatedMergeTreeQueue::shouldExecuteLogEntry() const build_docker/./src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp:1471:24 (clickhouse+0x2115fb56) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 3 DB::ReplicatedMergeTreeQueue::selectEntryToProcess(DB::MergeTreeDataMergerMutator&, DB::MergeTreeData&) build_docker/./src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp:1676:13 (clickhouse+0x21163c58) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 4 DB::StorageReplicatedMergeTree::selectQueueEntry() build_docker/./src/Storages/StorageReplicatedMergeTree.cpp:3240:26 (clickhouse+0x20823db2) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) 5 DB::StorageReplicatedMergeTree::scheduleDataProcessingJob(DB::BackgroundJobsAssignee&) build_docker/./src/Storages/StorageReplicatedMergeTree.cpp:3304:65 (clickhouse+0x208240fc) (BuildId: 7d4ce55d33d4c3e3df9fd39b304e67e53eb61a63) [1]: https://s3.amazonaws.com/clickhouse-test-reports/52395/0b258dda4ee618a4d002e2b5246d68bbd2c77c7e/integration_tests__tsan__[5_6].html Add ddl_worker_initialized flag to avoid this race. Note, that it should be enough to check this flag only in canExecuteReplicatedMetadataAlter() since only it can be run in parallel with ctor before it had been finished. 
v0: initialize ddl before startupTables() v2: ddl_worker_initialized Signed-off-by: Azat Khuzhin Co-authored-by: Alexander Tokmakov --- src/Databases/DatabaseReplicated.cpp | 4 +++- src/Databases/DatabaseReplicated.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d3b3d4b545f..ed56edd7503 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -524,6 +524,7 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); + ddl_worker_initialized = true; } bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const @@ -1155,6 +1156,7 @@ void DatabaseReplicated::stopReplication() void DatabaseReplicated::shutdown() { stopReplication(); + ddl_worker_initialized = false; ddl_worker = nullptr; DatabaseAtomic::shutdown(); } @@ -1299,7 +1301,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const /// It may update the metadata digest (both locally and in ZooKeeper) /// before DatabaseReplicatedDDLWorker::initializeReplication() has finished. /// We should not update metadata until the database is initialized. - return ddl_worker && ddl_worker->isCurrentlyActive(); + return ddl_worker_initialized && ddl_worker->isCurrentlyActive(); } void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 8e33f482ac1..7ba91e48085 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -134,6 +134,7 @@ private: std::atomic_bool is_readonly = true; std::atomic_bool is_probably_dropped = false; std::atomic_bool is_recovering = false; + std::atomic_bool ddl_worker_initialized = false; std::unique_ptr ddl_worker; UInt32 max_log_ptr_at_creation = 0; From c75b5bc740cd20ee7f5e6bb5a71b9f8e215eb03c Mon Sep 17 00:00:00 2001 From: Sanjam Panda <36253777+saitama951@users.noreply.github.com> Date: Tue, 25 Jul 2023 20:12:22 +0530 Subject: [PATCH 398/478] Update TwoLevelStringHashTable.h --- .../HashTable/TwoLevelStringHashTable.h | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index ee6dcd05d9a..0527ec67e6e 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -114,18 +114,18 @@ public: { memcpy(&n[0], p, 8); if constexpr (std::endian::native == std::endian::little) - n[0] &= -1ULL >> s; - else - n[0] &= -1ULL << s; + n[0] &= -1ULL >> s; + else + n[0] &= -1ULL << s; } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); if constexpr (std::endian::native == std::endian::little) - n[0] >>= s; - else - n[0] <<= s; + n[0] >>= s; + else + n[0] <<= s; } auto res = hash(k8); auto buck = getBucketFromHash(res); @@ -138,9 +138,9 @@ public: const char * lp = x.data + x.size - 8; memcpy(&n[1], lp, 8); if constexpr (std::endian::native == std::endian::little) - n[1] >>= s; + n[1] >>= s; else - n[1] <<= s; + n[1] <<= s; auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -152,9 +152,9 @@ public: const char * lp = x.data + x.size - 8; memcpy(&n[2], lp, 8); if constexpr (std::endian::native == std::endian::little) - n[2] >>= s; 
+ n[2] >>= s; else - n[2] <<= s; + n[2] <<= s; auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); From 11016d4c5f36fa39a36c2c2b6c0eec7c1c3dfd5f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 25 Jul 2023 16:46:50 +0200 Subject: [PATCH 399/478] Revert "Rewrite uniq to count" --- src/Analyzer/Passes/UniqToCountPass.cpp | 198 ------------------ src/Analyzer/Passes/UniqToCountPass.h | 30 --- src/Analyzer/QueryTreePassManager.cpp | 2 - src/Core/Settings.h | 1 - src/Interpreters/InterpreterSelectQuery.cpp | 7 - .../RewriteUniqToCountVisitor.cpp | 163 -------------- src/Interpreters/RewriteUniqToCountVisitor.h | 30 --- .../test_rewrite_uniq_to_count/__init__.py | 0 .../test_rewrite_uniq_to_count/test.py | 127 ----------- 9 files changed, 558 deletions(-) delete mode 100644 src/Analyzer/Passes/UniqToCountPass.cpp delete mode 100644 src/Analyzer/Passes/UniqToCountPass.h delete mode 100644 src/Interpreters/RewriteUniqToCountVisitor.cpp delete mode 100644 src/Interpreters/RewriteUniqToCountVisitor.h delete mode 100644 tests/integration/test_rewrite_uniq_to_count/__init__.py delete mode 100644 tests/integration/test_rewrite_uniq_to_count/test.py diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp deleted file mode 100644 index 7533a99107b..00000000000 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "UniqToCountPass.h" - -#include -#include - -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -bool matchFnUniq(String func_name) -{ - auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" - || name == "uniqCombined64"; -} - -/// Extract the corresponding projection columns for group by node list. -/// For example: -/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode) -NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) -{ - if (!query_node->hasGroupBy()) - return {}; - - NamesAndTypes result; - for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) - { - const auto & projection_columns = query_node->getProjectionColumns(); - const auto & projection_nodes = query_node->getProjection().getNodes(); - - assert(projection_columns.size() == projection_nodes.size()); - - for (size_t i = 0; i < projection_columns.size(); i++) - { - if (projection_nodes[i]->isEqual(*group_by_ele)) - result.push_back(projection_columns[i]); - } - } - return result; -} - -/// Whether query_columns equals subquery_columns. -/// query_columns: query columns from query -/// subquery_columns: projection columns from subquery -bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) -{ - if (query_columns.size() != subquery_columns.size()) - return false; - - for (const auto & query_column : query_columns) - { - auto find = std::find_if( - subquery_columns.begin(), - subquery_columns.end(), - [&](const auto & subquery_column) -> bool - { - if (auto * column_node = query_column->as()) - { - return subquery_column == column_node->getColumn(); - } - return false; - }); - - if (find == subquery_columns.end()) - return false; - } - return true; -} - -/// Whether subquery_columns contains all columns in subquery_columns. 
-/// query_columns: query columns from query -/// subquery_columns: projection columns from subquery -bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns) -{ - if (query_columns.size() > subquery_columns.size()) - return false; - - for (const auto & query_column : query_columns) - { - auto find = std::find_if( - subquery_columns.begin(), - subquery_columns.end(), - [&](const auto & subquery_column) -> bool - { - if (auto * column_node = query_column->as()) - { - return subquery_column == column_node->getColumn(); - } - return false; - }); - - if (find == subquery_columns.end()) - return false; - } - return true; -} - -} - -class UniqToCountVisitor : public InDepthQueryTreeVisitor -{ -public: - using Base = InDepthQueryTreeVisitor; - using Base::Base; - - void visitImpl(QueryTreeNodePtr & node) - { - auto * query_node = node->as(); - if (!query_node) - return; - - /// Check that query has only single table expression which is subquery - auto * subquery_node = query_node->getJoinTree()->as(); - if (!subquery_node) - return; - - /// Check that query has only single node in projection - auto & projection_nodes = query_node->getProjection().getNodes(); - if (projection_nodes.size() != 1) - return; - - /// Check that projection_node is a function - auto & projection_node = projection_nodes[0]; - auto * function_node = projection_node->as(); - if (!function_node) - return; - - /// Check that query single projection node is `uniq` or its variants - if (!matchFnUniq(function_node->getFunctionName())) - return; - - auto & uniq_arguments_nodes = function_node->getArguments().getNodes(); - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' - auto match_subquery_with_distinct = [&]() -> bool - { - if (!subquery_node->isDistinct()) - return false; - - /// uniq expression list == subquery projection columns - if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns())) - return false; - - return true; - }; - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... 
GROUP BY x ...)' - auto match_subquery_with_group_by = [&]() -> bool - { - if (!subquery_node->hasGroupBy()) - return false; - - /// uniq argument node list == subquery group by node list - auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node); - - if (!nodeListEquals(uniq_arguments_nodes, group_by_columns)) - return false; - - /// subquery projection columns must contain all columns in uniq argument node list - if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns())) - return false; - - return true; - }; - - /// Replace uniq of initial query to count - if (match_subquery_with_distinct() || match_subquery_with_group_by()) - { - AggregateFunctionProperties properties; - auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); - - function_node->resolveAsAggregateFunction(std::move(aggregate_function)); - function_node->getArguments().getNodes().clear(); - - /// Update projection columns - query_node->resolveProjectionColumns({{"count()", function_node->getResultType()}}); - } - } -}; - - -void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context) -{ - if (!context->getSettings().optimize_uniq_to_count) - return; - - UniqToCountVisitor visitor; - visitor.visit(query_tree_node); -} - -} diff --git a/src/Analyzer/Passes/UniqToCountPass.h b/src/Analyzer/Passes/UniqToCountPass.h deleted file mode 100644 index 4992d524e5e..00000000000 --- a/src/Analyzer/Passes/UniqToCountPass.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/** Optimize `uniq` and its variants(except uniqUpTo) into `count` over subquery. - * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to - * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' - * - * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to - * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' - * - * Note that we can rewrite all uniq variants except uniqUpTo. - */ -class UniqToCountPass final : public IQueryTreePass -{ -public: - String getName() override { return "UniqToCount"; } - - String getDescription() override - { - return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause."; - } - - void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; -}; - -} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index dd75b0f586d..a6da2a66615 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -247,7 +246,6 @@ void addQueryTreePasses(QueryTreePassManager & manager) manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); - manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2ead00cafb4..8bebef5fb00 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -776,7 +776,6 @@ class IColumn; M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \ M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. 
Used to safeguard against consuming too much memory.", 0) \ M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \ - M(Bool, optimize_uniq_to_count, false, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8402165b62b..fc3ea3a13ca 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include #include @@ -427,12 +426,6 @@ InterpreterSelectQuery::InterpreterSelectQuery( RewriteCountDistinctFunctionVisitor(data_rewrite_countdistinct).visit(query_ptr); } - if (settings.optimize_uniq_to_count) - { - RewriteUniqToCountMatcher::Data data_rewrite_uniq_count; - RewriteUniqToCountVisitor(data_rewrite_uniq_count).visit(query_ptr); - } - JoinedTables joined_tables(getSubqueryContext(context), getSelectQuery(), options.with_all_cols, options_.is_create_parameterized_view); bool got_storage_from_query = false; diff --git a/src/Interpreters/RewriteUniqToCountVisitor.cpp b/src/Interpreters/RewriteUniqToCountVisitor.cpp deleted file mode 100644 index 7445068207a..00000000000 --- a/src/Interpreters/RewriteUniqToCountVisitor.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -using Aliases = std::unordered_map; - -namespace -{ - -bool matchFnUniq(String func_name) -{ - auto name = Poco::toLower(func_name); - return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined" - || name == "uniqCombined64"; -} - -bool expressionEquals(const ASTPtr & lhs, const ASTPtr & rhs, const Aliases & alias) -{ - if (lhs->getTreeHash() == rhs->getTreeHash()) - { - return true; - } - else - { - auto * lhs_idf = lhs->as(); - auto * rhs_idf = rhs->as(); - if (lhs_idf && rhs_idf) - { - /// compound identifiers, such as: - if (lhs_idf->shortName() == rhs_idf->shortName()) - return true; - - /// translate alias - if (alias.find(lhs_idf->shortName()) != alias.end()) - lhs_idf = alias.find(lhs_idf->shortName())->second->as(); - - if (alias.find(rhs_idf->shortName()) != alias.end()) - rhs_idf = alias.find(rhs_idf->shortName())->second->as(); - - if (lhs_idf->shortName() == rhs_idf->shortName()) - return true; - } - } - return false; -} - -bool expressionListEquals(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) -{ - if (!lhs || !rhs) - return false; - if (lhs->children.size() != rhs->children.size()) - return false; - for (size_t i = 0; i < lhs->children.size(); i++) - { - if (!expressionEquals(lhs->children[i], rhs->children[i], alias)) - return false; - } - return true; -} - -/// Test whether lhs contains all expressions in rhs. 
-bool expressionListContainsAll(ASTExpressionList * lhs, ASTExpressionList * rhs, const Aliases & alias) -{ - if (!lhs || !rhs) - return false; - if (lhs->children.size() < rhs->children.size()) - return false; - for (const auto & re : rhs->children) - { - auto predicate = [&re, &alias](ASTPtr & le) { return expressionEquals(le, re, alias); }; - if (std::find_if(lhs->children.begin(), lhs->children.end(), predicate) == lhs->children.end()) - return false; - } - return true; -} - -} - -void RewriteUniqToCountMatcher::visit(ASTPtr & ast, Data & /*data*/) -{ - auto * selectq = ast->as(); - if (!selectq || !selectq->tables() || selectq->tables()->children.size() != 1) - return; - auto expr_list = selectq->select(); - if (!expr_list || expr_list->children.size() != 1) - return; - auto * func = expr_list->children[0]->as(); - if (!func || !matchFnUniq(func->name)) - return; - if (selectq->tables()->as()->children[0]->as()->children.size() != 1) - return; - auto * table_expr = selectq->tables() - ->as() - ->children[0] - ->as() - ->children[0] - ->as(); - if (!table_expr || table_expr->children.size() != 1 || !table_expr->subquery) - return; - auto * subquery = table_expr->subquery->as(); - if (!subquery) - return; - auto * sub_selectq = subquery->children[0] - ->as()->children[0] - ->as()->children[0] - ->as(); - if (!sub_selectq) - return; - auto sub_expr_list = sub_selectq->select(); - if (!sub_expr_list) - return; - - /// collect subquery select expressions alias - Aliases alias; - for (const auto & expr : sub_expr_list->children) - { - if (!expr->tryGetAlias().empty()) - alias.insert({expr->tryGetAlias(), expr}); - } - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' - auto match_subquery_with_distinct = [&]() -> bool - { - if (!sub_selectq->distinct) - return false; - /// uniq expression list == subquery group by expression list - if (!expressionListEquals(func->children[0]->as(), sub_expr_list->as(), alias)) - return false; - return true; - }; - - /// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' - auto match_subquery_with_group_by = [&]() -> bool - { - auto group_by = sub_selectq->groupBy(); - if (!group_by) - return false; - /// uniq expression list == subquery group by expression list - if (!expressionListEquals(func->children[0]->as(), group_by->as(), alias)) - return false; - /// subquery select expression list must contain all columns in uniq expression list - if (!expressionListContainsAll(sub_expr_list->as(), func->children[0]->as(), alias)) - return false; - return true; - }; - - if (match_subquery_with_distinct() || match_subquery_with_group_by()) - expr_list->children[0] = makeASTFunction("count"); -} - -} diff --git a/src/Interpreters/RewriteUniqToCountVisitor.h b/src/Interpreters/RewriteUniqToCountVisitor.h deleted file mode 100644 index 94528ccf2ee..00000000000 --- a/src/Interpreters/RewriteUniqToCountVisitor.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include -#include -#include "Interpreters/TreeRewriter.h" - -namespace DB -{ - -class ASTFunction; - -/** Optimize `uniq` into `count` over subquery. - * Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to - * Result: 'SELECT count() FROM (SELECT DISTINCT x ...)' - * - * Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to - * Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)' - * - * Note that we can rewrite all uniq variants except uniqUpTo. 
- */ -class RewriteUniqToCountMatcher -{ -public: - struct Data {}; - static void visit(ASTPtr & ast, Data &); - static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } -}; - -using RewriteUniqToCountVisitor = InDepthNodeVisitor; -} diff --git a/tests/integration/test_rewrite_uniq_to_count/__init__.py b/tests/integration/test_rewrite_uniq_to_count/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_rewrite_uniq_to_count/test.py b/tests/integration/test_rewrite_uniq_to_count/test.py deleted file mode 100644 index e38e57f5cee..00000000000 --- a/tests/integration/test_rewrite_uniq_to_count/test.py +++ /dev/null @@ -1,127 +0,0 @@ -import pytest -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node") - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - prepare() - yield cluster - finally: - shutdown() - cluster.shutdown() - - -def prepare(): - node.query( - """ - CREATE TABLE IF NOT EXISTS test_rewrite_uniq_to_count - ( - `a` UInt8, - `b` UInt8, - `c` UInt8 - ) - ENGINE = MergeTree - ORDER BY `a` - """ - ) - node.query( - "INSERT INTO test_rewrite_uniq_to_count values ('1', '1', '1'), ('1', '1', '1')" - ) - node.query( - "INSERT INTO test_rewrite_uniq_to_count values ('2', '2', '2'), ('2', '2', '2')" - ) - node.query( - "INSERT INTO test_rewrite_uniq_to_count values ('3', '3', '3'), ('3', '3', '3')" - ) - - -def shutdown(): - node.query("DROP TABLE IF EXISTS test_rewrite_uniq_to_count SYNC") - - -def check(query, result): - # old analyzer - query = query + " settings optimize_uniq_to_count = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN SYNTAX " + query) - - # new analyzer - query = query + ", allow_experimental_analyzer = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN QUERY TREE " + query) - - -def check_by_old_analyzer(query, result): - # only old analyzer - query = query + " settings optimize_uniq_to_count = 1" - assert node.query(query) == f"{result}\n" - assert "count()" in node.query("EXPLAIN SYNTAX " + query) - - -def test_rewrite_distinct(started_cluster): - # simple test - check( - "SELECT uniq(a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count)", - 3, - ) - - # test subquery alias - check( - "SELECT uniq(t.a) FROM (SELECT DISTINCT a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test compound column name - check( - "SELECT uniq(a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT test_rewrite_uniq_to_count.a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(alias_of_a) FROM (SELECT DISTINCT a as alias_of_a FROM test_rewrite_uniq_to_count) t", - 3, - ) - - -def test_rewrite_group_by(started_cluster): - # simple test - check( - "SELECT uniq(a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a)", - 3, - ) - - # test subquery alias - check( - "SELECT uniq(t.a) FROM (SELECT a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as 
alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY alias_of_a) t", - 3, - ) - - # test select expression alias - check( - "SELECT uniq(t.alias_of_a) FROM (SELECT a as alias_of_a, sum(b) FROM test_rewrite_uniq_to_count GROUP BY a) t", - 3, - ) From 2cc1ac45dd8dda3385e2df1db9ea4fab1789a585 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 18:45:56 +0200 Subject: [PATCH 400/478] update missed error --- src/Functions/FunctionToDecimalString.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index a965e2c2c90..ce52d8b99f6 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -214,7 +214,7 @@ private: ColumnPtr executeType(const ColumnsWithTypeAndName & arguments) const { const auto * precision_col = checkAndGetColumn>(arguments[1].column.get()); - const auto * precision_col_const = typeid_cast(arguments[1].column.get()); + const auto * precision_col_const = checkAndGetColumnConst>(arguments[1].column.get()); auto result_col = ColumnString::create(); auto * result_col_string = assert_cast(result_col.get()); From 413ec520b3027d9f377aa1929a2855429994ffe3 Mon Sep 17 00:00:00 2001 From: Sanjam Panda Date: Tue, 25 Jul 2023 18:54:27 +0200 Subject: [PATCH 401/478] fix code style --- src/Common/HashTable/TwoLevelStringHashTable.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Common/HashTable/TwoLevelStringHashTable.h b/src/Common/HashTable/TwoLevelStringHashTable.h index 0527ec67e6e..54c208c5b60 100644 --- a/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/src/Common/HashTable/TwoLevelStringHashTable.h @@ -113,20 +113,20 @@ public: if ((reinterpret_cast(p) & 2048) == 0) { memcpy(&n[0], p, 8); - if constexpr (std::endian::native == std::endian::little) + if constexpr (std::endian::native == std::endian::little) n[0] &= -1ULL >> s; else n[0] &= -1ULL << s; - } + } else { const char * lp = x.data + x.size - 8; memcpy(&n[0], lp, 8); - if constexpr (std::endian::native == std::endian::little) + if constexpr (std::endian::native == std::endian::little) n[0] >>= s; else n[0] <<= s; - } + } auto res = hash(k8); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); @@ -139,9 +139,9 @@ public: memcpy(&n[1], lp, 8); if constexpr (std::endian::native == std::endian::little) n[1] >>= s; - else + else n[1] <<= s; - auto res = hash(k16); + auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m2, k16, res); @@ -153,9 +153,9 @@ public: memcpy(&n[2], lp, 8); if constexpr (std::endian::native == std::endian::little) n[2] >>= s; - else + else n[2] <<= s; - auto res = hash(k24); + auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); return func(self.impls[buck].m3, k24, res); From 4f7bdf308d215478a718e1fe3c157c043702213e Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 25 Jul 2023 18:57:54 +0200 Subject: [PATCH 402/478] add explicit else --- src/Functions/FunctionToDecimalString.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionToDecimalString.h b/src/Functions/FunctionToDecimalString.h index ce52d8b99f6..3dd946203cc 100644 --- a/src/Functions/FunctionToDecimalString.h +++ b/src/Functions/FunctionToDecimalString.h @@ -38,7 +38,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { 
FunctionArgumentDescriptors mandatory_args = { - {"Value", nullptr, nullptr, nullptr}, + {"Value", &isNumber, nullptr, "Number"}, {"precision", &isNativeInteger, &isColumnConst, "const Integer"} }; @@ -230,8 +230,10 @@ private: { if (precision_col_const) vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets, from_scale); - else + else if (precision_col) vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets, from_scale); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); @@ -243,8 +245,11 @@ private: { if (precision_col_const) vectorConstant(from_col->getData(), precision_col_const->template getValue(), result_chars, result_offsets); - else + else if (precision_col) vectorVector(from_col->getData(), precision_col->getData(), result_chars, result_offsets); + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of second argument of function formatDecimal", arguments[1].column->getName()); + } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function formatDecimal", arguments[0].column->getName()); From 59db21941034a287eea6c1016ed2ca83e6772774 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 25 Jul 2023 19:21:41 +0200 Subject: [PATCH 403/478] Fix possible error "Cannot drain connections: cancel first" --- src/QueryPipeline/RemoteQueryExecutor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index cd6f65b7b43..198c3265a84 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -591,8 +591,8 @@ void RemoteQueryExecutor::finish() /// Send the request to abort the execution of the request, if not already sent. tryCancel("Cancelling query because enough data has been read"); - /// If connections weren't created yet or query wasn't sent, nothing to do. - if (!connections || !sent_query) + /// If connections weren't created yet, query wasn't sent or was already finished, nothing to do. + if (!connections || !sent_query || finished) return; /// Get the remaining packets so that there is no out of sync in the connections to the replicas. 
From d78b3e560f13a6ba8b85b76e2f0d56bea44f2c62 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 25 Jul 2023 17:45:13 +0000 Subject: [PATCH 404/478] Fix 02497_trace_events_stress_long again --- .../0_stateless/02497_trace_events_stress_long.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_trace_events_stress_long.sh b/tests/queries/0_stateless/02497_trace_events_stress_long.sh index 91f6a9bb541..c111ed40a29 100755 --- a/tests/queries/0_stateless/02497_trace_events_stress_long.sh +++ b/tests/queries/0_stateless/02497_trace_events_stress_long.sh @@ -45,4 +45,11 @@ thread2 $TIMEOUT >/dev/null & wait -$CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' \ No newline at end of file +for _ in {1..10} +do + # process list is cleaned after everything is sent to client + # so this check can be run before process list is cleaned + # to avoid spurious failures we retry the check couple of times + $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id LIKE '02497_$CLICKHOUSE_DATABASE%'" | rg '^0$' && break + sleep 1 +done \ No newline at end of file From 20300804b13187447e8677573b46ee70175c98cc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 00:01:46 +0300 Subject: [PATCH 405/478] Update test.py --- .../test_replicated_merge_tree_wait_on_shutdown/test.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py index d1373d44d0f..67dd03098e9 100644 --- a/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py +++ b/tests/integration/test_replicated_merge_tree_wait_on_shutdown/test.py @@ -55,16 +55,7 @@ def test_shutdown_and_wait(start_cluster): node1.query(f"INSERT INTO test_table VALUES ({value})") with PartitionManager() as pm: - assert node2.query("SELECT * FROM test_table") == "0\n" pm.partition_instances(node1, node2) - # iptables rules must be applied immediately, but looks like sometimes they are not... 
- assert_eq_with_retry( - node1, - "select count() from remote('node1,node2', 'system.one')", - "1\n", - settings={"skip_unavailable_shards": 1}, - ) - p.map(insert, range(1, 50)) # Start shutdown async From d85f9ddb35f02564fe9d04f20f0a3451530a2b4c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 00:03:08 +0300 Subject: [PATCH 406/478] Update parallel_skip.json --- tests/integration/parallel_skip.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 407fe7d1b01..1075fbaa0f8 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -69,6 +69,8 @@ "test_server_reload/test.py::test_remove_tcp_port", "test_keeper_map/test.py::test_keeper_map_without_zk", + + "test_replicated_merge_tree_wait_on_shutdown/test.py::test_shutdown_and_wait", "test_http_failover/test.py::test_url_destination_host_with_multiple_addrs", "test_http_failover/test.py::test_url_invalid_hostname", From 3928f7ef460f4f4603ceaa065733ac0a7ebc4d16 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 26 Jul 2023 09:19:35 +0200 Subject: [PATCH 407/478] Remove peak memory usage from the final message in the client --- src/Common/ProgressIndication.cpp | 3 --- tests/queries/0_stateless/01921_test_progress_bar.py | 1 - 2 files changed, 4 deletions(-) diff --git a/src/Common/ProgressIndication.cpp b/src/Common/ProgressIndication.cpp index 960d864660c..5a1929d4ec2 100644 --- a/src/Common/ProgressIndication.cpp +++ b/src/Common/ProgressIndication.cpp @@ -101,9 +101,6 @@ void ProgressIndication::writeFinalProgress() << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)"; else std::cout << ". "; - auto peak_memory_usage = getMemoryUsage().peak; - if (peak_memory_usage >= 0) - std::cout << "\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "."; } void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message) diff --git a/tests/queries/0_stateless/01921_test_progress_bar.py b/tests/queries/0_stateless/01921_test_progress_bar.py index 9ce2168e2ae..89eecbc3987 100755 --- a/tests/queries/0_stateless/01921_test_progress_bar.py +++ b/tests/queries/0_stateless/01921_test_progress_bar.py @@ -17,4 +17,3 @@ with client(name="client1>", log=log) as client1: client1.send("SELECT number FROM numbers(1000) FORMAT Null") client1.expect("Progress: 1\.00 thousand rows, 8\.00 KB .*" + end_of_block) client1.expect("0 rows in set. 
Elapsed: [\\w]{1}\.[\\w]{3} sec.") - client1.expect("Peak memory usage \(for query\) .*B" + end_of_block) From 991584506f11563d324051236e09bd7c1a3b12d4 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Jul 2023 12:42:18 +0400 Subject: [PATCH 408/478] fix a bug when files are finalizated after first write --- src/Storages/MergeTree/GinIndexStore.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index aa0c1fccbc3..91e831270d4 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -243,6 +243,15 @@ void GinIndexStore::finalize() { if (!current_postings.empty()) writeSegment(); + + if (metadata_file_stream) + metadata_file_stream->finalize(); + + if (dict_file_stream) + dict_file_stream->finalize(); + + if (postings_file_stream) + postings_file_stream->finalize(); } void GinIndexStore::initFileStreams() @@ -319,13 +328,8 @@ void GinIndexStore::writeSegment() current_segment.segment_id = getNextSegmentID(); metadata_file_stream->sync(); - metadata_file_stream->finalize(); - dict_file_stream->sync(); - dict_file_stream->finalize(); - postings_file_stream->sync(); - postings_file_stream->finalize(); } GinIndexStoreDeserializer::GinIndexStoreDeserializer(const GinIndexStorePtr & store_) From 93e10077bad715235dfe7d4da6d103ffbb30f55a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 26 Jul 2023 11:53:19 +0200 Subject: [PATCH 409/478] Fix attaching gdb in stress tests (#51445) * Fix attaching gdb in stress tests * Fix * Update run.sh * Try remove run_with_retry * Return run_with_retry * Don't set -e in run_with_retry if it was't set before * Update tests/ci/utils.lib * Fix bash --------- Co-authored-by: Alexander Tokmakov --- docker/test/stress/run.sh | 3 ++- docker/test/upgrade/run.sh | 1 + tests/ci/stress_tests.lib | 2 -- tests/ci/utils.lib | 11 +++++++++-- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4926967d2d2..9217fcfddd9 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/attach_gdb.lib source /usr/share/clickhouse-test/ci/stress_tests.lib install_packages package_folder @@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & start -shellcheck disable=SC2086 # No quotes because I want to split it into words. +# shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index b8061309342..73a2965bf44 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib +source /usr/share/clickhouse-test/ci/attach_gdb.lib source /usr/share/clickhouse-test/ci/stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & diff --git a/tests/ci/stress_tests.lib b/tests/ci/stress_tests.lib index 190f3f39f9e..85b376ac39d 100644 --- a/tests/ci/stress_tests.lib +++ b/tests/ci/stress_tests.lib @@ -9,8 +9,6 @@ FAIL="\tFAIL\t\\N\t" FAILURE_CONTEXT_LINES=100 FAILURE_CONTEXT_MAX_LINE_WIDTH=300 -source attach_gdb.lib - function escaped() { # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. diff --git a/tests/ci/utils.lib b/tests/ci/utils.lib index b5ce4ae0d78..c90b7ebe6f6 100644 --- a/tests/ci/utils.lib +++ b/tests/ci/utils.lib @@ -2,6 +2,11 @@ function run_with_retry() { + if [[ $- =~ e ]]; then + set_e=true + else + set_e=false + fi set +e local total_retries="$1" @@ -12,7 +17,9 @@ function run_with_retry() until [ "$retry" -ge "$total_retries" ] do if "$@"; then - set -e + if $set_e; then + set -e + fi return else retry=$((retry + 1)) @@ -26,4 +33,4 @@ function run_with_retry() function fn_exists() { declare -F "$1" > /dev/null; -} \ No newline at end of file +} From 017d34d40fdd8fe5b03e993b030385ccb20b0ebc Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 28 Jun 2023 23:41:51 +0200 Subject: [PATCH 410/478] determine task size by prewhere columns --- src/Core/Settings.h | 1 + src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp | 9 ++++++--- src/Storages/MergeTree/MergeTreeReadPool.cpp | 6 ++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 24be644ee55..d14121a97a3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -673,6 +673,7 @@ class IColumn; M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ + M(Bool, merge_tree_determine_task_size_by_prewhere_columns, true, "Whether to use only prewhere columns size to determine reading task size.", 0) \ \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index fbad7d2f7be..e9e2138d995 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -328,7 +328,10 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf for (const auto & range : part.ranges) part_info->sum_marks += range.end - range.begin; - part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, column_names); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? 
prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names; + part_info->approx_size_of_mark = getApproximateSizeOfGranule(*part_info->data_part, columns); const auto task_columns = getReadTaskColumns( part_reader_info, @@ -369,9 +372,9 @@ MergeTreePrefetchedReadPool::PartsInfos MergeTreePrefetchedReadPool::getPartsInf } if (prewhere_info) { - for (const auto & columns : task_columns.pre_columns) + for (const auto & cols : task_columns.pre_columns) { - for (const auto & col : columns) + for (const auto & col : cols) { const size_t col_size = part.data_part->getColumnSize(col.name).data_compressed; part_info->estimated_memory_usage_for_single_prefetch += std::min(col_size, settings.prefetch_buffer_size); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 2ab90189f9d..896769d9355 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -73,8 +73,10 @@ MergeTreeReadPool::MergeTreeReadPool( size_t total_marks = 0; for (const auto & part : parts_ranges) { - total_compressed_bytes += getApproxSizeOfPart( - *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_); + const auto & columns = settings.merge_tree_determine_task_size_by_prewhere_columns && prewhere_info + ? prewhere_info->prewhere_actions->getRequiredColumnsNames() + : column_names_; + total_compressed_bytes += getApproxSizeOfPart(*part.data_part, columns); total_marks += part.getMarksCount(); } From 04180549b094c231a01642cb70fa051bed2f7abb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jul 2023 13:15:58 +0200 Subject: [PATCH 411/478] Fix possible double-free in Aggregator (#52439) --- src/Interpreters/Aggregator.cpp | 6 ++++-- .../test.py | 2 +- .../0_stateless/02355_control_block_size_in_aggregator.sql | 3 ++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c7d4b87694b..36cd32910b5 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2020,7 +2020,8 @@ template NO_INLINE Aggregator::convertToBlockImplFinal(Method & method, Table & data, Arena * arena, Arenas & aggregates_pools, size_t) const { - const size_t max_block_size = params.max_block_size; + /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated + const size_t max_block_size = (return_single_block ? data.size() : std::min(params.max_block_size, data.size())) + 1; const bool final = true; ConvertToBlockRes res; @@ -2097,7 +2098,8 @@ template Aggregator::ConvertToBlockRes NO_INLINE Aggregator::convertToBlockImplNotFinal(Method & method, Table & data, Arenas & aggregates_pools, size_t) const { - const size_t max_block_size = params.max_block_size; + /// +1 for nullKeyData, if `data` doesn't have it - not a problem, just some memory for one excessive row will be preallocated + const size_t max_block_size = (return_single_block ? 
data.size() : std::min(params.max_block_size, data.size())) + 1; const bool final = false; ConvertToBlockRes res; diff --git a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py index faa38af6533..e66631460f7 100644 --- a/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py +++ b/tests/integration/test_distributed_directory_monitor_split_batch_on_failure/test.py @@ -68,7 +68,7 @@ def test_distributed_directory_monitor_split_batch_on_failure_OFF(started_cluste settings={ # max_memory_usage is the limit for the batch on the remote node # (local query should not be affected since 30MB is enough for 100K rows) - "max_memory_usage": "30Mi", + "max_memory_usage": "20Mi", "max_untracked_memory": "0", }, ) diff --git a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql index b4754c6d6fe..f9f9661a7c4 100644 --- a/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql +++ b/tests/queries/0_stateless/02355_control_block_size_in_aggregator.sql @@ -1,6 +1,7 @@ SET max_block_size = 4213; -SELECT DISTINCT (blockSize() <= 4213) +--- We allocate space for one more row in case nullKeyData is present. +SELECT DISTINCT (blockSize() <= 4214) FROM ( SELECT number From bf301867650194b089b14240d121b1b1eb3b4f6e Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jul 2023 13:23:09 +0200 Subject: [PATCH 412/478] upd test --- .../queries/0_stateless/02701_non_parametric_function.reference | 0 tests/queries/0_stateless/02701_non_parametric_function.sql | 1 + 2 files changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/02701_non_parametric_function.reference create mode 100644 tests/queries/0_stateless/02701_non_parametric_function.sql diff --git a/tests/queries/0_stateless/02701_non_parametric_function.reference b/tests/queries/0_stateless/02701_non_parametric_function.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02701_non_parametric_function.sql b/tests/queries/0_stateless/02701_non_parametric_function.sql new file mode 100644 index 00000000000..b242bdc72ef --- /dev/null +++ b/tests/queries/0_stateless/02701_non_parametric_function.sql @@ -0,0 +1 @@ +SELECT * FROM system.numbers WHERE number > toUInt64(10)(number) LIMIT 10; -- { serverError 309 } From aa25ce9e3d50d1b590821a1a9d93f0e1edf53e8e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 26 Jul 2023 11:19:20 +0000 Subject: [PATCH 413/478] Follow-up to "Implement support of encrypted elements in configuration file" Cf. 
PR #50986 - rename XML attribute "encryption_codec" to "encrypted_by" --- docs/en/operations/configuration-files.md | 67 +++++++++++++++---- docs/ru/operations/configuration-files.md | 7 +- src/Common/Config/ConfigProcessor.cpp | 20 +++--- src/Common/examples/encrypt_decrypt.cpp | 4 +- src/Compression/CompressionCodecEncrypted.cpp | 42 ++---------- src/Compression/CompressionCodecEncrypted.h | 4 +- .../test_config_decryption/configs/config.xml | 7 +- .../configs/config.yaml | 6 +- .../configs/config_invalid_chars.xml | 8 ++- .../configs/config_no_encryption_key.xml | 6 +- .../configs/config_subnodes.xml | 6 +- .../configs/config_wrong_method.xml | 7 +- .../test_wrong_settings.py | 2 +- 13 files changed, 110 insertions(+), 76 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index d1d9fa542ab..a19c55673ed 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -67,7 +67,7 @@ Substitutions can also be performed from ZooKeeper. To do this, specify the attr ## Encrypting Configuration {#encryption} -You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encryption_codec` with the name of the encryption codec as value to the element to encrypt. +You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt. Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process. @@ -75,19 +75,22 @@ Example: ```xml + 00112233445566778899aabbccddeeff + admin - 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + ``` -To get the encrypted value `encrypt_decrypt` example application may be used. +To encrypt a value, you can use the (example) program `encrypt_decrypt`: Example: @@ -138,12 +141,17 @@ Here you can see default config written in YAML: [config.yaml.example](https://g There are some differences between YAML and XML formats in terms of ClickHouse configurations. Here are some tips for writing a configuration in YAML format. -You should use a Scalar node to write a key-value pair: +An XML tag with a text value is represented by a YAML key-value pair ``` yaml key: value ``` -To create a node, containing other nodes you should use a Map: +Corresponding XML: +``` xml +value +``` + +A nested XML node is represented by a YAML map: ``` yaml map_key: key1: val1 @@ -151,7 +159,16 @@ map_key: key3: val3 ``` -To create a list of values or nodes assigned to one tag you should use a Sequence: +Corresponding XML: +``` xml + + val1 + val2 + val3 + +``` + +To create the same XML tag multiple times, use a YAML sequence: ``` yaml seq_key: - val1 @@ -162,8 +179,22 @@ seq_key: key3: val5 ``` -If you want to write an attribute for a Sequence or Map node, you should use a @ prefix before the attribute key. 
Note, that @ is reserved by YAML standard, so you should also to wrap it into double quotes: +Corresponding XML: +```xml +val1 +val2 + + val3 + + + + val4 + val5 + + +``` +To provide an XML attribute, you can use an attribute key with a `@` prefix. Note that `@` is reserved by YAML standard, so must be wrapped in double quotes: ``` yaml map: "@attr1": value1 @@ -171,16 +202,14 @@ map: key: 123 ``` -From that Map we will get these XML nodes: - +Corresponding XML: ``` xml 123 ``` -You can also set attributes for Sequence: - +It is also possible to use attributes in YAML sequence: ``` yaml seq: - "@attr1": value1 @@ -189,13 +218,25 @@ seq: - abc ``` -So, we can get YAML config equal to this XML one: - +Corresponding XML: ``` xml 123 abc ``` +The aforementioned syntax does not allow to express XML text nodes with XML attributes as YAML. This special case can be achieved using an +`#text` attribute key: +```yaml +map_key: + "@attr1": value1 + "#text": value2 +``` + +Corresponding XML: +```xml +value2 +``` + ## Implementation Details {#implementation-details} For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file. diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md index 01a91bd41c6..085761d80c7 100644 --- a/docs/ru/operations/configuration-files.md +++ b/docs/ru/operations/configuration-files.md @@ -87,7 +87,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ## Шифрование {#encryption} -Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encryption_codec` с именем кодека шифрования как значение к элементу, который надо зашифровать. +Вы можете использовать симметричное шифрование для зашифровки элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте аттибут`encrypted_by` с именем кодека шифрования как значение к элементу, который надо зашифровать. В отличии от аттрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выподняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе. 
@@ -95,15 +95,18 @@ $ cat /etc/clickhouse-server/users.d/alice.xml ```xml + 00112233445566778899aabbccddeeff + admin - 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + 961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85 + ``` diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index db3c6909b21..a55183782d8 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -192,13 +192,13 @@ static void mergeAttributes(Element & config_element, Element & with_element) std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value) { - EncryptionMethod method = getEncryptionMethod(codec_name); - CompressionCodecEncrypted codec(method); + EncryptionMethod encryption_method = toEncryptionMethod(codec_name); + CompressionCodecEncrypted codec(encryption_method); Memory<> memory; memory.resize(codec.getCompressedReserveSize(static_cast(value.size()))); auto bytes_written = codec.compress(value.data(), static_cast(value.size()), memory.data()); - auto encrypted_value = std::string(memory.data(), bytes_written); + std::string encrypted_value(memory.data(), bytes_written); std::string hex_value; boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value)); return hex_value; @@ -206,8 +206,8 @@ std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value) { - EncryptionMethod method = getEncryptionMethod(codec_name); - CompressionCodecEncrypted codec(method); + EncryptionMethod encryption_method = toEncryptionMethod(codec_name); + CompressionCodecEncrypted codec(encryption_method); Memory<> memory; std::string encrypted_value; @@ -223,7 +223,7 @@ std::string ConfigProcessor::decryptValue(const std::string & codec_name, const memory.resize(codec.readDecompressedBlockSize(encrypted_value.data())); codec.decompress(encrypted_value.data(), static_cast(encrypted_value.size()), memory.data()); - std::string decrypted_value = std::string(memory.data(), memory.size()); + std::string decrypted_value(memory.data(), memory.size()); return decrypted_value; } @@ -234,7 +234,7 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) if (node->nodeType() == Node::ELEMENT_NODE) { Element & element = dynamic_cast(*node); - if (element.hasAttribute("encryption_codec")) + if (element.hasAttribute("encrypted_by")) { const NodeListPtr children = element.childNodes(); if (children->length() != 1) @@ -244,8 +244,8 @@ void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root) if (text_node->nodeType() != Node::TEXT_NODE) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName()); - auto encryption_codec = element.getAttribute("encryption_codec"); - text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue())); + auto encrypted_by = element.getAttribute("encrypted_by"); + text_node->setNodeValue(decryptValue(encrypted_by, text_node->getNodeValue())); } decryptRecursive(node); } @@ -775,7 +775,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config) { - CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs"); + 
CompressionCodecEncrypted::Configuration::instance().load(*loaded_config.configuration, "encryption_codecs"); Node * config_root = getRootNode(loaded_config.preprocessed_xml.get()); decryptRecursive(config_root); loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml); diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index 503802016cb..c7f949195c8 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -3,7 +3,7 @@ #include #include -/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV. +/** This program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV. * Keys for codecs are loaded from section of configuration file. * * How to use: @@ -32,7 +32,7 @@ int main(int argc, char ** argv) DB::ConfigProcessor processor(argv[1], false, true); auto loaded_config = processor.loadConfig(); - DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs"); + DB::CompressionCodecEncrypted::Configuration::instance().load(*loaded_config.configuration, "encryption_codecs"); if (action == "-e") std::cout << processor.encryptValue(codec_name, value) << std::endl; diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 3f4e35a78a4..5438e02792f 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -31,14 +31,14 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -EncryptionMethod getEncryptionMethod(const std::string & name) +EncryptionMethod toEncryptionMethod(const std::string & name) { if (name == "AES_128_GCM_SIV") return AES_128_GCM_SIV; else if (name == "AES_256_GCM_SIV") return AES_256_GCM_SIV; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", name); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown encryption method. Got {}", name); } namespace @@ -48,34 +48,22 @@ namespace String getMethodName(EncryptionMethod Method) { if (Method == AES_128_GCM_SIV) - { return "AES_128_GCM_SIV"; - } else if (Method == AES_256_GCM_SIV) - { return "AES_256_GCM_SIV"; - } else - { return ""; - } } /// Get method code (used for codec, to understand which one we are using) uint8_t getMethodCode(EncryptionMethod Method) { if (Method == AES_128_GCM_SIV) - { return static_cast(CompressionMethodByte::AES_128_GCM_SIV); - } else if (Method == AES_256_GCM_SIV) - { return static_cast(CompressionMethodByte::AES_256_GCM_SIV); - } else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method)); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown encryption method. Got {}", getMethodName(Method)); } } // end of namespace @@ -105,17 +93,11 @@ const String empty_nonce = {"\0\0\0\0\0\0\0\0\0\0\0\0", actual_nonce_size}; UInt64 methodKeySize(EncryptionMethod Method) { if (Method == AES_128_GCM_SIV) - { return 16; - } else if (Method == AES_256_GCM_SIV) - { return 32; - } else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method)); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown encryption method. 
Got {}", getMethodName(Method)); } std::string lastErrorString() @@ -130,17 +112,11 @@ std::string lastErrorString() auto getMethod(EncryptionMethod Method) { if (Method == AES_128_GCM_SIV) - { return EVP_aead_aes_128_gcm_siv; - } else if (Method == AES_256_GCM_SIV) - { return EVP_aead_aes_256_gcm_siv; - } else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method)); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown encryption method. Got {}", getMethodName(Method)); } /// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag. @@ -206,17 +182,11 @@ size_t decrypt(std::string_view ciphertext, char * plaintext, EncryptionMethod m auto getMethod(EncryptionMethod Method) { if (Method == AES_128_GCM_SIV) - { return EVP_aes_128_gcm; - } else if (Method == AES_256_GCM_SIV) - { return EVP_aes_256_gcm; - } else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method)); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown encryption method. Got {}", getMethodName(Method)); } /// Encrypt plaintext with particular algorithm and put result into ciphertext_and_tag. diff --git a/src/Compression/CompressionCodecEncrypted.h b/src/Compression/CompressionCodecEncrypted.h index fafcf4af507..7971cbadab7 100644 --- a/src/Compression/CompressionCodecEncrypted.h +++ b/src/Compression/CompressionCodecEncrypted.h @@ -18,8 +18,8 @@ enum EncryptionMethod MAX_ENCRYPTION_METHOD }; -/// Get method for string name. Throw exception for wrong name. -EncryptionMethod getEncryptionMethod(const std::string & name); +/// Get encryption method for string name. Throw exception for wrong name. +EncryptionMethod toEncryptionMethod(const std::string & name); /** This codec encrypts and decrypts blocks with AES-128 in * GCM-SIV mode (RFC-8452), which is the only cipher currently diff --git a/tests/integration/test_config_decryption/configs/config.xml b/tests/integration/test_config_decryption/configs/config.xml index 5c274128e39..4b0d3a77659 100644 --- a/tests/integration/test_config_decryption/configs/config.xml +++ b/tests/integration/test_config_decryption/configs/config.xml @@ -1,4 +1,5 @@ + 00112233445566778899aabbccddeeff @@ -7,6 +8,8 @@ 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff - 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config.yaml b/tests/integration/test_config_decryption/configs/config.yaml index ab4391be3c5..1b20b65b652 100644 --- a/tests/integration/test_config_decryption/configs/config.yaml +++ b/tests/integration/test_config_decryption/configs/config.yaml @@ -3,9 +3,11 @@ encryption_codecs: key_hex: 00112233445566778899aabbccddeeff aes_256_gcm_siv: key_hex: 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff + max_table_size_to_drop: '#text': 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - '@encryption_codec': AES_128_GCM_SIV + '@encrypted_by': AES_128_GCM_SIV + max_partition_size_to_drop: - '@encryption_codec': AES_256_GCM_SIV + '@encrypted_by': AES_256_GCM_SIV '#text': 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 diff --git 
a/tests/integration/test_config_decryption/configs/config_invalid_chars.xml b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml index 49bf51b5bad..53345b897dc 100644 --- a/tests/integration/test_config_decryption/configs/config_invalid_chars.xml +++ b/tests/integration/test_config_decryption/configs/config_invalid_chars.xml @@ -1,4 +1,5 @@ + 00112233445566778899aabbccddeeff @@ -7,6 +8,9 @@ 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff - --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + + --96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml index 5f7769f7403..830c75f7378 100644 --- a/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml +++ b/tests/integration/test_config_decryption/configs/config_no_encryption_key.xml @@ -1,3 +1,7 @@ - 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + diff --git a/tests/integration/test_config_decryption/configs/config_subnodes.xml b/tests/integration/test_config_decryption/configs/config_subnodes.xml index b0e519ff546..8213270f747 100644 --- a/tests/integration/test_config_decryption/configs/config_subnodes.xml +++ b/tests/integration/test_config_decryption/configs/config_subnodes.xml @@ -1,10 +1,14 @@ + 00112233445566778899aabbccddeeff - + + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + diff --git a/tests/integration/test_config_decryption/configs/config_wrong_method.xml b/tests/integration/test_config_decryption/configs/config_wrong_method.xml index b452ce6374c..b96c13d5105 100644 --- a/tests/integration/test_config_decryption/configs/config_wrong_method.xml +++ b/tests/integration/test_config_decryption/configs/config_wrong_method.xml @@ -1,4 +1,5 @@ + 00112233445566778899aabbccddeeff @@ -7,6 +8,8 @@ 00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff - 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C - 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + + 96260000000B0000000000E8FE3C087CED2205A5071078B29FD5C3B97F824911DED3217E980C + 97260000000B0000000000BFFF70C4DA718754C1DA0E2F25FF9246D4783F7FFEC4089EC1CC14 + diff --git a/tests/integration/test_config_decryption/test_wrong_settings.py b/tests/integration/test_config_decryption/test_wrong_settings.py index b148f9a051a..c6987d12324 100644 --- a/tests/integration/test_config_decryption/test_wrong_settings.py +++ b/tests/integration/test_config_decryption/test_wrong_settings.py @@ -15,7 +15,7 @@ def start_clickhouse(config, err_msg): def test_wrong_method(): start_clickhouse( - "configs/config_wrong_method.xml", "Wrong encryption method. Got WRONG" + "configs/config_wrong_method.xml", "Unknown encryption method. 
Got WRONG" ) From dccbe875d247818a17e999ceab5e062537169f80 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Wed, 26 Jul 2023 15:37:58 +0200 Subject: [PATCH 414/478] check if storage shutdown before we operate MergeTreeDeduplicationLog --- .../MergeTree/MergeTreeDeduplicationLog.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 09a04f13fc7..53481ab06a0 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { @@ -231,6 +233,12 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: return std::make_pair(info, false); } + if (stopped) + { + LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we add this part."); + return {}; + } + chassert(current_writer != nullptr); /// Create new record @@ -261,6 +269,12 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf if (deduplication_window == 0) return; + if (stopped) + { + LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we drop this part."); + return; + } + chassert(current_writer != nullptr); for (auto itr = deduplication_map.begin(); itr != deduplication_map.end(); /* no increment here, we erasing from map */) From 338188ae7f1ccdb399671cbfae584ff79705097a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 26 Jul 2023 14:10:27 +0000 Subject: [PATCH 415/478] fix test --- tests/queries/0_stateless/02791_remote_paths_refcount.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.sql b/tests/queries/0_stateless/02791_remote_paths_refcount.sql index e64df599d32..180601738ad 100644 --- a/tests/queries/0_stateless/02791_remote_paths_refcount.sql +++ b/tests/queries/0_stateless/02791_remote_paths_refcount.sql @@ -2,6 +2,10 @@ DROP TABLE IF EXISTS t_refcount SYNC; +-- Names of parts (on which this test depends) +-- can differ in case of fault injection. 
+SET insert_keeper_fault_injection_probability = 0.0; + CREATE TABLE t_refcount (id UInt64, v UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t_refcount', '1') ORDER BY id PARTITION BY id % 2 From 89f2e8cdea8d7f32be735cd86326d1cbed24e158 Mon Sep 17 00:00:00 2001 From: xiedeyantu Date: Wed, 26 Jul 2023 23:02:57 +0800 Subject: [PATCH 416/478] Fix S3 table function does not work for pre-signed URL --- src/TableFunctions/TableFunctionS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index c8cc0cddd30..3637b3e9eb2 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -160,7 +160,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context configuration.keys = {configuration.url.key}; if (configuration.format == "auto") - configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.uri.getPath(), true); + configuration.format = FormatFactory::instance().getFormatFromFileName(Poco::URI(configuration.url.uri.getPath()).getPath(), true); } void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) From 2479f1352a62adebdc460dbfde4510ad25fc7184 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 17:11:28 +0200 Subject: [PATCH 417/478] fix deadlocks in StorageTableFunctionProxy --- src/Storages/StorageTableFunction.h | 2 +- .../02828_create_as_table_function_rename.reference | 1 + .../0_stateless/02828_create_as_table_function_rename.sql | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02828_create_as_table_function_rename.reference create mode 100644 tests/queries/0_stateless/02828_create_as_table_function_rename.sql diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 26cbe1f0233..3939483495e 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -156,7 +156,7 @@ public: void checkTableCanBeDropped() const override {} private: - mutable std::mutex nested_mutex; + mutable std::recursive_mutex nested_mutex; mutable GetNestedStorageFunc get_nested; mutable StoragePtr nested; const bool add_conversion; diff --git a/tests/queries/0_stateless/02828_create_as_table_function_rename.reference b/tests/queries/0_stateless/02828_create_as_table_function_rename.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02828_create_as_table_function_rename.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02828_create_as_table_function_rename.sql b/tests/queries/0_stateless/02828_create_as_table_function_rename.sql new file mode 100644 index 00000000000..7e24e485fb9 --- /dev/null +++ b/tests/queries/0_stateless/02828_create_as_table_function_rename.sql @@ -0,0 +1,7 @@ + +drop table if exists t1; +create table t1 as remote('localhost', 'system.one'); +rename table t1 to t2; +select * from t2; +rename table t2 to t1; +drop table t1; From 74f3e76b182411fee1d49e74aa5040cd4a378967 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 17:15:28 +0200 Subject: [PATCH 418/478] fix build with clang-15 --- src/Common/SystemLogBase.cpp | 2 +- src/Interpreters/Cache/Metadata.cpp | 3 +++ src/Storages/HDFS/StorageHDFS.h | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Common/SystemLogBase.cpp b/src/Common/SystemLogBase.cpp 
index ed5ffd78a7b..3d68fe63227 100644 --- a/src/Common/SystemLogBase.cpp +++ b/src/Common/SystemLogBase.cpp @@ -185,7 +185,7 @@ void SystemLogQueue::confirm(uint64_t to_flush_end) } template -SystemLogQueue::Index SystemLogQueue::pop(std::vector& output, bool& should_prepare_tables_anyway, bool& exit_this_thread) +typename SystemLogQueue::Index SystemLogQueue::pop(std::vector& output, bool& should_prepare_tables_anyway, bool& exit_this_thread) { std::unique_lock lock(mutex); flush_event.wait_for(lock, diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 6a68d0f21f7..783c71448fc 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -360,6 +360,9 @@ private: struct DownloadInfo { + DownloadInfo(const CacheMetadata::Key & key_, const size_t & offset_, const std::weak_ptr & file_segment_) + : key(key_), offset(offset_), file_segment(file_segment_) {} + CacheMetadata::Key key; size_t offset; /// We keep weak pointer to file segment diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 74801b68f73..13e46bc1023 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -29,6 +29,8 @@ public: struct PathWithInfo { + PathWithInfo() = default; + PathWithInfo(const String & path_, const std::optional & info_) : path(path_), info(info_) {} String path; std::optional info; }; From b8cac9499d01bd51e4b8a669c7d23104c656dc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 26 Jul 2023 13:18:09 +0000 Subject: [PATCH 419/478] Add tests to reproduce the problem --- .../0_stateless/02833_local_with_dialect.reference | 2 ++ tests/queries/0_stateless/02833_local_with_dialect.sh | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/02833_local_with_dialect.reference create mode 100755 tests/queries/0_stateless/02833_local_with_dialect.sh diff --git a/tests/queries/0_stateless/02833_local_with_dialect.reference b/tests/queries/0_stateless/02833_local_with_dialect.reference new file mode 100644 index 00000000000..dbb67375997 --- /dev/null +++ b/tests/queries/0_stateless/02833_local_with_dialect.reference @@ -0,0 +1,2 @@ +0 +[?2004h[?2004lBye. diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh new file mode 100755 index 00000000000..2a2e1b09459 --- /dev/null +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +echo "exit" | ${CLICKHOUSE_LOCAL} --query "from s\"SELECT * FROM numbers(1)\"" --dialect prql --interactive From 3a6aaa29c9db0db1bc2875b7323c334148da387f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 26 Jul 2023 15:25:57 +0000 Subject: [PATCH 420/478] Do not load suggestions in case not ClickHouse dialects --- programs/local/LocalServer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 3c2a8ae3152..6ac7edaf1d9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -569,7 +569,9 @@ void LocalServer::processConfig() } print_stack_trace = config().getBool("stacktrace", false); - load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false); + const std::string clickhouse_dialect{"clickhouse"}; + load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false) + && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect; auto logging = (config().has("logger.console") || config().has("logger.level") From 0a838dc6d19af963a021aa1910f2144839f21d4a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 26 Jul 2023 18:30:18 +0200 Subject: [PATCH 421/478] Revert "Remove `mmap/mremap/munmap` from Allocator.h" (#52589) --- src/Common/Allocator.cpp | 26 ++- src/Common/Allocator.h | 182 ++++++++++++++---- src/Common/Allocator_fwd.h | 2 +- src/Common/CurrentMetrics.cpp | 2 + src/Common/HashTable/HashTableAllocator.h | 2 +- .../01778_mmap_cache_infra.reference | 2 + 6 files changed, 177 insertions(+), 39 deletions(-) diff --git a/src/Common/Allocator.cpp b/src/Common/Allocator.cpp index 769df70d71e..0fb90e5a47e 100644 --- a/src/Common/Allocator.cpp +++ b/src/Common/Allocator.cpp @@ -1,4 +1,26 @@ #include "Allocator.h" -template class Allocator; -template class Allocator; +/** Keep definition of this constant in cpp file; otherwise its value + * is inlined into allocator code making it impossible to override it + * in third-party code. + * + * Note: extern may seem redundant, but is actually needed due to bug in GCC. + * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html + */ +#ifdef NDEBUG + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20); +#else + /** + * In debug build, use small mmap threshold to reproduce more memory + * stomping bugs. Along with ASLR it will hopefully detect more issues than + * ASan. The program may fail due to the limit on number of memory mappings. + * + * Not too small to avoid too quick exhaust of memory mappings. + */ + __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 16384; +#endif + +template class Allocator; +template class Allocator; +template class Allocator; +template class Allocator; diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 1e77e988326..5180fbdaa2d 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -36,26 +36,51 @@ #include +/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +/** + * Many modern allocators (for example, tcmalloc) do not do a mremap for + * realloc, even in case of large enough chunks of memory. Although this allows + * you to increase performance and reduce memory consumption during realloc. 
+ * To fix this, we do mremap manually if the chunk of memory is large enough. + * The threshold (64 MB) is chosen quite large, since changing the address + * space is very slow, especially in the case of a large number of threads. We + * expect that the set of operations mmap/something to do/mremap can only be + * performed about 1000 times per second. + * + * P.S. This is also required, because tcmalloc can not allocate a chunk of + * memory greater than 16 GB. + * + * P.P.S. Note that MMAP_THRESHOLD symbol is intentionally made weak. It allows + * to override it during linkage when using ClickHouse as a library in + * third-party applications which may already use own allocator doing mmaps + * in the implementation of alloc/realloc. + */ +extern const size_t MMAP_THRESHOLD; + static constexpr size_t MALLOC_MIN_ALIGNMENT = 8; +namespace CurrentMetrics +{ + extern const Metric MMappedAllocs; + extern const Metric MMappedAllocBytes; +} + namespace DB { - namespace ErrorCodes { + extern const int BAD_ARGUMENTS; extern const int CANNOT_ALLOCATE_MEMORY; + extern const int CANNOT_MUNMAP; + extern const int CANNOT_MREMAP; extern const int LOGICAL_ERROR; } - } -/** Previously there was a code which tried to use manual mmap and mremap (clickhouse_mremap.h) for large allocations/reallocations (64MB+). - * Most modern allocators (including jemalloc) don't use mremap, so the idea was to take advantage from mremap system call for large reallocs. - * Actually jemalloc had support for mremap, but it was intentionally removed from codebase https://github.com/jemalloc/jemalloc/commit/e2deab7a751c8080c2b2cdcfd7b11887332be1bb. - * Our performance tests also shows that without manual mmap/mremap/munmap clickhouse is overall faster for about 1-2% and up to 5-7x for some types of queries. - * That is why we don't do manuall mmap/mremap/munmap here and completely rely on jemalloc for allocations of any size. - */ - /** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena. * Also used in hash tables. * The interface is different from std::allocator @@ -63,8 +88,10 @@ namespace ErrorCodes * - passing the size into the `free` method; * - by the presence of the `alignment` argument; * - the possibility of zeroing memory (used in hash tables); + * - random hint address for mmap + * - mmap_threshold for using mmap less or more */ -template +template class Allocator { public: @@ -82,7 +109,7 @@ public: try { checkSize(size); - freeNoTrack(buf); + freeNoTrack(buf, size); CurrentMemoryTracker::free(size); } catch (...) @@ -105,26 +132,49 @@ public: /// nothing to do. /// BTW, it's not possible to change alignment while doing realloc. } - else if (alignment <= MALLOC_MIN_ALIGNMENT) + else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD + && alignment <= MALLOC_MIN_ALIGNMENT) { /// Resize malloc'd memory region with no special alignment requirement. 
CurrentMemoryTracker::realloc(old_size, new_size); void * new_buf = ::realloc(buf, new_size); if (nullptr == new_buf) - { - DB::throwFromErrno( - fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - } + DB::throwFromErrno(fmt::format("Allocator: Cannot realloc from {} to {}.", ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); buf = new_buf; if constexpr (clear_memory) if (new_size > old_size) memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); } + else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) + { + /// Resize mmap'd memory region. + CurrentMemoryTracker::realloc(old_size, new_size); + + // On apple and freebsd self-implemented mremap used (common/mremap.h) + buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, + PROT_READ | PROT_WRITE, mmap_flags, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno(fmt::format("Allocator: Cannot mremap memory chunk from {} to {}.", + ReadableSize(old_size), ReadableSize(new_size)), DB::ErrorCodes::CANNOT_MREMAP); + + /// No need for zero-fill, because mmap guarantees it. + } + else if (new_size < MMAP_THRESHOLD) + { + /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. + CurrentMemoryTracker::realloc(old_size, new_size); + + void * new_buf = allocNoTrack(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + freeNoTrack(buf, old_size); + buf = new_buf; + } else { /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. + void * new_buf = alloc(new_size, alignment); memcpy(new_buf, buf, std::min(old_size, new_size)); free(buf, old_size); @@ -142,38 +192,83 @@ protected: static constexpr bool clear_memory = clear_memory_; + // Freshly mmapped pages are copy-on-write references to a global zero page. + // On the first write, a page fault occurs, and an actual writable page is + // allocated. If we are going to use this memory soon, such as when resizing + // hash tables, it makes sense to pre-fault the pages by passing + // MAP_POPULATE to mmap(). This takes some time, but should be faster + // overall than having a hot loop interrupted by page faults. + // It is only supported on Linux. + static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS +#if defined(OS_LINUX) + | (mmap_populate ? MAP_POPULATE : 0) +#endif + ; + private: void * allocNoTrack(size_t size, size_t alignment) { void * buf; - if (alignment <= MALLOC_MIN_ALIGNMENT) - { - if constexpr (clear_memory) - buf = ::calloc(size, 1); - else - buf = ::malloc(size); + size_t mmap_min_alignment = ::getPageSize(); - if (nullptr == buf) - DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + if (size >= MMAP_THRESHOLD) + { + if (alignment > mmap_min_alignment) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "Too large alignment {}: more than page size when allocating {}.", + ReadableSize(alignment), ReadableSize(size)); + + buf = mmap(getMmapHint(), size, PROT_READ | PROT_WRITE, + mmap_flags, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + /// No need for zero-fill, because mmap guarantees it. 
+ + CurrentMetrics::add(CurrentMetrics::MMappedAllocs); + CurrentMetrics::add(CurrentMetrics::MMappedAllocBytes, size); } else { - buf = nullptr; - int res = posix_memalign(&buf, alignment, size); + if (alignment <= MALLOC_MIN_ALIGNMENT) + { + if constexpr (clear_memory) + buf = ::calloc(size, 1); + else + buf = ::malloc(size); - if (0 != res) - DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)), - DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + if (nullptr == buf) + DB::throwFromErrno(fmt::format("Allocator: Cannot malloc {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + else + { + buf = nullptr; + int res = posix_memalign(&buf, alignment, size); - if constexpr (clear_memory) - memset(buf, 0, size); + if (0 != res) + DB::throwFromErrno(fmt::format("Cannot allocate memory (posix_memalign) {}.", ReadableSize(size)), + DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); + + if constexpr (clear_memory) + memset(buf, 0, size); + } } return buf; } - void freeNoTrack(void * buf) + void freeNoTrack(void * buf, size_t size) { - ::free(buf); + if (size >= MMAP_THRESHOLD) + { + if (0 != munmap(buf, size)) + DB::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", ReadableSize(size)), DB::ErrorCodes::CANNOT_MUNMAP); + + CurrentMetrics::sub(CurrentMetrics::MMappedAllocs); + CurrentMetrics::sub(CurrentMetrics::MMappedAllocBytes, size); + } + else + { + ::free(buf); + } } void checkSize(size_t size) @@ -182,6 +277,21 @@ private: if (size >= 0x8000000000000000ULL) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Too large size ({}) passed to allocator. It indicates an error.", size); } + +#ifndef NDEBUG + /// In debug builds, request mmap() at random addresses (a kind of ASLR), to + /// reproduce more memory stomping bugs. Note that Linux doesn't do it by + /// default. This may lead to worse TLB performance. + void * getMmapHint() + { + return reinterpret_cast(std::uniform_int_distribution(0x100000000000UL, 0x700000000000UL)(thread_local_rng)); + } +#else + void * getMmapHint() + { + return nullptr; + } +#endif }; @@ -257,5 +367,7 @@ constexpr size_t allocatorInitialBytes; -extern template class Allocator; +extern template class Allocator; +extern template class Allocator; +extern template class Allocator; +extern template class Allocator; diff --git a/src/Common/Allocator_fwd.h b/src/Common/Allocator_fwd.h index a96bc2a503b..a13a4398654 100644 --- a/src/Common/Allocator_fwd.h +++ b/src/Common/Allocator_fwd.h @@ -3,7 +3,7 @@ * This file provides forward declarations for Allocator. 
*/ -template +template class Allocator; template diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 583b13cf79d..e290fc8ccd3 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -173,6 +173,8 @@ M(PartsInMemory, "In-memory parts.") \ M(MMappedFiles, "Total number of mmapped files.") \ M(MMappedFileBytes, "Sum size of mmapped file regions.") \ + M(MMappedAllocs, "Total number of mmapped allocations") \ + M(MMappedAllocBytes, "Sum bytes of mmapped allocations") \ M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \ M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \ M(KafkaConsumers, "Number of active Kafka consumers") \ diff --git a/src/Common/HashTable/HashTableAllocator.h b/src/Common/HashTable/HashTableAllocator.h index 8252265111d..47e3fdfc4b6 100644 --- a/src/Common/HashTable/HashTableAllocator.h +++ b/src/Common/HashTable/HashTableAllocator.h @@ -8,7 +8,7 @@ * table, so it makes sense to pre-fault the pages so that page faults don't * interrupt the resize loop. Set the allocator parameter accordingly. */ -using HashTableAllocator = Allocator; +using HashTableAllocator = Allocator; template using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory; diff --git a/tests/queries/0_stateless/01778_mmap_cache_infra.reference b/tests/queries/0_stateless/01778_mmap_cache_infra.reference index 0e82b277bc1..ed365028ecc 100644 --- a/tests/queries/0_stateless/01778_mmap_cache_infra.reference +++ b/tests/queries/0_stateless/01778_mmap_cache_infra.reference @@ -2,5 +2,7 @@ CreatedReadBufferMMap CreatedReadBufferMMapFailed MMappedFileCacheHits MMappedFileCacheMisses +MMappedAllocBytes +MMappedAllocs MMappedFileBytes MMappedFiles From d89e2e6a27746dbb8febd2990d1ed3c23fcf153b Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com> Date: Wed, 26 Jul 2023 19:58:41 +0200 Subject: [PATCH 422/478] Add SYSTEM STOP LISTEN query (#51016) Co-authored-by: Nikita Mikhaylov Co-authored-by: Nikita Mikhaylov --- docs/en/sql-reference/statements/system.md | 26 + programs/keeper/CMakeLists.txt | 1 + programs/server/Server.cpp | 448 +++++++++++------- programs/server/Server.h | 14 +- src/Access/Common/AccessType.h | 1 + src/Interpreters/Context.cpp | 34 ++ src/Interpreters/Context.h | 8 + src/Interpreters/InterpreterSystemQuery.cpp | 19 +- src/Parsers/ASTSystemQuery.cpp | 11 + src/Parsers/ASTSystemQuery.h | 7 +- src/Parsers/ParserSystemQuery.cpp | 36 ++ src/Parsers/examples/CMakeLists.txt | 4 +- src/Server/ServerType.cpp | 138 ++++++ src/Server/ServerType.h | 44 ++ .../test_system_start_stop_listen/__init__.py | 0 .../configs/cluster.xml | 16 + .../test_system_start_stop_listen/test.py | 40 ++ .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 6 +- 19 files changed, 663 insertions(+), 191 deletions(-) create mode 100644 src/Server/ServerType.cpp create mode 100644 src/Server/ServerType.h create mode 100644 tests/integration/test_system_start_stop_listen/__init__.py create mode 100644 tests/integration/test_system_start_stop_listen/configs/cluster.xml create mode 100644 tests/integration/test_system_start_stop_listen/test.py diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 65a35f03fbe..fb601cd5d35 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -414,3 +414,29 @@ Will do sync syscall. 
```sql SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` + + +### SYSTEM STOP LISTEN + +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. + +However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. + +```sql +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. +- If `QUERIES ALL` modifier is specified, all protocols are stopped. +- If `QUERIES DEFAULT` modifier is specified, all default protocols are stopped. +- If `QUERIES CUSTOM` modifier is specified, all custom protocols are stopped. + +### SYSTEM START LISTEN + +Allows new connections to be established on the specified protocols. + +However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. + +```sql +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index abf31a7a499..43a8d84b513 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -65,6 +65,7 @@ if (BUILD_STANDALONE_KEEPER) ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusRequestHandler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/PrometheusMetricsWriter.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/waitServersToFinish.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ServerType.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTPRequestHandlerFactoryMain.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/HTTPServer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/HTTP/ReadHeaders.cpp diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 960b6574633..dce52ecdb12 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1457,6 +1457,24 @@ try access_control.reload(AccessControl::ReloadMode::USERS_CONFIG_ONLY); }); + global_context->setStopServersCallback([&](const ServerType & server_type) + { + stopServers(servers, server_type); + }); + + global_context->setStartServersCallback([&](const ServerType & server_type) + { + createServers( + config(), + listen_hosts, + listen_try, + server_pool, + async_metrics, + servers, + /* start_servers= */ true, + server_type); + }); + /// Limit on total number of concurrently executed queries. global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries); @@ -1998,7 +2016,8 @@ void Server::createServers( Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -2012,6 +2031,9 @@ void Server::createServers( for (const auto & protocol : protocols) { + if (!server_type.shouldStart(ServerType::Type::CUSTOM, protocol)) + continue; + std::vector hosts; if (config.has("protocols." + protocol + ".host")) hosts.push_back(config.getString("protocols." 
+ protocol + ".host")); @@ -2058,162 +2080,190 @@ void Server::createServers( for (const auto & listen_host : listen_hosts) { - /// HTTP - const char * port_name = "http_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); + const char * port_name; - return ProtocolServerAdapter( - listen_host, - port_name, - "http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); - }); - - /// HTTPS - port_name = "https_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::HTTP)) { + /// HTTP + port_name = "http_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + + return ProtocolServerAdapter( + listen_host, + port_name, + "http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::HTTPS)) + { + /// HTTPS + port_name = "https_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "https://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "https://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } - /// TCP - port_name = "tcp_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - 
socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP + port_name = "tcp_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt - port_name = "tcp_with_proxy_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_WITH_PROXY)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "native protocol (tcp) with PROXY: " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), - server_pool, - socket, - new Poco::Net::TCPServerParams)); - }); + /// TCP with PROXY protocol, see https://github.com/wolfeidau/proxyv2/blob/master/docs/proxy-protocol.txt + port_name = "tcp_with_proxy_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "native protocol (tcp) with PROXY: " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + }); + } - /// TCP with SSL - port_name = "tcp_port_secure"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::TCP_SECURE)) { -#if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "secure native protocol (tcp_secure): " + address.toString(), - std::make_unique( - new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), - server_pool, - socket, - new Poco::Net::TCPServerParams)); -#else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without 
NetSSL support."); -#endif - }); + /// TCP with SSL + port_name = "tcp_port_secure"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + #if USE_SSL + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "secure native protocol (tcp_secure): " + address.toString(), + std::make_unique( + new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), + server_pool, + socket, + new Poco::Net::TCPServerParams)); + #else + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + #endif + }); + } - port_name = "mysql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::MYSQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "MySQL compatibility protocol: " + address.toString(), - std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "mysql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "MySQL compatibility protocol: " + address.toString(), + std::make_unique(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } - port_name = "postgresql_port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::POSTGRESQL)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(Poco::Timespan()); - socket.setSendTimeout(settings.send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "PostgreSQL compatibility protocol: " + address.toString(), - std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); - }); + port_name = "postgresql_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(Poco::Timespan()); + socket.setSendTimeout(settings.send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "PostgreSQL compatibility protocol: " + address.toString(), + std::make_unique(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + }); + } #if USE_GRPC - port_name = "grpc_port"; - 
createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::GRPC)) { - Poco::Net::SocketAddress server_address(listen_host, port); - return ProtocolServerAdapter( - listen_host, - port_name, - "gRPC protocol: " + server_address.toString(), - std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); - }); + port_name = "grpc_port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::SocketAddress server_address(listen_host, port); + return ProtocolServerAdapter( + listen_host, + port_name, + "gRPC protocol: " + server_address.toString(), + std::make_unique(*this, makeSocketAddress(listen_host, port, &logger()))); + }); + } #endif - - /// Prometheus (if defined and not setup yet with http_port) - port_name = "prometheus.port"; - createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::PROMETHEUS)) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - listen_host, - port_name, - "Prometheus: http://" + address.toString(), - std::make_unique( - httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); - }); + /// Prometheus (if defined and not setup yet with http_port) + port_name = "prometheus.port"; + createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + listen_host, + port_name, + "Prometheus: http://" + address.toString(), + std::make_unique( + httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + }); + } } } @@ -2224,7 +2274,8 @@ void Server::createInterserverServers( Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers) + bool start_servers, + const ServerType & server_type) { const Settings & settings = global_context->getSettingsRef(); @@ -2236,52 +2287,97 @@ void Server::createInterserverServers( /// Now iterate over interserver_listen_hosts for (const auto & interserver_listen_host : interserver_listen_hosts) { - /// Interserver IO HTTP - const char * port_name = "interserver_http_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter - { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "replica communication (interserver): http://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), - server_pool, - 
socket, - http_params)); - }); + const char * port_name; - port_name = "interserver_https_port"; - createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTP)) { + /// Interserver IO HTTP + port_name = "interserver_http_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "replica communication (interserver): http://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); + }); + } + + if (server_type.shouldStart(ServerType::Type::INTERSERVER_HTTPS)) + { + port_name = "interserver_https_port"; + createServer(config, interserver_listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter + { #if USE_SSL - Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); - socket.setReceiveTimeout(settings.http_receive_timeout); - socket.setSendTimeout(settings.http_send_timeout); - return ProtocolServerAdapter( - interserver_listen_host, - port_name, - "secure replica communication (interserver): https://" + address.toString(), - std::make_unique( - httpContext(), - createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), - server_pool, - socket, - http_params)); + Poco::Net::SecureServerSocket socket; + auto address = socketBindListen(config, socket, interserver_listen_host, port, /* secure = */ true); + socket.setReceiveTimeout(settings.http_receive_timeout); + socket.setSendTimeout(settings.http_send_timeout); + return ProtocolServerAdapter( + interserver_listen_host, + port_name, + "secure replica communication (interserver): https://" + address.toString(), + std::make_unique( + httpContext(), + createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); #else - UNUSED(port); - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); + UNUSED(port); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif - }); + }); + } } } +void Server::stopServers( + std::vector & servers, + const ServerType & server_type +) const +{ + Poco::Logger * log = &logger(); + + /// Remove servers once all their connections are closed + auto check_server = [&log](const char prefix[], auto & server) + { + if (!server.isStopping()) + return false; + size_t current_connections = server.currentConnections(); + LOG_DEBUG(log, "Server {}{}: {} ({} connections)", + server.getDescription(), + prefix, + !current_connections ? 
"finished" : "waiting", + current_connections); + return !current_connections; + }; + + std::erase_if(servers, std::bind_front(check_server, " (from one of previous remove)")); + + for (auto & server : servers) + { + if (!server.isStopping()) + { + const std::string server_port_name = server.getPortName(); + + if (server_type.shouldStop(server_port_name)) + server.stop(); + } + } + + std::erase_if(servers, std::bind_front(check_server, "")); +} + void Server::updateServers( Poco::Util::AbstractConfiguration & config, Poco::ThreadPool & server_pool, diff --git a/programs/server/Server.h b/programs/server/Server.h index d13378dcd65..3f03dd137ef 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -3,8 +3,9 @@ #include #include -#include "Server/HTTP/HTTPContext.h" +#include #include +#include #include /** Server provides three interfaces: @@ -106,7 +107,8 @@ private: Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void createInterserverServers( Poco::Util::AbstractConfiguration & config, @@ -115,7 +117,8 @@ private: Poco::ThreadPool & server_pool, AsynchronousMetrics & async_metrics, std::vector & servers, - bool start_servers = false); + bool start_servers = false, + const ServerType & server_type = ServerType(ServerType::Type::QUERIES_ALL)); void updateServers( Poco::Util::AbstractConfiguration & config, @@ -123,6 +126,11 @@ private: AsynchronousMetrics & async_metrics, std::vector & servers, std::vector & servers_to_start_before_tables); + + void stopServers( + std::vector & servers, + const ServerType & server_type + ) const; }; } diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 374a1dd04a4..b253a0e13ce 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -187,6 +187,7 @@ enum class AccessType M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \ M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \ M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \ + M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \ M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \ \ M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index cc77e0fe723..f83e524ffb9 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -357,6 +358,9 @@ struct ContextSharedPart : boost::noncopyable Context::ConfigReloadCallback config_reload_callback; + Context::StartStopServersCallback start_servers_callback; + Context::StartStopServersCallback stop_servers_callback; + bool is_server_completely_started = false; #if USE_ROCKSDB @@ -3688,6 +3692,36 @@ void Context::reloadConfig() const shared->config_reload_callback(); } +void Context::setStartServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. 
+ shared->start_servers_callback = std::move(callback); +} + +void Context::setStopServersCallback(StartStopServersCallback && callback) +{ + /// Is initialized at server startup, so lock isn't required. Otherwise use mutex. + shared->stop_servers_callback = std::move(callback); +} + +void Context::startServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->start_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't start servers because start_servers_callback is not set."); + + shared->start_servers_callback(server_type); +} + +void Context::stopServers(const ServerType & server_type) const +{ + /// Use mutex if callback may be changed after startup. + if (!shared->stop_servers_callback) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't stop servers because stop_servers_callback is not set."); + + shared->stop_servers_callback(server_type); +} + void Context::shutdown() { diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index fa210f04451..75752774d4c 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -134,6 +134,7 @@ using StoragePolicyPtr = std::shared_ptr; using StoragePoliciesMap = std::map; class StoragePolicySelector; using StoragePolicySelectorPtr = std::shared_ptr; +class ServerType; template class MergeTreeBackgroundExecutor; @@ -1057,6 +1058,13 @@ public: void setConfigReloadCallback(ConfigReloadCallback && callback); void reloadConfig() const; + using StartStopServersCallback = std::function; + void setStartServersCallback(StartStopServersCallback && callback); + void setStopServersCallback(StartStopServersCallback && callback); + + void startServers(const ServerType & server_type) const; + void stopServers(const ServerType & server_type) const; + void shutdown(); bool isInternalQuery() const { return is_internal_query; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index 1bd30e06888..3207da9941a 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -556,6 +556,14 @@ BlockIO InterpreterSystemQuery::execute() ); break; } + case Type::STOP_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->stopServers(query.server_type); + break; + case Type::START_LISTEN: + getContext()->checkAccess(AccessType::SYSTEM_LISTEN); + getContext()->startServers(query.server_type); + break; case Type::FLUSH_ASYNC_INSERT_QUEUE: { getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE); @@ -567,9 +575,6 @@ BlockIO InterpreterSystemQuery::execute() queue->flushAll(); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type); case Type::STOP_THREAD_FUZZER: getContext()->checkAccess(AccessType::SYSTEM_THREAD_FUZZER); ThreadFuzzer::stop(); @@ -1181,8 +1186,12 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE); break; } - case Type::STOP_LISTEN_QUERIES: - case Type::START_LISTEN_QUERIES: + case Type::STOP_LISTEN: + case Type::START_LISTEN: + { + required_access.emplace_back(AccessType::SYSTEM_LISTEN); + break; + } case Type::STOP_THREAD_FUZZER: case Type::START_THREAD_FUZZER: case Type::ENABLE_FAILPOINT: diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index a91449ff035..754eb825dcc 100644 --- 
a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -220,6 +220,17 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { settings.ostr << (settings.hilite ? hilite_none : ""); } + else if (type == Type::START_LISTEN || type == Type::STOP_LISTEN) + { + settings.ostr << (settings.hilite ? hilite_keyword : "") << " " << ServerType::serverTypeToString(server_type.type) + << (settings.hilite ? hilite_none : ""); + + if (server_type.type == ServerType::CUSTOM) + { + settings.ostr << (settings.hilite ? hilite_identifier : "") << " " << backQuoteIfNeed(server_type.custom_name); + } + + } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index b18f8fc7b07..ebaf357c0ab 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "config.h" @@ -35,8 +36,8 @@ public: #if USE_AWS_S3 DROP_S3_CLIENT_CACHE, #endif - STOP_LISTEN_QUERIES, - START_LISTEN_QUERIES, + STOP_LISTEN, + START_LISTEN, RESTART_REPLICAS, RESTART_REPLICA, RESTORE_REPLICA, @@ -116,6 +117,8 @@ public: SyncReplicaMode sync_replica_mode = SyncReplicaMode::DEFAULT; + ServerType server_type; + String getID(char) const override { return "SYSTEM query"; } ASTPtr clone() const override diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 48dbe60e241..9aff0e8879e 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -442,6 +442,42 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & break; } + case Type::START_LISTEN: + case Type::STOP_LISTEN: + { + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; + + ServerType::Type current_type = ServerType::Type::END; + std::string current_custom_name; + + for (const auto & type : magic_enum::enum_values()) + { + if (ParserKeyword{ServerType::serverTypeToString(type)}.ignore(pos, expected)) + { + current_type = type; + break; + } + } + + if (current_type == ServerType::Type::END) + return false; + + if (current_type == ServerType::CUSTOM) + { + ASTPtr ast; + + if (!ParserStringLiteral{}.parse(pos, ast, expected)) + return false; + + current_custom_name = ast->as().value.get(); + } + + res->server_type = ServerType(current_type, current_custom_name); + + break; + } + default: { if (!parseQueryWithOnCluster(res, pos, expected)) diff --git a/src/Parsers/examples/CMakeLists.txt b/src/Parsers/examples/CMakeLists.txt index 82ca7bc0688..e411574bd65 100644 --- a/src/Parsers/examples/CMakeLists.txt +++ b/src/Parsers/examples/CMakeLists.txt @@ -3,8 +3,8 @@ set(SRCS) clickhouse_add_executable(lexer lexer.cpp ${SRCS}) target_link_libraries(lexer PRIVATE clickhouse_parsers) -clickhouse_add_executable(select_parser select_parser.cpp ${SRCS}) +clickhouse_add_executable(select_parser select_parser.cpp ${SRCS} "../../Server/ServerType.cpp") target_link_libraries(select_parser PRIVATE clickhouse_parsers) -clickhouse_add_executable(create_parser create_parser.cpp ${SRCS}) +clickhouse_add_executable(create_parser create_parser.cpp ${SRCS} "../../Server/ServerType.cpp") target_link_libraries(create_parser PRIVATE clickhouse_parsers) diff --git a/src/Server/ServerType.cpp b/src/Server/ServerType.cpp new file mode 100644 index 00000000000..c6916ee39d9 --- /dev/null +++ b/src/Server/ServerType.cpp @@ -0,0 +1,138 @@ +#include + +#include +#include +#include + +#include + +namespace DB +{ + +namespace +{ + std::vector getTypeIndexToTypeName() + { + constexpr 
std::size_t types_size = magic_enum::enum_count(); + + std::vector type_index_to_type_name; + type_index_to_type_name.resize(types_size); + + auto entries = magic_enum::enum_entries(); + for (const auto & [entry, str] : entries) + { + auto str_copy = String(str); + std::replace(str_copy.begin(), str_copy.end(), '_', ' '); + type_index_to_type_name[static_cast(entry)] = std::move(str_copy); + } + + return type_index_to_type_name; + } +} + +const char * ServerType::serverTypeToString(ServerType::Type type) +{ + /** During parsing if SystemQuery is not parsed properly it is added to Expected variants as description check IParser.h. + * Description string must be statically allocated. + */ + static std::vector type_index_to_type_name = getTypeIndexToTypeName(); + const auto & type_name = type_index_to_type_name[static_cast(type)]; + return type_name.data(); +} + +bool ServerType::shouldStart(Type server_type, const std::string & custom_name_) const +{ + if (type == Type::QUERIES_ALL) + return true; + + if (type == Type::QUERIES_DEFAULT) + { + switch (server_type) + { + case Type::TCP: + case Type::TCP_WITH_PROXY: + case Type::TCP_SECURE: + case Type::HTTP: + case Type::HTTPS: + case Type::MYSQL: + case Type::GRPC: + case Type::POSTGRESQL: + case Type::PROMETHEUS: + case Type::INTERSERVER_HTTP: + case Type::INTERSERVER_HTTPS: + return true; + default: + return false; + } + } + + if (type == Type::QUERIES_CUSTOM) + { + switch (server_type) + { + case Type::CUSTOM: + return true; + default: + return false; + } + } + + return type == server_type && custom_name == custom_name_; +} + +bool ServerType::shouldStop(const std::string & port_name) const +{ + Type port_type; + std::string port_custom_name; + + if (port_name == "http_port") + port_type = Type::HTTP; + + else if (port_name == "https_port") + port_type = Type::HTTPS; + + else if (port_name == "tcp_port") + port_type = Type::TCP; + + else if (port_name == "tcp_with_proxy_port") + port_type = Type::TCP_WITH_PROXY; + + else if (port_name == "tcp_port_secure") + port_type = Type::TCP_SECURE; + + else if (port_name == "mysql_port") + port_type = Type::MYSQL; + + else if (port_name == "postgresql_port") + port_type = Type::POSTGRESQL; + + else if (port_name == "grpc_port") + port_type = Type::GRPC; + + else if (port_name == "prometheus.port") + port_type = Type::PROMETHEUS; + + else if (port_name == "interserver_http_port") + port_type = Type::INTERSERVER_HTTP; + + else if (port_name == "interserver_https_port") + port_type = Type::INTERSERVER_HTTPS; + + else if (port_name.starts_with("protocols.") && port_name.ends_with(".port")) + { + constexpr size_t protocols_size = std::string_view("protocols.").size(); + constexpr size_t port_size = std::string_view("protocols.").size(); + + port_type = Type::CUSTOM; + port_custom_name = port_name.substr(protocols_size, port_name.size() - port_size); + } + else + port_type = Type::UNKNOWN; + + if (port_type == Type::UNKNOWN) + return false; + + return shouldStart(type, port_custom_name); +} + +} diff --git a/src/Server/ServerType.h b/src/Server/ServerType.h new file mode 100644 index 00000000000..345d1a10119 --- /dev/null +++ b/src/Server/ServerType.h @@ -0,0 +1,44 @@ +#pragma once + +#include +namespace DB +{ + +class ServerType +{ +public: + + enum Type + { + UNKNOWN, + TCP, + TCP_WITH_PROXY, + TCP_SECURE, + HTTP, + HTTPS, + MYSQL, + GRPC, + POSTGRESQL, + PROMETHEUS, + CUSTOM, + INTERSERVER_HTTP, + INTERSERVER_HTTPS, + QUERIES_ALL, + QUERIES_DEFAULT, + QUERIES_CUSTOM, + END + }; + + ServerType() = default; 
+ explicit ServerType(Type type_, const std::string & custom_name_ = "") : type(type_), custom_name(custom_name_) {} + + static const char * serverTypeToString(Type type); + + bool shouldStart(Type server_type, const std::string & custom_name_ = "") const; + bool shouldStop(const std::string & port_name) const; + + Type type; + std::string custom_name; +}; + +} diff --git a/tests/integration/test_system_start_stop_listen/__init__.py b/tests/integration/test_system_start_stop_listen/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_start_stop_listen/configs/cluster.xml b/tests/integration/test_system_start_stop_listen/configs/cluster.xml new file mode 100644 index 00000000000..93d8f890f40 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/configs/cluster.xml @@ -0,0 +1,16 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/tests/integration/test_system_start_stop_listen/test.py b/tests/integration/test_system_start_stop_listen/test.py new file mode 100644 index 00000000000..ec1a000c599 --- /dev/null +++ b/tests/integration/test_system_start_stop_listen/test.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + + +import pytest +import time +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import assert_eq_with_retry +import random +import string +import json + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance( + "node1", main_configs=["configs/cluster.xml"], with_zookeeper=True +) +node2 = cluster.add_instance( + "node2", main_configs=["configs/cluster.xml"], with_zookeeper=True +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_system_start_stop_listen_queries(started_cluster): + node1.query("SYSTEM STOP LISTEN QUERIES ALL") + + assert "Connection refused" in node1.query_and_get_error("SELECT 1", timeout=3) + + node2.query("SYSTEM START LISTEN ON CLUSTER default QUERIES ALL") + + node1.query("SELECT 1") diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index e6f7fa1ed2b..db0f2d8235b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -138,6 +138,7 @@ SYSTEM FLUSH [] \N SYSTEM SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM SYSTEM UNFREEZE ['SYSTEM UNFREEZE'] GLOBAL SYSTEM SYSTEM FAILPOINT ['SYSTEM ENABLE FAILPOINT','SYSTEM DISABLE FAILPOINT'] GLOBAL SYSTEM +SYSTEM LISTEN ['SYSTEM START LISTEN','SYSTEM STOP LISTEN'] GLOBAL SYSTEM SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL displaySecretsInShowAndSelect [] GLOBAL ALL diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index d58c76260c5..46d1f0e3a0b 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -297,7 +297,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER 
UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 
140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL 
MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -584,10 +584,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' 
= 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 
'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5, 'NAMED_COLLECTION' = 6)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER 
DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM' = 140, 'dictGet' = 141, 'displaySecretsInShowAndSelect' = 142, 'addressToLine' = 143, 'addressToLineWithInlines' = 144, 'addressToSymbol' = 145, 'demangle' = 146, 'INTROSPECTION' = 147, 'FILE' = 148, 'URL' = 149, 'REMOTE' = 150, 'MONGO' = 151, 'REDIS' = 152, 'MEILISEARCH' = 153, 'MYSQL' = 154, 'POSTGRES' = 155, 'SQLITE' = 156, 'ODBC' = 157, 'JDBC' = 158, 'HDFS' = 159, 'S3' = 160, 'HIVE' = 161, 'AZURE' = 162, 'SOURCES' = 163, 'CLUSTER' = 164, 'ALL' = 165, 'NONE' = 166)) + `parent_group` 
Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE ARBITRARY TEMPORARY TABLE' = 53, 'CREATE FUNCTION' = 54, 'CREATE NAMED COLLECTION' = 55, 'CREATE' = 56, 'DROP DATABASE' = 57, 'DROP TABLE' = 58, 'DROP VIEW' = 59, 'DROP DICTIONARY' = 60, 'DROP FUNCTION' = 61, 'DROP NAMED COLLECTION' = 62, 'DROP' = 63, 'UNDROP TABLE' = 64, 'TRUNCATE' = 65, 'OPTIMIZE' = 66, 'BACKUP' = 67, 'KILL QUERY' = 68, 'KILL TRANSACTION' = 69, 'MOVE PARTITION BETWEEN SHARDS' = 70, 'CREATE USER' = 71, 'ALTER USER' = 72, 'DROP USER' = 73, 'CREATE ROLE' = 74, 'ALTER ROLE' = 75, 'DROP ROLE' = 76, 'ROLE ADMIN' = 77, 'CREATE ROW POLICY' = 78, 'ALTER ROW POLICY' = 79, 'DROP ROW POLICY' = 80, 'CREATE QUOTA' = 81, 'ALTER QUOTA' = 82, 'DROP QUOTA' = 83, 'CREATE SETTINGS PROFILE' = 84, 'ALTER SETTINGS PROFILE' = 85, 'DROP SETTINGS PROFILE' = 86, 'SHOW USERS' = 87, 'SHOW ROLES' = 88, 'SHOW ROW POLICIES' = 89, 'SHOW QUOTAS' = 90, 'SHOW SETTINGS PROFILES' = 91, 'SHOW ACCESS' = 92, 'ACCESS MANAGEMENT' = 93, 'SHOW NAMED COLLECTIONS' = 94, 'SHOW NAMED COLLECTIONS SECRETS' = 95, 'NAMED COLLECTION' = 96, 'NAMED COLLECTION ADMIN' = 97, 'SYSTEM SHUTDOWN' = 98, 'SYSTEM DROP DNS CACHE' = 99, 'SYSTEM DROP MARK CACHE' = 100, 'SYSTEM DROP UNCOMPRESSED CACHE' = 101, 'SYSTEM DROP MMAP CACHE' = 102, 'SYSTEM DROP QUERY CACHE' = 103, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 104, 'SYSTEM DROP FILESYSTEM CACHE' = 105, 'SYSTEM DROP SCHEMA CACHE' = 106, 'SYSTEM DROP S3 CLIENT CACHE' = 107, 'SYSTEM DROP CACHE' = 108, 'SYSTEM RELOAD CONFIG' = 109, 'SYSTEM RELOAD USERS' = 110, 'SYSTEM RELOAD DICTIONARY' = 111, 'SYSTEM RELOAD MODEL' = 112, 'SYSTEM RELOAD FUNCTION' = 113, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 114, 'SYSTEM RELOAD' = 115, 'SYSTEM RESTART DISK' = 116, 'SYSTEM MERGES' = 117, 'SYSTEM TTL MERGES' = 118, 'SYSTEM FETCHES' = 119, 'SYSTEM MOVES' = 120, 'SYSTEM DISTRIBUTED SENDS' = 121, 'SYSTEM REPLICATED SENDS' = 122, 'SYSTEM SENDS' = 123, 'SYSTEM REPLICATION QUEUES' = 124, 'SYSTEM DROP REPLICA' = 125, 'SYSTEM SYNC REPLICA' = 126, 'SYSTEM RESTART REPLICA' = 127, 'SYSTEM RESTORE REPLICA' = 128, 'SYSTEM WAIT LOADING PARTS' = 129, 'SYSTEM SYNC DATABASE REPLICA' = 130, 'SYSTEM SYNC TRANSACTION LOG' = 131, 'SYSTEM SYNC FILE CACHE' = 132, 'SYSTEM FLUSH DISTRIBUTED' = 133, 
'SYSTEM FLUSH LOGS' = 134, 'SYSTEM FLUSH ASYNC INSERT QUEUE' = 135, 'SYSTEM FLUSH' = 136, 'SYSTEM THREAD FUZZER' = 137, 'SYSTEM UNFREEZE' = 138, 'SYSTEM FAILPOINT' = 139, 'SYSTEM LISTEN' = 140, 'SYSTEM' = 141, 'dictGet' = 142, 'displaySecretsInShowAndSelect' = 143, 'addressToLine' = 144, 'addressToLineWithInlines' = 145, 'addressToSymbol' = 146, 'demangle' = 147, 'INTROSPECTION' = 148, 'FILE' = 149, 'URL' = 150, 'REMOTE' = 151, 'MONGO' = 152, 'REDIS' = 153, 'MEILISEARCH' = 154, 'MYSQL' = 155, 'POSTGRES' = 156, 'SQLITE' = 157, 'ODBC' = 158, 'JDBC' = 159, 'HDFS' = 160, 'S3' = 161, 'HIVE' = 162, 'AZURE' = 163, 'SOURCES' = 164, 'CLUSTER' = 165, 'ALL' = 166, 'NONE' = 167)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' From 5aeeaebd00c90385b1d9d8e3c6b13d298240c752 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 26 Jul 2023 17:09:23 -0300 Subject: [PATCH 423/478] init and destroy ares channel on demand.. --- src/Common/CaresPTRResolver.cpp | 71 ++++++++++--------- src/Common/CaresPTRResolver.h | 29 ++++---- .../tests/gtest_dns_reverse_resolve.cpp | 35 ++++----- 3 files changed, 71 insertions(+), 64 deletions(-) diff --git a/src/Common/CaresPTRResolver.cpp b/src/Common/CaresPTRResolver.cpp index fee4d01eb37..bf61e940745 100644 --- a/src/Common/CaresPTRResolver.cpp +++ b/src/Common/CaresPTRResolver.cpp @@ -41,9 +41,25 @@ namespace DB } } - std::mutex CaresPTRResolver::mutex; + struct AresChannelRAII + { + AresChannelRAII() + { + if (ares_init(&channel) != ARES_SUCCESS) + { + throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to initialize c-ares channel"); + } + } - CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) : channel(nullptr) + ~AresChannelRAII() + { + ares_destroy(channel); + } + + ares_channel channel; + }; + + CaresPTRResolver::CaresPTRResolver(CaresPTRResolver::provider_token) { /* * ares_library_init is not thread safe. Currently, the only other usage of c-ares seems to be in grpc. @@ -57,34 +73,22 @@ namespace DB * */ static const auto library_init_result = ares_library_init(ARES_LIB_INIT_ALL); - if (library_init_result != ARES_SUCCESS || ares_init(&channel) != ARES_SUCCESS) + if (library_init_result != ARES_SUCCESS) { throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to initialize c-ares"); } } - CaresPTRResolver::~CaresPTRResolver() - { - ares_destroy(channel); - /* - * Library initialization is currently done only once in the constructor. Multiple instances of CaresPTRResolver - * will be used in the lifetime of ClickHouse, thus it's problematic to have de-init here. - * In a practical view, it makes little to no sense to de-init a DNS library since DNS requests will happen - * until the end of the program. Hence, ares_library_cleanup() will not be called. 
- * */ - } - std::unordered_set CaresPTRResolver::resolve(const std::string & ip) { - std::lock_guard guard(mutex); + AresChannelRAII channel_raii; std::unordered_set ptr_records; - resolve(ip, ptr_records); + resolve(ip, ptr_records, channel_raii.channel); - if (!wait_and_process()) + if (!wait_and_process(channel_raii.channel)) { - cancel_requests(); throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip); } @@ -93,22 +97,21 @@ namespace DB std::unordered_set CaresPTRResolver::resolve_v6(const std::string & ip) { - std::lock_guard guard(mutex); + AresChannelRAII channel_raii; std::unordered_set ptr_records; - resolve_v6(ip, ptr_records); + resolve_v6(ip, ptr_records, channel_raii.channel); - if (!wait_and_process()) + if (!wait_and_process(channel_raii.channel)) { - cancel_requests(); throw DB::Exception(DB::ErrorCodes::DNS_ERROR, "Failed to complete reverse DNS query for IP {}", ip); } return ptr_records; } - void CaresPTRResolver::resolve(const std::string & ip, std::unordered_set & response) + void CaresPTRResolver::resolve(const std::string & ip, std::unordered_set & response, ares_channel channel) { in_addr addr; @@ -117,7 +120,7 @@ namespace DB ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET, callback, &response); } - void CaresPTRResolver::resolve_v6(const std::string & ip, std::unordered_set & response) + void CaresPTRResolver::resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel) { in6_addr addr; inet_pton(AF_INET6, ip.c_str(), &addr); @@ -125,15 +128,15 @@ namespace DB ares_gethostbyaddr(channel, reinterpret_cast(&addr), sizeof(addr), AF_INET6, callback, &response); } - bool CaresPTRResolver::wait_and_process() + bool CaresPTRResolver::wait_and_process(ares_channel channel) { int sockets[ARES_GETSOCK_MAXNUM]; pollfd pollfd[ARES_GETSOCK_MAXNUM]; while (true) { - auto readable_sockets = get_readable_sockets(sockets, pollfd); - auto timeout = calculate_timeout(); + auto readable_sockets = get_readable_sockets(sockets, pollfd, channel); + auto timeout = calculate_timeout(channel); int number_of_fds_ready = 0; if (!readable_sockets.empty()) @@ -158,11 +161,11 @@ namespace DB if (number_of_fds_ready > 0) { - process_readable_sockets(readable_sockets); + process_readable_sockets(readable_sockets, channel); } else { - process_possible_timeout(); + process_possible_timeout(channel); break; } } @@ -170,12 +173,12 @@ namespace DB return true; } - void CaresPTRResolver::cancel_requests() + void CaresPTRResolver::cancel_requests(ares_channel channel) { ares_cancel(channel); } - std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd) + std::span CaresPTRResolver::get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel) { int sockets_bitmask = ares_getsock(channel, sockets, ARES_GETSOCK_MAXNUM); @@ -205,7 +208,7 @@ namespace DB return std::span(pollfd, number_of_sockets_to_poll); } - int64_t CaresPTRResolver::calculate_timeout() + int64_t CaresPTRResolver::calculate_timeout(ares_channel channel) { timeval tv; if (auto * tvp = ares_timeout(channel, nullptr, &tv)) @@ -218,14 +221,14 @@ namespace DB return 0; } - void CaresPTRResolver::process_possible_timeout() + void CaresPTRResolver::process_possible_timeout(ares_channel channel) { /* Call ares_process() unconditonally here, even if we simply timed out above, as otherwise the ares name resolve won't timeout! 
*/ ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD); } - void CaresPTRResolver::process_readable_sockets(std::span readable_sockets) + void CaresPTRResolver::process_readable_sockets(std::span readable_sockets, ares_channel channel) { for (auto readable_socket : readable_sockets) { diff --git a/src/Common/CaresPTRResolver.h b/src/Common/CaresPTRResolver.h index 454509ae43c..24a5e422ca8 100644 --- a/src/Common/CaresPTRResolver.h +++ b/src/Common/CaresPTRResolver.h @@ -28,32 +28,35 @@ namespace DB public: explicit CaresPTRResolver(provider_token); - ~CaresPTRResolver() override; + + /* + * Library initialization is currently done only once in the constructor. Multiple instances of CaresPTRResolver + * will be used in the lifetime of ClickHouse, thus it's problematic to have de-init here. + * In a practical view, it makes little to no sense to de-init a DNS library since DNS requests will happen + * until the end of the program. Hence, ares_library_cleanup() will not be called. + * */ + ~CaresPTRResolver() override = default; std::unordered_set resolve(const std::string & ip) override; std::unordered_set resolve_v6(const std::string & ip) override; private: - bool wait_and_process(); + bool wait_and_process(ares_channel channel); - void cancel_requests(); + void cancel_requests(ares_channel channel); - void resolve(const std::string & ip, std::unordered_set & response); + void resolve(const std::string & ip, std::unordered_set & response, ares_channel channel); - void resolve_v6(const std::string & ip, std::unordered_set & response); + void resolve_v6(const std::string & ip, std::unordered_set & response, ares_channel channel); - std::span get_readable_sockets(int * sockets, pollfd * pollfd); + std::span get_readable_sockets(int * sockets, pollfd * pollfd, ares_channel channel); - int64_t calculate_timeout(); + int64_t calculate_timeout(ares_channel channel); - void process_possible_timeout(); + void process_possible_timeout(ares_channel channel); - void process_readable_sockets(std::span readable_sockets); - - ares_channel channel; - - static std::mutex mutex; + void process_readable_sockets(std::span readable_sockets, ares_channel channel); }; } diff --git a/src/Common/tests/gtest_dns_reverse_resolve.cpp b/src/Common/tests/gtest_dns_reverse_resolve.cpp index 08351564eaf..de33deddac3 100644 --- a/src/Common/tests/gtest_dns_reverse_resolve.cpp +++ b/src/Common/tests/gtest_dns_reverse_resolve.cpp @@ -9,34 +9,35 @@ namespace DB { TEST(Common, ReverseDNS) { - auto addresses = std::vector({ - "8.8.8.8", "2001:4860:4860::8888", // dns.google - "142.250.219.35", // google.com - "157.240.12.35", // facebook - "208.84.244.116", "2600:1419:c400::214:c410", //www.terra.com.br, - "127.0.0.1", "::1" - }); - auto func = [&]() { // Good random seed, good engine auto rnd1 = std::mt19937(std::random_device{}()); - for (int i = 0; i < 50; ++i) + for (int i = 0; i < 10; ++i) { auto & dns_resolver_instance = DNSResolver::instance(); -// unfortunately, DNS cache can't be disabled because we might end up causing a DDoS attack -// dns_resolver_instance.setDisableCacheFlag(); + dns_resolver_instance.setDisableCacheFlag(); - auto addr_index = rnd1() % addresses.size(); + auto val1 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val2 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val3 = rnd1() % static_cast((pow(2, 31) - 1)); + auto val4 = rnd1() % static_cast((pow(2, 31) - 1)); - [[maybe_unused]] auto result = dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ addresses[addr_index] }); + 
uint32_t ipv4_buffer[1] = { + static_cast(val1) + }; -// will not assert either because some of the IP addresses might change in the future and -// this test will become flaky -// ASSERT_TRUE(!result.empty()); + uint32_t ipv6_buffer[4] = { + static_cast(val1), + static_cast(val2), + static_cast(val3), + static_cast(val4) + }; + + dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ ipv4_buffer, sizeof(ipv4_buffer)}); + dns_resolver_instance.reverseResolve(Poco::Net::IPAddress{ ipv6_buffer, sizeof(ipv6_buffer)}); } - }; auto number_of_threads = 200u; From 954a1d3edec8117a135c23b7ec60065249fa0f02 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 26 Jul 2023 23:38:14 +0300 Subject: [PATCH 424/478] Improve logging macros (#52519) * improve logging macros * fix * Update logger_useful.h * fix * fix --- base/poco/Foundation/include/Poco/Message.h | 2 + base/poco/Foundation/src/Message.cpp | 13 +++ src/Common/LoggingFormatStringHelpers.h | 63 ++++++++---- src/Common/logger_useful.h | 97 ++++++++++++++----- src/Common/tests/gtest_log.cpp | 53 ++++++++++ .../01164_detach_attach_partition_race.sh | 2 +- 6 files changed, 189 insertions(+), 41 deletions(-) diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h index e8f04888ab4..282c7fb5fd1 100644 --- a/base/poco/Foundation/include/Poco/Message.h +++ b/base/poco/Foundation/include/Poco/Message.h @@ -67,6 +67,8 @@ public: Message( const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {}); + Message( + std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str); /// Creates a Message with the given source, text, priority, /// source file path and line. 
/// diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp index 663c96e47a2..54118cc0fc5 100644 --- a/base/poco/Foundation/src/Message.cpp +++ b/base/poco/Foundation/src/Message.cpp @@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr } +Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str): + _source(std::move(source)), + _text(std::move(text)), + _prio(prio), + _tid(0), + _file(file), + _line(line), + _pMap(0), + _fmt_str(fmt_str) +{ + init(); +} + Message::Message(const Message& msg): _source(msg._source), _text(msg._text), diff --git a/src/Common/LoggingFormatStringHelpers.h b/src/Common/LoggingFormatStringHelpers.h index 3afa3fb089d..6dffd202807 100644 --- a/src/Common/LoggingFormatStringHelpers.h +++ b/src/Common/LoggingFormatStringHelpers.h @@ -43,6 +43,17 @@ struct PreformattedMessage operator const std::string & () const { return text; } operator std::string () && { return std::move(text); } operator fmt::format_string<> () const { UNREACHABLE(); } + + void apply(std::string & out_text, std::string_view & out_format_string) const & + { + out_text = text; + out_format_string = format_string; + } + void apply(std::string & out_text, std::string_view & out_format_string) && + { + out_text = std::move(text); + out_format_string = format_string; + } }; template @@ -99,10 +110,33 @@ template constexpr std::string_view tryGetStaticFormatString(T && x } } +/// Constexpr ifs are not like ifdefs, and compiler still checks that unneeded code can be compiled +/// This template is useful to avoid compilation failures when condition of some "constexpr if" is false +template struct ConstexprIfsAreNotIfdefs +{ + template constexpr static std::string_view getStaticFormatString(T &&) { return {}; } + template static PreformattedMessage getPreformatted(T &&) { return {}; } +}; + +template<> struct ConstexprIfsAreNotIfdefs +{ + template consteval static std::string_view getStaticFormatString(T && x) + { + /// See tryGetStaticFormatString(...) + static_assert(!std::is_same_v>); + static_assert(std::is_nothrow_convertible::value); + static_assert(!std::is_pointer::value); + return std::string_view(x); + } + + template static T && getPreformatted(T && x) { return std::forward(x); } +}; + template constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); } template constexpr auto firstArg(T && x, Ts &&...) { return std::forward(x); } /// For implicit conversion of fmt::basic_runtime<> to char* for std::string ctor template constexpr auto firstArg(fmt::basic_runtime && data, Ts &&...) { return data.str.data(); } +template constexpr auto firstArg(const fmt::basic_runtime & data, Ts &&...) 
{ return data.str.data(); } consteval ssize_t formatStringCountArgsNum(const char * const str, size_t len) { @@ -142,26 +176,19 @@ consteval void formatStringCheckArgsNumImpl(std::string_view str, size_t nargs) functionThatFailsCompilationOfConstevalFunctions("unexpected number of arguments in a format string"); } -template -struct CheckArgsNumHelperImpl +template +consteval void formatStringCheckArgsNum(T && str, size_t nargs) { - template - consteval CheckArgsNumHelperImpl(T && str) - { - formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), sizeof...(Args)); - } - - /// No checks for fmt::runtime and PreformattedMessage - template CheckArgsNumHelperImpl(fmt::basic_runtime &&) {} - template<> CheckArgsNumHelperImpl(PreformattedMessage &) {} - template<> CheckArgsNumHelperImpl(const PreformattedMessage &) {} - template<> CheckArgsNumHelperImpl(PreformattedMessage &&) {} - -}; - -template using CheckArgsNumHelper = CheckArgsNumHelperImpl...>; -template void formatStringCheckArgsNum(CheckArgsNumHelper, Args &&...) {} + formatStringCheckArgsNumImpl(tryGetStaticFormatString(str), nargs); +} +template inline void formatStringCheckArgsNum(fmt::basic_runtime &&, size_t) {} +template<> inline void formatStringCheckArgsNum(PreformattedMessage &, size_t) {} +template<> inline void formatStringCheckArgsNum(const PreformattedMessage &, size_t) {} +template<> inline void formatStringCheckArgsNum(PreformattedMessage &&, size_t) {} +template struct FormatStringTypeInfo{ static constexpr bool is_static = true; static constexpr bool has_format = true; }; +template struct FormatStringTypeInfo> { static constexpr bool is_static = false; static constexpr bool has_format = false; }; +template<> struct FormatStringTypeInfo { static constexpr bool is_static = false; static constexpr bool has_format = true; }; /// This wrapper helps to avoid too frequent and noisy log messages. /// For each pair (logger_name, format_string) it remembers when such a message was logged the last time. diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h index 3ebb1d25075..d9fe5ac9190 100644 --- a/src/Common/logger_useful.h +++ b/src/Common/logger_useful.h @@ -1,7 +1,7 @@ #pragma once /// Macros for convenient usage of Poco logger. - +#include #include #include #include @@ -28,33 +28,86 @@ namespace #define LOG_IMPL_FIRST_ARG(X, ...) X +/// Copy-paste from contrib/libpq/include/c.h +/// There's no easy way to count the number of arguments without evaluating these arguments... +#define CH_VA_ARGS_NARGS(...) \ + CH_VA_ARGS_NARGS_(__VA_ARGS__, \ + 63,62,61,60, \ + 59,58,57,56,55,54,53,52,51,50, \ + 49,48,47,46,45,44,43,42,41,40, \ + 39,38,37,36,35,34,33,32,31,30, \ + 29,28,27,26,25,24,23,22,21,20, \ + 19,18,17,16,15,14,13,12,11,10, \ + 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define CH_VA_ARGS_NARGS_( \ + _01,_02,_03,_04,_05,_06,_07,_08,_09,_10, \ + _11,_12,_13,_14,_15,_16,_17,_18,_19,_20, \ + _21,_22,_23,_24,_25,_26,_27,_28,_29,_30, \ + _31,_32,_33,_34,_35,_36,_37,_38,_39,_40, \ + _41,_42,_43,_44,_45,_46,_47,_48,_49,_50, \ + _51,_52,_53,_54,_55,_56,_57,_58,_59,_60, \ + _61,_62,_63, N, ...) \ + (N) + +#define LINE_NUM_AS_STRING_IMPL2(x) #x +#define LINE_NUM_AS_STRING_IMPL(x) LINE_NUM_AS_STRING_IMPL2(x) +#define LINE_NUM_AS_STRING LINE_NUM_AS_STRING_IMPL(__LINE__) +#define MESSAGE_FOR_EXCEPTION_ON_LOGGING "Failed to write a log message: " __FILE__ ":" LINE_NUM_AS_STRING "\n" + /// Logs a message to a specified logger with that level. 
/// If more than one argument is provided, /// the first argument is interpreted as a template with {}-substitutions /// and the latter arguments are treated as values to substitute. /// If only one argument is provided, it is treated as a message without substitutions. -#define LOG_IMPL(logger, priority, PRIORITY, ...) do \ -{ \ - auto _logger = ::getLogger(logger); \ - const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \ - (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \ - if (_is_clients_log || _logger->is((PRIORITY))) \ - { \ - std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \ - formatStringCheckArgsNum(__VA_ARGS__); \ - if (auto _channel = _logger->getChannel()) \ - { \ - std::string file_function; \ - file_function += __FILE__; \ - file_function += "; "; \ - file_function += __PRETTY_FUNCTION__; \ - Poco::Message poco_message(_logger->name(), formatted_message, \ - (PRIORITY), file_function.c_str(), __LINE__, tryGetStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__))); \ - _channel->log(poco_message); \ - } \ - ProfileEvents::incrementForLogMessage(PRIORITY); \ - } \ +#define LOG_IMPL(logger, priority, PRIORITY, ...) do \ +{ \ + auto _logger = ::getLogger(logger); \ + const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \ + (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \ + if (!_is_clients_log && !_logger->is((PRIORITY))) \ + break; \ + \ + try \ + { \ + ProfileEvents::incrementForLogMessage(PRIORITY); \ + auto _channel = _logger->getChannel(); \ + if (!_channel) \ + break; \ + \ + constexpr size_t _nargs = CH_VA_ARGS_NARGS(__VA_ARGS__); \ + using LogTypeInfo = FormatStringTypeInfo>; \ + \ + std::string_view _format_string; \ + std::string _formatted_message; \ + \ + if constexpr (LogTypeInfo::is_static) \ + { \ + formatStringCheckArgsNum(LOG_IMPL_FIRST_ARG(__VA_ARGS__), _nargs - 1); \ + _format_string = ConstexprIfsAreNotIfdefs::getStaticFormatString(LOG_IMPL_FIRST_ARG(__VA_ARGS__)); \ + } \ + \ + constexpr bool is_preformatted_message = !LogTypeInfo::is_static && LogTypeInfo::has_format; \ + if constexpr (is_preformatted_message) \ + { \ + static_assert(_nargs == 1 || !is_preformatted_message); \ + ConstexprIfsAreNotIfdefs::getPreformatted(LOG_IMPL_FIRST_ARG(__VA_ARGS__)).apply(_formatted_message, _format_string); \ + } \ + else \ + { \ + _formatted_message = _nargs == 1 ? firstArg(__VA_ARGS__) : fmt::format(__VA_ARGS__); \ + } \ + \ + std::string _file_function = __FILE__ "; "; \ + _file_function += __PRETTY_FUNCTION__; \ + Poco::Message _poco_message(_logger->name(), std::move(_formatted_message), \ + (PRIORITY), _file_function.c_str(), __LINE__, _format_string); \ + _channel->log(_poco_message); \ + } \ + catch (...) 
\ + { \ + ::write(STDERR_FILENO, static_cast(MESSAGE_FOR_EXCEPTION_ON_LOGGING), sizeof(MESSAGE_FOR_EXCEPTION_ON_LOGGING)); \ + } \ } while (false) diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp index f92866626f9..e755c22ba75 100644 --- a/src/Common/tests/gtest_log.cpp +++ b/src/Common/tests/gtest_log.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -50,3 +51,55 @@ TEST(Logger, TestLog) } } + +static size_t global_counter = 0; + +static std::string getLogMessage() +{ + ++global_counter; + return "test1 " + std::to_string(thread_local_rng()); +} + +static size_t getLogMessageParam() +{ + ++global_counter; + return thread_local_rng(); +} + +static PreformattedMessage getPreformatted() +{ + ++global_counter; + return PreformattedMessage::create("test3 {}", thread_local_rng()); +} + +static size_t getLogMessageParamOrThrow() +{ + size_t x = thread_local_rng(); + if (x % 1000 == 0) + return x; + throw Poco::Exception("error", 42); +} + +TEST(Logger, SideEffects) +{ + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + auto my_channel = Poco::AutoPtr(new Poco::StreamChannel(oss)); + auto * log = &Poco::Logger::create("Logger", my_channel.get()); + log->setLevel("trace"); + + /// Ensure that parameters are evaluated only once + global_counter = 0; + LOG_TRACE(log, fmt::runtime(getLogMessage())); + EXPECT_EQ(global_counter, 1); + LOG_TRACE(log, "test2 {}", getLogMessageParam()); + EXPECT_EQ(global_counter, 2); + LOG_TRACE(log, getPreformatted()); + EXPECT_EQ(global_counter, 3); + + auto var = PreformattedMessage::create("test4 {}", thread_local_rng()); + LOG_TRACE(log, var); + EXPECT_EQ(var.text.starts_with("test4 "), true); + EXPECT_EQ(var.format_string, "test4 {}"); + + LOG_TRACE(log, "test no throw {}", getLogMessageParamOrThrow()); +} diff --git a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh index e645cb5aae7..07b39723c37 100755 --- a/tests/queries/0_stateless/01164_detach_attach_partition_race.sh +++ b/tests/queries/0_stateless/01164_detach_attach_partition_race.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "drop table if exists mt" -$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=1000" +$CLICKHOUSE_CLIENT -q "create table mt (n int) engine=MergeTree order by n settings parts_to_throw_insert=5000" $CLICKHOUSE_CLIENT -q "insert into mt values (1)" $CLICKHOUSE_CLIENT -q "insert into mt values (2)" $CLICKHOUSE_CLIENT -q "insert into mt values (3)" From 6aab4cc83508093d19ed84130be483e73c08b324 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 05:25:40 +0200 Subject: [PATCH 425/478] Check for unexpected cyrillic --- src/Compression/CompressionCodecEncrypted.cpp | 2 +- src/Disks/DiskEncrypted.cpp | 4 ++-- src/IO/examples/read_buffer.cpp | 4 ++-- src/IO/examples/write_buffer.cpp | 2 +- src/IO/examples/write_buffer_perf.cpp | 2 +- src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp | 2 +- utils/check-style/check-style | 3 +++ 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Compression/CompressionCodecEncrypted.cpp b/src/Compression/CompressionCodecEncrypted.cpp index 3f4e35a78a4..34b621291fd 100644 --- a/src/Compression/CompressionCodecEncrypted.cpp +++ b/src/Compression/CompressionCodecEncrypted.cpp @@ -493,7 +493,7 @@ void CompressionCodecEncrypted::Configuration::loadImpl( /// If there is only 
one key with non zero ID, curren_key_id should be defined. if (new_params->keys_storage[method].size() == 1 && !new_params->keys_storage[method].contains(0)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. сurrent_key_id is required"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Config has one key with non zero id. current_key_id is required"); } /// Try to find which key will be used for encryption. If there is no current_key and only one key without id diff --git a/src/Disks/DiskEncrypted.cpp b/src/Disks/DiskEncrypted.cpp index 441e639b967..aa9c4a92adc 100644 --- a/src/Disks/DiskEncrypted.cpp +++ b/src/Disks/DiskEncrypted.cpp @@ -433,10 +433,10 @@ void DiskEncrypted::applyNewSettings( { auto new_settings = parseDiskEncryptedSettings(name, config, config_prefix, disk_map); if (new_settings->wrapped_disk != delegate) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging wrapped disk on the fly is not supported. Disk {}", name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing wrapped disk on the fly is not supported. Disk {}", name); if (new_settings->disk_path != disk_path) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Changing disk path on the fly is not supported. Disk {}", name); current_settings.set(std::move(new_settings)); IDisk::applyNewSettings(config, context, config_prefix, disk_map); diff --git a/src/IO/examples/read_buffer.cpp b/src/IO/examples/read_buffer.cpp index 85675c0d613..221da24715b 100644 --- a/src/IO/examples/read_buffer.cpp +++ b/src/IO/examples/read_buffer.cpp @@ -40,7 +40,7 @@ int readAndPrint(DB::ReadBuffer & in) int main(int, char **) { { - std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'"; + std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'"; DB::ReadBufferFromString in(s); if (readAndPrint(in)) std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl; @@ -49,7 +49,7 @@ int main(int, char **) std::shared_ptr in; { - std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'"; + std::string s = "-123456 123.456 вася pe\\ttya\t'\\'xyz\\\\'"; in = std::make_shared(s); } if (readAndPrint(*in)) diff --git a/src/IO/examples/write_buffer.cpp b/src/IO/examples/write_buffer.cpp index bca0be24b1a..999f9b1bb34 100644 --- a/src/IO/examples/write_buffer.cpp +++ b/src/IO/examples/write_buffer.cpp @@ -14,7 +14,7 @@ int main(int, char **) { DB::Int64 a = -123456; DB::Float64 b = 123.456; - DB::String c = "вася пе\tтя"; + DB::String c = "вася pe\ttya"; DB::String d = "'xyz\\"; std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM diff --git a/src/IO/examples/write_buffer_perf.cpp b/src/IO/examples/write_buffer_perf.cpp index 0b3d0a61241..3f57ddb9a4f 100644 --- a/src/IO/examples/write_buffer_perf.cpp +++ b/src/IO/examples/write_buffer_perf.cpp @@ -14,7 +14,7 @@ int main(int, char **) { DB::Int64 a = -123456; DB::Float64 b = 123.456; - DB::String c = "вася пе\tтя"; + DB::String c = "вася pe\ttya"; DB::String d = "'xyz\\"; std::ofstream s("test"); diff --git a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp index 229a0630328..cff83b0ad3b 100644 --- a/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetMetadataInputFormat.cpp @@ -180,7 +180,7 @@ Chunk ParquetMetadataInputFormat::generate() else if (name == names[3]) { auto column = types[3]->createColumn(); - 
/// Version сan be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). + /// Version can be only PARQUET_1_0 or PARQUET_2_LATEST (which is 2.6). String version = metadata->version() == parquet::ParquetVersion::PARQUET_1_0 ? "1.0" : "2.6"; assert_cast(*column).insertData(version.data(), version.size()); res.addColumn(std::move(column)); diff --git a/utils/check-style/check-style b/utils/check-style/check-style index c28ca1cfc8a..67c185a0b54 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -413,3 +413,6 @@ find $ROOT_PATH/tests/queries/1_stateful -name '*.sql' -or -name '*.sh' | grep - # Check for bad punctuation: whitespace before comma. find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '\w ,' | grep -v 'bad punctuation is ok here' && echo "^ There is bad punctuation: whitespace before comma. You should write it like this: 'Hello, world!'" + +# Cyrillic characters hiding inside Latin. +find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | xargs grep -P --line-number '[a-zA-Z][а-яА-ЯёЁ]|[а-яА-ЯёЁ][a-zA-Z]' && echo "^ Cyrillic characters found in unexpected place." From d35c87c1bdf4cba8848e94acd0ed8ec1f6b34502 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 05:06:16 +0000 Subject: [PATCH 426/478] allow positional options for clickhouse-local and populate global udf settings --- programs/local/LocalServer.cpp | 15 +++++++++++++++ src/Client/ClientBase.cpp | 5 ----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 3c2a8ae3152..3ee268aed0e 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -266,6 +266,10 @@ void LocalServer::tryInitPath() global_context->setUserFilesPath(""); // user's files are everywhere + std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/"); + global_context->setUserScriptsPath(user_scripts_path); + fs::create_directories(user_scripts_path); + /// top_level_domains_lists const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/"); if (!top_level_domains_path.empty()) @@ -490,6 +494,17 @@ try applyCmdSettings(global_context); + /// try to load user defined executable functions, throw on error and die + try + { + global_context->loadOrReloadUserDefinedExecutableFunctions(config()); + } + catch (...) + { + tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions."); + throw; + } + if (is_interactive) { clearTerminal(); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 3e964d5c6a3..496fc8fce0a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2624,11 +2624,6 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); } - /// Check positional options (options after ' -- ', ex: clickhouse-client -- ). 
- unrecognized_options = po::collect_unrecognized(parsed.options, po::collect_unrecognized_mode::include_positional); - if (unrecognized_options.size() > 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); - po::store(parsed, options); } From 65ffe91bf26a3429fe691c755736867e7819d2f5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 07:13:26 +0200 Subject: [PATCH 427/478] Fix double whitespace --- src/Processors/Formats/Impl/AvroRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index b2c75db0e54..c661e6b782d 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -1011,7 +1011,7 @@ private: using ConfluentSchemaRegistry = AvroConfluentRowInputFormat::SchemaRegistry; #define SCHEMA_REGISTRY_CACHE_MAX_SIZE 1000 /// Cache of Schema Registry URL -> SchemaRegistry -static CacheBase schema_registry_cache(SCHEMA_REGISTRY_CACHE_MAX_SIZE); +static CacheBase schema_registry_cache(SCHEMA_REGISTRY_CACHE_MAX_SIZE); static std::shared_ptr getConfluentSchemaRegistry(const FormatSettings & format_settings) { From d2d7139da3af470a49267047f0ea45f652d59e45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 08:58:23 +0200 Subject: [PATCH 428/478] Changelog for 23.7 --- CHANGELOG.md | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf6b309ef2c..878edfa4add 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.7, 2023-07-27](#237)**
**[ClickHouse release v23.6, 2023-06-30](#236)**
**[ClickHouse release v23.5, 2023-06-08](#235)**
**[ClickHouse release v23.4, 2023-04-26](#234)**
@@ -9,6 +10,180 @@ # 2023 Changelog +### ClickHouse release 23.7, 2023-07-27 + +#### Backward Incompatible Change +* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after *downgrade* (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Added support for PRQL as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). +* Kafka connector can fetch Avro schema from schema registry with basic authentication using url-encoded credentials. 
[#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). +* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). +* Implement support of encrypted elements in configuration file. Added possibility to use encrypted text in leaf elements of configuration file. The text is encrypted using encryption codecs from `` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). +* Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). +* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `input_format_csv_allow_variable_number_of_columns` options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). +* A system table `jemalloc_bins` to show stats for jemalloc bins. Example `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). +* Add `RowBinaryWithDefaults` format with extra byte before each column as a flag for using the column's default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added Peak Memory Usage statistic to HTTP headers. 
[#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). +* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Aliases `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). +* Enable automatic selection of the sparse serialization format by default. It improves performance. 
The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). +* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). +* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Move conditions with primary key columns to the end of PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). +* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. 
Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Functions `date_diff` and `age` now support millisecond/microsecond unit and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. 
[#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). +* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). +* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for function `range` of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* Cache dictionary now requests only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed the case when settings were not applied for EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. 
Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Log messages are written to the `system.text_log` from the server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. 
[#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). +* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). + +#### Experimental Feature +* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. +* (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* (experimental MaterializedMySQL) Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* The `session_timezone` setting (new in version 23.6) is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add integration test check with the enabled Analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. 
[#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) +* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). +* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). +* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). +* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). +* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). 
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release 23.6, 2023-06-29 #### Backward Incompatible Change From ac51ade45c5581d031c382277b550b2fb2f873fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 27 Jul 2023 08:45:53 +0000 Subject: [PATCH 429/478] Do not run the test without Rust libraries --- tests/queries/0_stateless/02833_local_with_dialect.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02833_local_with_dialect.sh b/tests/queries/0_stateless/02833_local_with_dialect.sh index 2a2e1b09459..012a6d91269 100755 --- a/tests/queries/0_stateless/02833_local_with_dialect.sh +++ b/tests/queries/0_stateless/02833_local_with_dialect.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-fasttest, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From f61f36800ccd028bb9cf4ef402275006faf2facb Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 27 Jul 2023 08:48:23 +0000 Subject: [PATCH 430/478] Fix style --- .../Formats/Impl/Parquet/PrepareForWrite.cpp | 14 +++++++------- src/Processors/Formats/Impl/Parquet/Write.cpp | 16 ++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp index 0700fc8491c..bc4c9ca3b72 100644 --- a/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp +++ b/src/Processors/Formats/Impl/Parquet/PrepareForWrite.cpp @@ -303,14 +303,14 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin } else { - types(T::INT32, C::UINT_8 , int_type(8 , false)); + types(T::INT32, C::UINT_8, int_type(8, false)); } break; case TypeIndex::UInt16: types(T::INT32, C::UINT_16, int_type(16, false)); break; case TypeIndex::UInt32: types(T::INT32, C::UINT_32, int_type(32, false)); break; case 
TypeIndex::UInt64: types(T::INT64, C::UINT_64, int_type(64, false)); break; - case TypeIndex::Int8: types(T::INT32, C::INT_8 , int_type(8 , true)); break; - case TypeIndex::Int16: types(T::INT32, C::INT_16 , int_type(16, true)); break; + case TypeIndex::Int8: types(T::INT32, C::INT_8, int_type(8, true)); break; + case TypeIndex::Int16: types(T::INT32, C::INT_16, int_type(16, true)); break; case TypeIndex::Int32: types(T::INT32); break; case TypeIndex::Int64: types(T::INT64); break; case TypeIndex::Float32: types(T::FLOAT); break; @@ -319,8 +319,8 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin /// These don't have suitable parquet logical types, so we write them as plain numbers. /// (Parquet has "enums" but they're just strings, with nowhere to declare all possible enum /// values in advance as part of the data type.) - case TypeIndex::Enum8: types(T::INT32, C::INT_8 , int_type(8 , true)); break; // Int8 - case TypeIndex::Enum16: types(T::INT32, C::INT_16 , int_type(16, true)); break; // Int16 + case TypeIndex::Enum8: types(T::INT32, C::INT_8, int_type(8, true)); break; // Int8 + case TypeIndex::Enum16: types(T::INT32, C::INT_16, int_type(16, true)); break; // Int16 case TypeIndex::IPv4: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 case TypeIndex::Date: types(T::INT32, C::UINT_16, int_type(16, false)); break; // UInt16 case TypeIndex::DateTime: types(T::INT32, C::UINT_32, int_type(32, false)); break; // UInt32 @@ -392,8 +392,8 @@ void preparePrimitiveColumn(ColumnPtr column, DataTypePtr type, const std::strin case TypeIndex::Int256: fixed_string(32); break; case TypeIndex::IPv6: fixed_string(16); break; - case TypeIndex::Decimal32: decimal(4 , getDecimalPrecision(*type), getDecimalScale(*type)); break; - case TypeIndex::Decimal64: decimal(8 , getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal32: decimal(4, getDecimalPrecision(*type), getDecimalScale(*type)); break; + case TypeIndex::Decimal64: decimal(8, getDecimalPrecision(*type), getDecimalScale(*type)); break; case TypeIndex::Decimal128: decimal(16, getDecimalPrecision(*type), getDecimalScale(*type)); break; case TypeIndex::Decimal256: decimal(32, getDecimalPrecision(*type), getDecimalScale(*type)); break; diff --git a/src/Processors/Formats/Impl/Parquet/Write.cpp b/src/Processors/Formats/Impl/Parquet/Write.cpp index 47ef0c53ab5..5ebf2be76d2 100644 --- a/src/Processors/Formats/Impl/Parquet/Write.cpp +++ b/src/Processors/Formats/Impl/Parquet/Write.cpp @@ -755,20 +755,20 @@ void writeColumnChunkBody(ColumnChunkWriteState & s, const WriteOptions & option writeColumnImpl(s, options, out, ConverterNumeric, bool, bool>(s.primitive_column)); else - N(UInt8 , Int32Type); + N(UInt8, Int32Type); break; case TypeIndex::UInt16 : N(UInt16, Int32Type); break; case TypeIndex::UInt32 : N(UInt32, Int32Type); break; case TypeIndex::UInt64 : N(UInt64, Int64Type); break; - case TypeIndex::Int8 : N(Int8 , Int32Type); break; - case TypeIndex::Int16 : N(Int16 , Int32Type); break; - case TypeIndex::Int32 : N(Int32 , Int32Type); break; - case TypeIndex::Int64 : N(Int64 , Int64Type); break; + case TypeIndex::Int8 : N(Int8, Int32Type); break; + case TypeIndex::Int16 : N(Int16, Int32Type); break; + case TypeIndex::Int32 : N(Int32, Int32Type); break; + case TypeIndex::Int64 : N(Int64, Int64Type); break; - case TypeIndex::Enum8: N(Int8 , Int32Type); break; - case TypeIndex::Enum16: N(Int16 , Int32Type); break; + case TypeIndex::Enum8: N(Int8, Int32Type); break; + case 
TypeIndex::Enum16: N(Int16, Int32Type); break; case TypeIndex::Date: N(UInt16, Int32Type); break; - case TypeIndex::Date32: N(Int32 , Int32Type); break; + case TypeIndex::Date32: N(Int32, Int32Type); break; case TypeIndex::DateTime: N(UInt32, Int32Type); break; #undef N From 2b18872e86898fe0c0ee40ddecf05c29088a7aca Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jul 2023 08:55:19 +0000 Subject: [PATCH 431/478] Incorporate review feedback --- docs/en/operations/system-tables/query_log.md | 8 ++++---- src/Interpreters/Cache/QueryCache.h | 8 ++++---- src/Interpreters/QueryLog.cpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index b8dc0c0224c..835c79129de 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -112,10 +112,10 @@ Columns: - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. - `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - - `'None' = 1` = The query result was neither written into nor read from the query cache. - - `'Write' = 1` = The query result was written into the query cache. - - `'Read' = 1` = The query result was read from the query cache. - - `'Unknown' = 1` = Unknown status. + - `'Unknown' = 1` = Status unknown. + - `'None' = 2` = The query result was neither written into nor read from the query cache. + - `'Write' = 3` = The query result was written into the query cache. + - `'Read' = 4` = The query result was read from the query cache. **Example** diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 973015b8003..5fe756268f2 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -27,10 +27,10 @@ public: enum class Usage { /// starts at 1 for compatibitity with DataTypeEnum8 - None = 1, /// query result neither written nor read into/from query cache - Write, /// query result wrote into query cache - Read, /// query result read from query cache - Unknown, /// we don't know what what happened + Unknown = 1, /// we don't know what what happened + None, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache }; /// Represents a query result in the cache. 
diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index c3294512f14..df21e82305a 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -44,10 +44,10 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() auto query_cache_usage_datatype = std::make_shared( DataTypeEnum8::Values { + {"Unknown", static_cast(QueryCache::Usage::Unknown)}, {"None", static_cast(QueryCache::Usage::None)}, {"Write", static_cast(QueryCache::Usage::Write)}, - {"Read", static_cast(QueryCache::Usage::Read)}, - {"Unknown", static_cast(QueryCache::Usage::Unknown)} + {"Read", static_cast(QueryCache::Usage::Read)} }); auto low_cardinality_string = std::make_shared(std::make_shared()); From 043ad45ec3f4f2d1ff8b619da4c6ec5cb2b8f0d0 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jul 2023 09:18:24 +0000 Subject: [PATCH 432/478] Incorporate review feedback, pt. II --- docs/en/operations/system-tables/query_log.md | 8 ++++---- src/Interpreters/Cache/QueryCache.h | 9 ++++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 835c79129de..c6f565b8748 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -112,10 +112,10 @@ Columns: - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. - `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - - `'Unknown' = 1` = Status unknown. - - `'None' = 2` = The query result was neither written into nor read from the query cache. - - `'Write' = 3` = The query result was written into the query cache. - - `'Read' = 4` = The query result was read from the query cache. + - `'Unknown'` = Status unknown. + - `'None'` = The query result was neither written into nor read from the query cache. + - `'Write'` = The query result was written into the query cache. + - `'Read'` = The query result was read from the query cache. **Example** diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index 5fe756268f2..c2de8ca22dd 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -26,11 +26,10 @@ class QueryCache public: enum class Usage { - /// starts at 1 for compatibitity with DataTypeEnum8 - Unknown = 1, /// we don't know what what happened - None, /// query result neither written nor read into/from query cache - Write, /// query result written into query cache - Read, /// query result read from query cache + Unknown, /// we don't know what what happened + None, /// query result neither written nor read into/from query cache + Write, /// query result written into query cache + Read, /// query result read from query cache }; /// Represents a query result in the cache. 
From 228de12d94a206f6eaae74059216886c32c2b53e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 27 Jul 2023 13:59:23 +0300 Subject: [PATCH 433/478] Update ReplicatedMergeTreeQueue.cpp (#52648) --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index e11913fc3d2..21d5597e614 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1803,7 +1803,7 @@ std::map ReplicatedMergeTreeQueue::getAlterMutationCo Int64 part_data_version = part->info.getDataVersion(); Int64 part_metadata_version = part->getMetadataVersion(); - LOG_DEBUG(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); + LOG_TEST(log, "Looking for mutations for part {} (part data version {}, part metadata version {})", part->name, part_data_version, part_metadata_version); std::map result; /// Here we return mutation commands for part which has bigger alter version than part metadata version. From 9d73be6fca63edbca0bd97d07386f41268f1b11d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 12:14:46 +0000 Subject: [PATCH 434/478] test is added --- .../0_stateless/02833_local_udf_options.reference | 1 + tests/queries/0_stateless/02833_local_udf_options.sh | 11 +++++++++++ tests/queries/0_stateless/scripts_udf/function.xml | 9 +++++++++ tests/queries/0_stateless/scripts_udf/udf.sh | 3 +++ 4 files changed, 24 insertions(+) create mode 100755 tests/queries/0_stateless/02833_local_udf_options.reference create mode 100755 tests/queries/0_stateless/02833_local_udf_options.sh create mode 100644 tests/queries/0_stateless/scripts_udf/function.xml create mode 100755 tests/queries/0_stateless/scripts_udf/udf.sh diff --git a/tests/queries/0_stateless/02833_local_udf_options.reference b/tests/queries/0_stateless/02833_local_udf_options.reference new file mode 100755 index 00000000000..19f0805d8de --- /dev/null +++ b/tests/queries/0_stateless/02833_local_udf_options.reference @@ -0,0 +1 @@ +qwerty diff --git a/tests/queries/0_stateless/02833_local_udf_options.sh b/tests/queries/0_stateless/02833_local_udf_options.sh new file mode 100755 index 00000000000..149b62d7e2c --- /dev/null +++ b/tests/queries/0_stateless/02833_local_udf_options.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +SCRIPTS_DIR=$CUR_DIR/scripts_udf + +$CLICKHOUSE_LOCAL -q 'select test_function()' -- --user_scripts_path=$SCRIPTS_DIR --user_defined_executable_functions_config=$SCRIPTS_DIR/function.xml diff --git a/tests/queries/0_stateless/scripts_udf/function.xml b/tests/queries/0_stateless/scripts_udf/function.xml new file mode 100644 index 00000000000..69a0abb5cec --- /dev/null +++ b/tests/queries/0_stateless/scripts_udf/function.xml @@ -0,0 +1,9 @@ + + + executable + test_function + String + TabSeparated + udf.sh + + diff --git a/tests/queries/0_stateless/scripts_udf/udf.sh b/tests/queries/0_stateless/scripts_udf/udf.sh new file mode 100755 index 00000000000..add85833c3e --- /dev/null +++ b/tests/queries/0_stateless/scripts_udf/udf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +echo qwerty From f3dc6dd061515054afbbe5c58452a9554998a8b7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 27 Jul 2023 15:23:57 +0300 Subject: [PATCH 435/478] Revert "Added field `refcount` to `system.remote_data_paths` table" --- src/Disks/IDisk.h | 5 +- .../ObjectStorages/DiskObjectStorage.cpp | 2 +- .../System/StorageSystemRemoteDataPaths.cpp | 8 +--- .../02791_remote_paths_refcount.reference | 28 ----------- .../02791_remote_paths_refcount.sql | 47 ------------------- 5 files changed, 4 insertions(+), 86 deletions(-) delete mode 100644 tests/queries/0_stateless/02791_remote_paths_refcount.reference delete mode 100644 tests/queries/0_stateless/02791_remote_paths_refcount.sql diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index a2c5e59237f..2b0ca369a96 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -303,11 +303,10 @@ public: std::string local_path; std::string common_prefix_for_objects; StoredObjects objects; - size_t refcount; LocalPathWithObjectStoragePaths( - const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_, size_t refcount_) - : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)), refcount(refcount_) {} + const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) + : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} }; virtual void getRemotePathsRecursive(const String &, std::vector &) diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp index 001cff4cefe..762151b3808 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp @@ -82,7 +82,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std:: { try { - paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path), metadata_storage->getHardlinkCount(local_path)); + paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path)); } catch (const Exception & e) { diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 820b1cf3823..eb514d3b3f4 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -25,7 +25,6 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab {"local_path", std::make_shared()}, {"remote_path", std::make_shared()}, {"size", std::make_shared()}, - {"refcount", std::make_shared()}, 
{"common_prefix_for_blobs", std::make_shared()}, {"cache_paths", std::make_shared(std::make_shared())}, })); @@ -49,7 +48,6 @@ Pipe StorageSystemRemoteDataPaths::read( MutableColumnPtr col_local_path = ColumnString::create(); MutableColumnPtr col_remote_path = ColumnString::create(); MutableColumnPtr col_size = ColumnUInt64::create(); - MutableColumnPtr col_refcount = ColumnUInt64::create(); MutableColumnPtr col_namespace = ColumnString::create(); MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create()); @@ -67,22 +65,19 @@ Pipe StorageSystemRemoteDataPaths::read( if (disk->supportsCache()) cache = FileCacheFactory::instance().getByName(disk->getCacheName()).cache; - for (const auto & [local_path, common_prefox_for_objects, storage_objects, refcount] : remote_paths_by_local_path) + for (const auto & [local_path, common_prefox_for_objects, storage_objects] : remote_paths_by_local_path) { for (const auto & object : storage_objects) { col_disk_name->insert(disk_name); col_base_path->insert(disk->getPath()); - if (cache) col_cache_base_path->insert(cache->getBasePath()); else col_cache_base_path->insertDefault(); - col_local_path->insert(local_path); col_remote_path->insert(object.remote_path); col_size->insert(object.bytes_size); - col_refcount->insert(refcount); col_namespace->insert(common_prefox_for_objects); if (cache) @@ -106,7 +101,6 @@ Pipe StorageSystemRemoteDataPaths::read( res_columns.emplace_back(std::move(col_local_path)); res_columns.emplace_back(std::move(col_remote_path)); res_columns.emplace_back(std::move(col_size)); - res_columns.emplace_back(std::move(col_refcount)); res_columns.emplace_back(std::move(col_namespace)); res_columns.emplace_back(std::move(col_cache_paths)); diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.reference b/tests/queries/0_stateless/02791_remote_paths_refcount.reference deleted file mode 100644 index 56fb1536205..00000000000 --- a/tests/queries/0_stateless/02791_remote_paths_refcount.reference +++ /dev/null @@ -1,28 +0,0 @@ -0_0_0_0 0 -0_0_0_0_1 1 -1_0_0_0 0 -1_0_0_0_1 1 -0_0_0_0_1 checksums.txt 0 -0_0_0_0_1 columns.txt 1 -0_0_0_0_1 count.txt 1 -0_0_0_0_1 default_compression_codec.txt 1 -0_0_0_0_1 id.bin 1 -0_0_0_0_1 id.cmrk2 1 -0_0_0_0_1 metadata_version.txt 1 -0_0_0_0_1 minmax_id.idx 1 -0_0_0_0_1 partition.dat 1 -0_0_0_0_1 primary.cidx 1 -0_0_0_0_1 v.bin 1 -0_0_0_0_1 v.cmrk2 1 -1_0_0_0_1 checksums.txt 0 -1_0_0_0_1 columns.txt 0 -1_0_0_0_1 count.txt 1 -1_0_0_0_1 default_compression_codec.txt 0 -1_0_0_0_1 id.bin 1 -1_0_0_0_1 id.cmrk2 1 -1_0_0_0_1 metadata_version.txt 0 -1_0_0_0_1 minmax_id.idx 1 -1_0_0_0_1 partition.dat 1 -1_0_0_0_1 primary.cidx 1 -1_0_0_0_1 v.bin 0 -1_0_0_0_1 v.cmrk2 0 diff --git a/tests/queries/0_stateless/02791_remote_paths_refcount.sql b/tests/queries/0_stateless/02791_remote_paths_refcount.sql deleted file mode 100644 index 180601738ad..00000000000 --- a/tests/queries/0_stateless/02791_remote_paths_refcount.sql +++ /dev/null @@ -1,47 +0,0 @@ --- Tags: no-fasttest - -DROP TABLE IF EXISTS t_refcount SYNC; - --- Names of parts (on which this test depends) --- can differ in case of fault injection. 
-SET insert_keeper_fault_injection_probability = 0.0; - -CREATE TABLE t_refcount (id UInt64, v UInt64) -ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t_refcount', '1') -ORDER BY id PARTITION BY id % 2 -SETTINGS - storage_policy = 's3_cache', - allow_remote_fs_zero_copy_replication = 1, - min_bytes_for_wide_part = 0, - compress_marks = 1, - compress_primary_key = 1, - ratio_of_defaults_for_sparse_serialization = 1.0; - -INSERT INTO t_refcount VALUES (1, 10), (2, 20); - -SET mutations_sync = 2; -ALTER TABLE t_refcount UPDATE v = v * 10 WHERE id % 2 = 1; - -SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 't_refcount' ORDER BY name; - -WITH splitByChar('/', full_path) AS path_parts -SELECT path_parts[-2] AS part_name, path_parts[-1] AS file_name, refcount -FROM -( - SELECT - path || local_path AS full_path, - substring(full_path, 1, length(full_path) - position(reverse(full_path), '/') + 1) AS part_path, - refcount - FROM system.remote_data_paths - WHERE disk_name = 's3_cache' -) AS paths -INNER JOIN -( - SELECT path - FROM system.parts - WHERE database = currentDatabase() AND table = 't_refcount' AND active -) AS parts -ON paths.part_path = parts.path -ORDER BY part_name, file_name; - -DROP TABLE IF EXISTS t_refcount SYNC; From 8bba7baeaa65548d91da4c068c6af9b583f9449c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 12:34:22 +0000 Subject: [PATCH 436/478] fix style --- tests/queries/0_stateless/02833_local_udf_options.reference | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 tests/queries/0_stateless/02833_local_udf_options.reference diff --git a/tests/queries/0_stateless/02833_local_udf_options.reference b/tests/queries/0_stateless/02833_local_udf_options.reference old mode 100755 new mode 100644 From 33300a978e00687713d08fa786178eecc7bc15d5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 15:38:53 +0300 Subject: [PATCH 437/478] Update CHANGELOG (#52655) --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 878edfa4add..f401b346726 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ * The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### New Feature -* Added support for PRQL as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). * Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). 
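The `Filesystem` database engine added in the entry just above can be exercised with a couple of statements. This is only an illustrative sketch: the directory and the CSV file name are hypothetical, and the exact set of allowed paths depends on server configuration.

```sql
-- Hypothetical directory; files under it become implicitly available tables
-- with auto-detected format and structure.
CREATE DATABASE fs ENGINE = Filesystem('/var/lib/clickhouse/user_files');

SELECT * FROM fs.`events.csv` LIMIT 10;
```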
* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). * Support ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). @@ -115,6 +115,7 @@ * Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). #### Experimental Feature +* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). * Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can be changed in this release. * (experimental MaterializedMySQL) Fixed crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). * (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). From 78f3a575f9ddbfd47e46e8169b63979e3d2aa72f Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Thu, 27 Jul 2023 21:06:34 +0800 Subject: [PATCH 438/478] Convert hashSets in parallel before merge (#50748) * Convert hashSets in parallel before merge Before merge, if one of the lhs and rhs is singleLevelSet and the other is twoLevelSet, then the SingleLevelSet will call convertToTwoLevel(). The convert process is not in parallel and it will cost lots of cycle if it cosume all the singleLevelSet. The idea of the patch is to convert all the singleLevelSets to twoLevelSets in parallel if the hashsets are not all singleLevel or not all twoLevel. I have tested the patch on Intel 2 x 112 vCPUs SPR server with clickbench and latest upstream ClickHouse. Q5 has got a big 264% performance improvement and 24 queries have got at least 5% performance gain. The overall geomean of 43 queries has gained 7.4% more than the base code. Signed-off-by: Jiebin Sun * add resize() for the data_vec in parallelizeMergePrepare() Signed-off-by: Jiebin Sun * Add the performance test prepare_hash_before_merge.xml Signed-off-by: Jiebin Sun * Fit the CI to rename the data set from hits_v1 to test.hits. 
Signed-off-by: Jiebin Sun * remove the redundant branch in UniqExactSet Co-authored-by: Nikita Taranov * Remove the empty methods and add throw exception in parallelizeMergePrepare() Signed-off-by: Jiebin Sun --------- Signed-off-by: Jiebin Sun Co-authored-by: Nikita Taranov --- .../AggregateFunctionUniq.h | 39 ++++++++++++++ src/AggregateFunctions/IAggregateFunction.h | 8 +++ src/AggregateFunctions/UniqExactSet.h | 51 +++++++++++++++++++ src/Interpreters/Aggregator.cpp | 14 +++++ .../performance/prepare_hash_before_merge.xml | 4 ++ 5 files changed, 116 insertions(+) create mode 100644 tests/performance/prepare_hash_before_merge.xml diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index de68e9076a0..2810051a82f 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -29,6 +29,10 @@ #include #include +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} namespace DB { @@ -42,6 +46,7 @@ struct AggregateFunctionUniqUniquesHashSetData Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniq"; } @@ -55,6 +60,7 @@ struct AggregateFunctionUniqUniquesHashSetDataForVariadic Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -72,6 +78,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -84,6 +91,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -96,6 +104,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -108,6 +117,7 @@ struct AggregateFunctionUniqHLL12Data Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqHLL12"; } @@ -120,6 +130,7 @@ struct AggregateFunctionUniqHLL12DataForVariadic Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -143,6 +154,7 @@ struct AggregateFunctionUniqExactData Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = false; static String getName() { return "uniqExact"; } @@ -162,6 +174,7 @@ struct 
AggregateFunctionUniqExactData Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = false; static String getName() { return "uniqExact"; } @@ -181,6 +194,7 @@ struct AggregateFunctionUniqExactData Set set; constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = false; static String getName() { return "uniqExact"; } @@ -190,6 +204,7 @@ template { constexpr static bool is_able_to_parallelize_merge = is_able_to_parallelize_merge_; + constexpr static bool is_parallelize_merge_prepare_needed = true; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -204,6 +219,7 @@ struct AggregateFunctionUniqThetaData Set set; constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = false; static String getName() { return "uniqTheta"; } @@ -213,6 +229,7 @@ template struct AggregateFunctionUniqThetaDataForVariadic : AggregateFunctionUniqThetaData { constexpr static bool is_able_to_parallelize_merge = false; + constexpr static bool is_parallelize_merge_prepare_needed = false; constexpr static bool is_variadic = true; constexpr static bool is_exact = is_exact_; constexpr static bool argument_is_tuple = argument_is_tuple_; @@ -384,8 +401,10 @@ template class AggregateFunctionUniq final : public IAggregateFunctionDataHelper> { private: + using DataSet = typename Data::Set; static constexpr size_t num_args = 1; static constexpr bool is_able_to_parallelize_merge = Data::is_able_to_parallelize_merge; + static constexpr bool is_parallelize_merge_prepare_needed = Data::is_parallelize_merge_prepare_needed; public: explicit AggregateFunctionUniq(const DataTypes & argument_types_) @@ -439,6 +458,26 @@ public: detail::Adder::add(this->data(place), columns, num_args, row_begin, row_end, flags, null_map); } + bool isParallelizeMergePrepareNeeded() const override { return is_parallelize_merge_prepare_needed;} + + void parallelizeMergePrepare(AggregateDataPtrs & places, ThreadPool & thread_pool) const override + { + if constexpr (is_parallelize_merge_prepare_needed) + { + std::vector data_vec; + data_vec.resize(places.size()); + + for (unsigned long i = 0; i < data_vec.size(); i++) + data_vec[i] = &this->data(places[i]).set; + + DataSet::parallelizeMergePrepare(data_vec, thread_pool); + } + else + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() is only implemented when is_parallelize_merge_prepare_needed is true for {} ", getName()); + } + } + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { this->data(place).set.merge(this->data(rhs).set); diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index df08b6f2109..b460a66ea22 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -47,6 +47,7 @@ using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; using AggregateDataPtr = char *; +using AggregateDataPtrs = std::vector; using ConstAggregateDataPtr = const char *; class IAggregateFunction; @@ -148,6 +149,13 @@ public: /// Default values must be a the 0-th 
positions in columns. virtual void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t length, Arena * arena) const = 0; + virtual bool isParallelizeMergePrepareNeeded() const { return false; } + + virtual void parallelizeMergePrepare(AggregateDataPtrs & /*places*/, ThreadPool & /*thread_pool*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "parallelizeMergePrepare() with thread pool parameter isn't implemented for {} ", getName()); + } + /// Merges state (on which place points to) with other state of current aggregation function. virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const = 0; diff --git a/src/AggregateFunctions/UniqExactSet.h b/src/AggregateFunctions/UniqExactSet.h index 90cfe700179..0d99b29686f 100644 --- a/src/AggregateFunctions/UniqExactSet.h +++ b/src/AggregateFunctions/UniqExactSet.h @@ -28,6 +28,57 @@ public: asTwoLevel().insert(std::forward(arg)); } + /// In merge, if one of the lhs and rhs is twolevelset and the other is singlelevelset, then the singlelevelset will need to convertToTwoLevel(). + /// It's not in parallel and will cost extra large time if the thread_num is large. + /// This method will convert all the SingleLevelSet to TwoLevelSet in parallel if the hashsets are not all singlelevel or not all twolevel. + static void parallelizeMergePrepare(const std::vector & data_vec, ThreadPool & thread_pool) + { + unsigned long single_level_set_num = 0; + + for (auto ele : data_vec) + { + if (ele->isSingleLevel()) + single_level_set_num ++; + } + + if (single_level_set_num > 0 && single_level_set_num < data_vec.size()) + { + try + { + auto data_vec_atomic_index = std::make_shared(0); + auto thread_func = [data_vec, data_vec_atomic_index, thread_group = CurrentThread::getGroup()]() + { + SCOPE_EXIT_SAFE( + if (thread_group) + CurrentThread::detachFromGroupIfNotDetached(); + ); + if (thread_group) + CurrentThread::attachToGroupIfDetached(thread_group); + + setThreadName("UniqExaConvert"); + + while (true) + { + const auto i = data_vec_atomic_index->fetch_add(1); + if (i >= data_vec.size()) + return; + if (data_vec[i]->isSingleLevel()) + data_vec[i]->convertToTwoLevel(); + } + }; + for (size_t i = 0; i < std::min(thread_pool.getMaxThreads(), single_level_set_num); ++i) + thread_pool.scheduleOrThrowOnError(thread_func); + + thread_pool.wait(); + } + catch (...) + { + thread_pool.wait(); + throw; + } + } + } + auto merge(const UniqExactSet & other, ThreadPool * thread_pool = nullptr) { if (isSingleLevel() && other.isTwoLevel()) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 36cd32910b5..c2914c938b5 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -2603,6 +2603,20 @@ void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( AggregatedDataVariantsPtr & res = non_empty_data[0]; + for (size_t i = 0; i < params.aggregates_size; ++i) + { + if (aggregate_functions[i]->isParallelizeMergePrepareNeeded()) + { + size_t size = non_empty_data.size(); + std::vector data_vec; + + for (size_t result_num = 0; result_num < size; ++result_num) + data_vec.emplace_back(non_empty_data[result_num]->without_key + offsets_of_aggregate_states[i]); + + aggregate_functions[i]->parallelizeMergePrepare(data_vec, thread_pool); + } + } + /// We merge all aggregation results to the first. 
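    /// At this point parallelizeMergePrepare() above has already converted any
    /// single-level UniqExactSet states into two-level sets using the thread pool
    /// (it only does so when the states are a mix of single- and two-level), so the
    /// sequential merge loop below does not have to call convertToTwoLevel() itself.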
for (size_t result_num = 1, size = non_empty_data.size(); result_num < size; ++result_num) { diff --git a/tests/performance/prepare_hash_before_merge.xml b/tests/performance/prepare_hash_before_merge.xml new file mode 100644 index 00000000000..e99f762927f --- /dev/null +++ b/tests/performance/prepare_hash_before_merge.xml @@ -0,0 +1,4 @@ + + SELECT COUNT(DISTINCT Title) FROM test.hits SETTINGS max_threads = 24 + SELECT COUNT(DISTINCT Referer) FROM test.hits SETTINGS max_threads = 22 + From 671128140dc6672349421c84643076410d46ce0f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 27 Jul 2023 15:34:05 +0200 Subject: [PATCH 439/478] Update autogenerated version to 23.8.1.1 and contributors --- cmake/autogenerated_versions.txt | 10 ++++---- .../StorageSystemContributors.generated.cpp | 24 +++++++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 821b7b46855..9919d018046 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54476) +SET(VERSION_REVISION 54477) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 7) +SET(VERSION_MINOR 8) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH d1c7e13d08868cb04d3562dcced704dd577cb1df) -SET(VERSION_DESCRIBE v23.7.1.1-testing) -SET(VERSION_STRING 23.7.1.1) +SET(VERSION_GITHASH a70127baecc451f1f7073bad7b6198f6703441d8) +SET(VERSION_DESCRIBE v23.8.1.1-testing) +SET(VERSION_STRING 23.8.1.1) # end of autochange diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f84c554afc0..031c7454ab6 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -42,10 +42,12 @@ const char * auto_contributors[] { "Alex", "Alex Bocharov", "Alex Cao", + "Alex Cheng", "Alex Karo", "Alex Krash", "Alex Ryndin", "Alex Zatelepin", + "AlexBykovski", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", @@ -232,6 +234,7 @@ const char * auto_contributors[] { "CheSema", "Chebarykov Pavel", "Chen Yufei", + "Chen768959", "Cheng Pan", "Chienlung Cheung", "Christian", @@ -485,6 +488,7 @@ const char * auto_contributors[] { "John", "John Hummel", "John Skopis", + "John Spurlock", "Jonatas Freitas", "Jonathan-Ackerman", "Jordi", @@ -659,6 +663,7 @@ const char * auto_contributors[] { "Mikhail Gaidamaka", "Mikhail Guzov", "Mikhail Korotov", + "Mikhail Koviazin", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", @@ -815,6 +820,7 @@ const char * auto_contributors[] { "Roman Vasin", "Roman Vlasenko", "Roman Zhukov", + "Rory Crispin", "Roy Bellingan", "Ruslan", "Ruslan Savchenko", @@ -832,7 +838,9 @@ const char * auto_contributors[] { "Salvatore Mesoraca", "Sami Kerola", "Samuel Chou", + "Samuel Colvin", "San", + "Sanjam Panda", "Saulius Valatka", "Sean Haynes", "Sean Lafferty", @@ -883,6 +891,7 @@ const char * auto_contributors[] { "SmitaRKulkarni", "Snow", "Sofia Antipushina", + "Song Liyong", "Sorck", "Stanislav Dobrovolschii", "Stanislav Pavlovichev", @@ -893,6 +902,7 @@ const char * auto_contributors[] { "Stepan Herold", "Stephan", "Steve-金勇", + "StianBerger", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", @@ -977,6 +987,7 @@ const char * auto_contributors[] { "Vitaliy Karnienko", "Vitaliy Kozlovskiy", 
"Vitaliy Lyudvichenko", + "Vitaliy Pashkov", "Vitaliy Zakaznikov", "Vitaly", "Vitaly Artemyev", @@ -1029,6 +1040,7 @@ const char * auto_contributors[] { "Yakov Olkhovskiy", "YalalovSM", "Yangkuan Liu", + "Yarik Briukhovetskyi", "Yatian Xu", "Yatsishin Ilya", "Yağızcan Değirmenci", @@ -1053,6 +1065,7 @@ const char * auto_contributors[] { "Yury Karpovich", "Yury Stankevich", "Yusuke Tanaka", + "Zach Naimon", "ZhiYong Wang", "Zhichang Yu", "Zhichun Wu", @@ -1143,6 +1156,7 @@ const char * auto_contributors[] { "changvvb", "chasingegg", "chen", + "chen768959", "chen9t", "chengy8934", "chenjian", @@ -1179,6 +1193,7 @@ const char * auto_contributors[] { "detailyang", "dfenelonov", "dgrr", + "dheerajathrey", "dimarub2000", "dinosaur", "divanorama", @@ -1329,6 +1344,7 @@ const char * auto_contributors[] { "lanfz", "larryluogit", "laurieliyang", + "lcjh", "lehasm", "leosunli", "leozhang", @@ -1455,6 +1471,7 @@ const char * auto_contributors[] { "pawelsz-rb", "pdai", "pdv-ru", + "pedro.riera", "pengxiangcai", "peshkurov", "peter279k", @@ -1548,8 +1565,10 @@ const char * auto_contributors[] { "teng.ma", "terrylin", "tesw yew isal", + "therealnick233", "tianzhou", "tiger.yan", + "timfursov", "tison", "topvisor", "tpanetti", @@ -1563,6 +1582,7 @@ const char * auto_contributors[] { "usurai", "vahid-sohrabloo", "vdimir", + "velavokr", "velom", "vesslanjin", "vgocoder", @@ -1587,17 +1607,21 @@ const char * auto_contributors[] { "wuxiaobai24", "wzl", "xPoSx", + "xiao", + "xiaolei565", "xiedeyantu", "xieyichen", "xinhuitian", "xlwh", "xmy", + "xuelei", "yakkomajuri", "yakov-olkhovskiy", "yandd", "yang", "yangshuai", "yaqi-zhao", + "yariks5s", "yeer", "ygrek", "yhgcn", From 28c49e3f20923917c46872b15dce21a72ab47b4a Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 27 Jul 2023 13:47:11 +0000 Subject: [PATCH 440/478] Update version_date.tsv and changelogs after v23.7.1.2470-stable --- SECURITY.md | 3 +- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.7.1.2470-stable.md | 452 +++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 6 files changed, 458 insertions(+), 4 deletions(-) create mode 100644 docs/changelogs/v23.7.1.2470-stable.md diff --git a/SECURITY.md b/SECURITY.md index 4ba5f13d09c..d61533b44b9 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.7 | ✔️ | | 23.6 | ✔️ | | 23.5 | ✔️ | -| 23.4 | ✔️ | +| 23.4 | ❌ | | 23.3 | ✔️ | | 23.2 | ❌ | | 23.1 | ❌ | diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 8a6324aef88..c9800e4e66d 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.1.2470" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7f453627601..f558338b23c 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.1.2470" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 1fa7b83ae16..156de034a7f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.6.2.18" +ARG VERSION="23.7.1.2470" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.7.1.2470-stable.md b/docs/changelogs/v23.7.1.2470-stable.md new file mode 100644 index 00000000000..a77078cb653 --- /dev/null +++ b/docs/changelogs/v23.7.1.2470-stable.md @@ -0,0 +1,452 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.1.2470-stable (a70127baecc) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Backward Incompatible Change +* Add ` NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`, to be able to give these grants `named_collection_admin` is required in config (previously it was named `named_collection_control`, so will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixing a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)). +* Bump version of the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_idd pass through). This will not allow to process existing entries for distributed DDL after **downgrade** (but note, that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)). +* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)` which is not allowed in table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). 
[#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `toDecimalString` is removed due to subpar implementation quality. This closes [#52407](https://github.com/ClickHouse/ClickHouse/issues/52407). [#52450](https://github.com/ClickHouse/ClickHouse/pull/52450) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Implement KQL-style formatting for Interval. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)). +* Support ZooKeeper `reconfig` command for CH Keeper with incremental reconfiguration which can be enabled via `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). +* Kafka connector can fetch avro schema from schema registry with basic authentication using url-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)). +* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added support for prql as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Add a column is_obsolete to system.settings and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)). +* Implement support of encrypted elements in configuration file Added possibility to use encrypted text in leaf elements of configuration file. The text is encrypted using encryption codecs from section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)). +* Just a new request of [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)). +* Add SYSTEM STOP LISTEN query. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add input_format_csv_allow_variable_number_of_columns options. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Another boring feature: add function substring_index, as in spark or mysql. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)). +* Show stats for jemalloc bins. Example ``` SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)). 
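As a quick illustration of two of the new functions listed above (the results in the comments are what they are expected to return on a 23.7 server; treat this as a sketch rather than an authoritative test):

```sql
-- Jaccard similarity: |intersection| / |union| = 2 / 4 = 0.5
SELECT arrayJaccardIndex([1, 2, 3], [2, 3, 4]);

-- MySQL/Spark-style substring_index: everything up to the 2nd '.'
SELECT substring_index('www.clickhouse.com', '.', 2);  -- 'www.clickhouse'
```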
+* Add RowBinaryWithDefaults format with extra byte before each column for using column default value. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)). +* Added new initcap / initcapUTF8 functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create table now supports `PRIMARY KEY` syntax in column definition. Columns are added to primary index in the same order columns are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)). +* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)). +* Added Peak Memory Usage (for query) to client final statistics, and to http header. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added new hasSubsequence() (+CaseInsensitive + UTF8 versions) functions. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). ### Documentation entry for user-facing changes. [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)). +* Add `any_value` as a compatibility alias for `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)). +* Add aggregate function `array_concat_agg` for compatibility with BigQuery, it's alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)). +* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Re-add SipHash keyed functions. [#52206](https://github.com/ClickHouse/ClickHouse/pull/52206) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Added `firstLine` function to extract the first line from the multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)). + +#### Performance Improvement +* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)). 
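A few of the new string and aggregate helpers from the New Feature section above can be sanity-checked with one-liners like the following (a hedged sketch; exact output formatting may differ):

```sql
SELECT initcap('hello clickhouse');          -- 'Hello Clickhouse'
SELECT hasSubsequence('clickhouse', 'chs');  -- 1: 'c', 'h', 's' occur in this order
SELECT firstLine('first line\nsecond line'); -- 'first line'
SELECT OCTET_LENGTH('abc');                  -- 3 (alias of length)
SELECT array_agg(number) FROM numbers(3);    -- [0,1,2] (alias of groupArray)
```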
+* Improves performance of some queries by tuning allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)). +* Writing parquet files is 10x faster, it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). +* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also from now we use connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)). +* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)). +* Improve grace_hash join by reserving hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)). +* Waiting on lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)). +* Remove duplicate condition in functionunixtimestamp64.h. [#51857](https://github.com/ClickHouse/ClickHouse/pull/51857) ([lcjh](https://github.com/ljhcage)). +* The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)). +* 1. Add rewriter for both old and new analyzer. 2. Add settings `optimize_uniq_to_count` which default is 0. [#52004](https://github.com/ClickHouse/ClickHouse/pull/52004) ([JackyWoo](https://github.com/JackyWoo)). +* The performance experiments of **OnTime** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **11.6%** to the QPS of the query **Q8** while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). +* Reduce the number of syscalls in FileCache::loadMetadata. 
[#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement +* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)). +* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix two issues: ``` select geohashEncode(120.2, number::Float64) from numbers(10);. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)). +* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)). +* Allow to have strict lower boundary for file segment size by downloading remaining data in the background. Minimum size of file segment (if actual file size is bigger) is configured as cache configuration setting `boundary_alignment`, by default `4Mi`. Number of background threads are configured as cache configuration setting `background_download_threads`, by default `2`. Also `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow filtering HTTP headers with `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)). +* #50727 new alias for function current_database and added new function current_schemas. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)). +* Log async insert flush queries into to system.query_log. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)). +* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)). +* Use read_bytes/total_bytes_to_read for progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. 
[#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Allow SQL standard `FETCH` without `OFFSET`. See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)). +* A third-party product depending on ClickHouse (Gluten: Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)). +* Fix checking error caused by uninitialized class members. [#51418](https://github.com/ClickHouse/ClickHouse/pull/51418) ([李扬](https://github.com/taiyang-li)). +* Add ability to disable native copy for S3 (setting for BACKUP/RESTORE `allow_s3_native_copy`, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)). +* Add column `primary_key_size` to `system.parts` table to show compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correcting the message of modify storage policy https://github.com/clickhouse/clickhouse/issues/51516 ### documentation entry for user-facing changes. [#51519](https://github.com/ClickHouse/ClickHouse/pull/51519) ([xiaolei565](https://github.com/xiaolei565)). +* Support `DROP FILESYSTEM CACHE KEY [ OFFSET ]`. [#51547](https://github.com/ClickHouse/ClickHouse/pull/51547) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to add disk name for custom disks. Previously custom disks would use an internal generated disk name. Now it will be possible with `disk = disk_(...)` (e.g. disk will have name `name`) . [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add placeholder `%a` for rull filename in rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)). +* Add column modification time into system.parts_columns. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)). +* Add new setting `input_format_csv_use_default_on_bad_values` to CSV format that allows to insert default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)). +* Added a crash log flush to the disk after the unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix behavior in dashboard page where errors unrelated to authentication are not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)). +* Allow UUID to UInt128 conversion. 
[#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Added support for function range of Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Convert condition like `toyear(x) = c` to `c1 <= x < c2`. [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)). +* Improve MySQL compatibility of statement SHOW INDEX. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix `use_structure_from_insertion_table_in_table_functions` does not work with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)). +* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specify the amount of time replica will wait before closing interserver handler for replicated sends. Also fix inconsistency with shutdown of tables and interserver handlers: now server shutdown tables first and only after it shut down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)). +* CacheDictionary request only unique keys from source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)). +* Fixed settings not applied for explain query when format provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)). +* Allow SETTINGS before FORMAT in DESCRIBE TABLE query for compatibility with SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)). +* Var-int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)). +* Update certificates when they change without the need to manually SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)). +* Added `allow_create_index_without_type` setting that allow to ignore `ADD INDEX` queries without specified `TYPE`. Standard SQL queries will just succeed without changing table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)). +* Fixed crash when mysqlxx::Pool::Entry is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)). +* CREATE TABLE ... AS SELECT .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)). +* Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)). 
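Two of the usability changes above are easiest to see with concrete statements. This is illustrative only: `t` is a hypothetical MergeTree table with a `DateTime` column `d`.

```sql
-- The toYear(d) = 2023 condition is now rewritten internally to a range
-- check on d, so index analysis can make use of it.
SELECT count() FROM t WHERE toYear(d) = 2023;

-- SETTINGS may now appear before FORMAT in DESCRIBE, as it already could in SELECT.
DESCRIBE TABLE t SETTINGS describe_include_subcolumns = 1 FORMAT TSV;
```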
+* Add alias for functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). +* Log messages are written to text_log from the beginning. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)). +* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception will be thrown. Made session creation with handling all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Support async_deduplication_token for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)). +* Avro input format support Union with single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)). +* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only `min_max_count` projection). This is defaulted to false until [#52075](https://github.com/ClickHouse/ClickHouse/issues/52075) is fixed. [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)). +* It was possible to use the function `hasToken` for infinite loop. Now this possibility is removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* 1. Upgrade Intel QPL from v1.1.0 to v1.2.0 2. Upgrade Intel accel-config from v3.5 to v4.0 3. Fixed issue that Device IOTLB miss has big perf. impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)). +* Functions "date_diff() and age()" now support millisecond/microsecond unit and work with microsecond precision. [#52181](https://github.com/ClickHouse/ClickHouse/pull/52181) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)). +* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)). +* Check whether S2 geo primitives are invalid as early as possible on ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Now unquoted utf-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)). +* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183) . This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093) . [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)). 
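The compatibility aliases and the finer-grained `date_diff` units mentioned above translate to queries like these (a sketch; the sub-second units need `DateTime64` arguments of sufficient precision):

```sql
-- MySQL-style aliases for today() and now()
SELECT curdate(), current_timestamp();

-- date_diff()/age() now understand millisecond and microsecond units
SELECT date_diff('millisecond',
                 toDateTime64('2023-07-27 00:00:00.000', 3),
                 toDateTime64('2023-07-27 00:00:00.250', 3));  -- 250
```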
+* Add new setting `disable_url_encoding` that allows to disable decoding/encoding path in uri in URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)). +* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)). +* Wait for zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)). +* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52419](https://github.com/ClickHouse/ClickHouse/pull/52419) ([alesapin](https://github.com/alesapin)). +* The `session_timezone` setting is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Now interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)). +* Added field `refcount` to `system.remote_data_paths` table. [#52518](https://github.com/ClickHouse/ClickHouse/pull/52518) ([Anton Popov](https://github.com/CurtizJ)). +* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true` only sizes of the columns from `PREWHERE` section will be considered to determine reading task size. Otherwise all the columns from query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)). + +#### Build/Testing/Packaging Improvement +* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed CRC32(WeakHash32) issue for s390x. [#50365](https://github.com/ClickHouse/ClickHouse/pull/50365) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Add integration test check with the enabled analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) ([Dmitry Novik](https://github.com/novikd)). +* Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Fixed several issues found by OSS-Fuzz. [#51736](https://github.com/ClickHouse/ClickHouse/pull/51736) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* There were a couple of failures because of (?) S3 availability. The sccache has a feature of failing over to local compilation. [#51893](https://github.com/ClickHouse/ClickHouse/pull/51893) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* 02242_delete_user_race and 02243_drop_user_grant_race tests have been corrected. [#51923](https://github.com/ClickHouse/ClickHouse/pull/51923) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` to work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. 
[#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)). +* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow up [#50926](https://github.com/ClickHouse/ClickHouse/issues/50926). Add integration tests check with enabled analyzer to master. [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)). +* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)). +* Improve the startup time of `clickhouse-client` and `clickhouse-local` in debug and sanitizer builds. This closes [#52228](https://github.com/ClickHouse/ClickHouse/issues/52228). [#52489](https://github.com/ClickHouse/ClickHouse/pull/52489) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix materialised pg syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)). +* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)). +* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)). +* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)). +* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)). +* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). 
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)). +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix segfault when create invalid EmbeddedRocksdb table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)). +* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). 
+* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add documentation for building in docker"'. [#51773](https://github.com/ClickHouse/ClickHouse/pull/51773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix build"'. [#51911](https://github.com/ClickHouse/ClickHouse/pull/51911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Millisecond and microsecond support in date_diff / age functions"'. [#52129](https://github.com/ClickHouse/ClickHouse/pull/52129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Re-add SipHash keyed functions"'. [#52466](https://github.com/ClickHouse/ClickHouse/pull/52466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add an ability to specify allocations size for sampling memory profiler"'. [#52496](https://github.com/ClickHouse/ClickHouse/pull/52496) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Rewrite uniq to count"'. 
[#52576](https://github.com/ClickHouse/ClickHouse/pull/52576) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove duplicate_order_by_and_distinct optimization [#47135](https://github.com/ClickHouse/ClickHouse/pull/47135) ([Igor Nikonov](https://github.com/devcrafter)). +* Update sort desc in ReadFromMergeTree after applying PREWHERE info [#48669](https://github.com/ClickHouse/ClickHouse/pull/48669) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `BindException: Address already in use` in HDFS integration tests [#49428](https://github.com/ClickHouse/ClickHouse/pull/49428) ([Nikita Taranov](https://github.com/nickitat)). +* Force libunwind usage (removes gcc_eh support) [#49438](https://github.com/ClickHouse/ClickHouse/pull/49438) ([Azat Khuzhin](https://github.com/azat)). +* Cleanup `storage_conf.xml` [#49557](https://github.com/ClickHouse/ClickHouse/pull/49557) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky tests caused by OPTIMIZE FINAL failing memory budget check [#49764](https://github.com/ClickHouse/ClickHouse/pull/49764) ([Michael Kolupaev](https://github.com/al13n321)). +* Remove unstable queries from performance/join_set_filter [#50235](https://github.com/ClickHouse/ClickHouse/pull/50235) ([vdimir](https://github.com/vdimir)). +* More accurate DNS resolve for the keeper connection [#50738](https://github.com/ClickHouse/ClickHouse/pull/50738) ([pufit](https://github.com/pufit)). +* Try to fix some trash in Disks and part moves [#51135](https://github.com/ClickHouse/ClickHouse/pull/51135) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add jemalloc support fro s390x [#51186](https://github.com/ClickHouse/ClickHouse/pull/51186) ([Boris Kuschel](https://github.com/bkuschel)). +* Resubmit [#48821](https://github.com/ClickHouse/ClickHouse/issues/48821) [#51208](https://github.com/ClickHouse/ClickHouse/pull/51208) ([Kseniia Sumarokova](https://github.com/kssenii)). +* test for [#36894](https://github.com/ClickHouse/ClickHouse/issues/36894) [#51274](https://github.com/ClickHouse/ClickHouse/pull/51274) ([Denny Crane](https://github.com/den-crane)). +* external_aggregation_fix for big endian machines [#51280](https://github.com/ClickHouse/ClickHouse/pull/51280) ([Sanjam Panda](https://github.com/saitama951)). +* Fix: Invalid number of rows in Chunk column Object [#51296](https://github.com/ClickHouse/ClickHouse/pull/51296) ([Igor Nikonov](https://github.com/devcrafter)). +* Add a test for [#44816](https://github.com/ClickHouse/ClickHouse/issues/44816) [#51305](https://github.com/ClickHouse/ClickHouse/pull/51305) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for `calculate_text_stack_trace` setting [#51311](https://github.com/ClickHouse/ClickHouse/pull/51311) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* decrease log level, make logs shorter [#51320](https://github.com/ClickHouse/ClickHouse/pull/51320) ([Sema Checherinda](https://github.com/CheSema)). +* Collect stack traces from job's scheduling and print along with exception's stack trace. [#51349](https://github.com/ClickHouse/ClickHouse/pull/51349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add a test for [#42691](https://github.com/ClickHouse/ClickHouse/issues/42691) [#51352](https://github.com/ClickHouse/ClickHouse/pull/51352) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add a test for [#32474](https://github.com/ClickHouse/ClickHouse/issues/32474) [#51354](https://github.com/ClickHouse/ClickHouse/pull/51354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#41727](https://github.com/ClickHouse/ClickHouse/issues/41727) [#51355](https://github.com/ClickHouse/ClickHouse/pull/51355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#35801](https://github.com/ClickHouse/ClickHouse/issues/35801) [#51356](https://github.com/ClickHouse/ClickHouse/pull/51356) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34626](https://github.com/ClickHouse/ClickHouse/issues/34626) [#51357](https://github.com/ClickHouse/ClickHouse/pull/51357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Initialize text_log earlier to capture table startup messages [#51360](https://github.com/ClickHouse/ClickHouse/pull/51360) ([Azat Khuzhin](https://github.com/azat)). +* Use separate default settings for clickhouse-local [#51363](https://github.com/ClickHouse/ClickHouse/pull/51363) ([Azat Khuzhin](https://github.com/azat)). +* Attempt to remove wrong code (catch/throw in Functions) [#51367](https://github.com/ClickHouse/ClickHouse/pull/51367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove suspicious code [#51383](https://github.com/ClickHouse/ClickHouse/pull/51383) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable hedged requests under TSan [#51392](https://github.com/ClickHouse/ClickHouse/pull/51392) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* no finalize in d-tor WriteBufferFromOStream [#51404](https://github.com/ClickHouse/ClickHouse/pull/51404) ([Sema Checherinda](https://github.com/CheSema)). +* Better diagnostics for 01193_metadata_loading [#51414](https://github.com/ClickHouse/ClickHouse/pull/51414) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix attaching gdb in stress tests [#51445](https://github.com/ClickHouse/ClickHouse/pull/51445) ([Kruglov Pavel](https://github.com/Avogar)). +* Merging [#36384](https://github.com/ClickHouse/ClickHouse/issues/36384) [#51458](https://github.com/ClickHouse/ClickHouse/pull/51458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible race on shutdown wait [#51497](https://github.com/ClickHouse/ClickHouse/pull/51497) ([Sergei Trifonov](https://github.com/serxa)). +* Fix `test_alter_moving_garbage`: lock between getActiveContainingPart and swapActivePart in parts mover [#51498](https://github.com/ClickHouse/ClickHouse/pull/51498) ([vdimir](https://github.com/vdimir)). +* Fix a logical error on mutation [#51502](https://github.com/ClickHouse/ClickHouse/pull/51502) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix running integration tests with spaces in it's names [#51514](https://github.com/ClickHouse/ClickHouse/pull/51514) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test 00417_kill_query [#51522](https://github.com/ClickHouse/ClickHouse/pull/51522) ([Nikolay Degterinsky](https://github.com/evillique)). +* fs cache: add some checks [#51536](https://github.com/ClickHouse/ClickHouse/pull/51536) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests [#51549](https://github.com/ClickHouse/ClickHouse/pull/51549) ([Nikita Taranov](https://github.com/nickitat)). 
+* 00900_orc_load: lift kill timeout [#51559](https://github.com/ClickHouse/ClickHouse/pull/51559) ([Robert Schulze](https://github.com/rschu1ze)). +* Add retries to 00416_pocopatch_progress_in_http_headers [#51575](https://github.com/ClickHouse/ClickHouse/pull/51575) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix merge_selecting_task scheduling [#51591](https://github.com/ClickHouse/ClickHouse/pull/51591) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add hex functions for cityhash [#51595](https://github.com/ClickHouse/ClickHouse/pull/51595) ([Vitaly Baranov](https://github.com/vitlibar)). +* Remove `unset CLICKHOUSE_LOG_COMMENT` from tests [#51623](https://github.com/ClickHouse/ClickHouse/pull/51623) ([Nikita Taranov](https://github.com/nickitat)). +* Implement endianness-independent serialization [#51637](https://github.com/ClickHouse/ClickHouse/pull/51637) ([ltrk2](https://github.com/ltrk2)). +* Ignore APPEND and TRUNCATE modifiers if file does not exist. [#51640](https://github.com/ClickHouse/ClickHouse/pull/51640) ([alekar](https://github.com/alekar)). +* Try to fix flaky 02210_processors_profile_log [#51641](https://github.com/ClickHouse/ClickHouse/pull/51641) ([Igor Nikonov](https://github.com/devcrafter)). +* Make common macros extendable [#51646](https://github.com/ClickHouse/ClickHouse/pull/51646) ([Amos Bird](https://github.com/amosbird)). +* Correct an exception message in src/Functions/nested.cpp [#51651](https://github.com/ClickHouse/ClickHouse/pull/51651) ([Alex Cheng](https://github.com/Alex-Cheng)). +* tests: fix 02050_client_profile_events flakiness [#51653](https://github.com/ClickHouse/ClickHouse/pull/51653) ([Azat Khuzhin](https://github.com/azat)). +* Minor follow-up to re2 update to 2023-06-02 ([#50949](https://github.com/ClickHouse/ClickHouse/issues/50949)) [#51655](https://github.com/ClickHouse/ClickHouse/pull/51655) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Update timeouts in tests for transactions [#51683](https://github.com/ClickHouse/ClickHouse/pull/51683) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove unused code [#51684](https://github.com/ClickHouse/ClickHouse/pull/51684) ([Sergei Trifonov](https://github.com/serxa)). +* Remove `mmap/mremap/munmap` from Allocator.h [#51686](https://github.com/ClickHouse/ClickHouse/pull/51686) ([alesapin](https://github.com/alesapin)). +* SonarCloud: Add C++23 Experimental Flag [#51687](https://github.com/ClickHouse/ClickHouse/pull/51687) ([Julio Jimenez](https://github.com/juliojimenez)). +* Wait with retries when attaching GDB in tests [#51688](https://github.com/ClickHouse/ClickHouse/pull/51688) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v23.6.1.1524-stable [#51691](https://github.com/ClickHouse/ClickHouse/pull/51691) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* fix write to finalized buffer [#51696](https://github.com/ClickHouse/ClickHouse/pull/51696) ([Sema Checherinda](https://github.com/CheSema)). 
+* do not log exception aborted for pending mutate/merge entries when shutdown [#51697](https://github.com/ClickHouse/ClickHouse/pull/51697) ([Sema Checherinda](https://github.com/CheSema)). +* Fix race in ContextAccess [#51704](https://github.com/ClickHouse/ClickHouse/pull/51704) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make test scripts backwards compatible [#51707](https://github.com/ClickHouse/ClickHouse/pull/51707) ([Antonio Andelic](https://github.com/antonio2368)). +* test for full join and null predicate [#51709](https://github.com/ClickHouse/ClickHouse/pull/51709) ([Denny Crane](https://github.com/den-crane)). +* A cmake warning on job limits underutilizing CPU [#51710](https://github.com/ClickHouse/ClickHouse/pull/51710) ([velavokr](https://github.com/velavokr)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Added ASK_PASSWORD client constant instead of hardcoded '\n' [#51723](https://github.com/ClickHouse/ClickHouse/pull/51723) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Update README.md [#51726](https://github.com/ClickHouse/ClickHouse/pull/51726) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove MemoryPool from Poco because it's useless [#51732](https://github.com/ClickHouse/ClickHouse/pull/51732) ([alesapin](https://github.com/alesapin)). +* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)). +* Update 01320_create_sync_race_condition_zookeeper.sh [#51742](https://github.com/ClickHouse/ClickHouse/pull/51742) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Revert "Fix: Invalid number of rows in Chunk column Object" [#51750](https://github.com/ClickHouse/ClickHouse/pull/51750) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add SonarCloud to README [#51751](https://github.com/ClickHouse/ClickHouse/pull/51751) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test `02789_object_type_invalid_num_of_rows` [#51754](https://github.com/ClickHouse/ClickHouse/pull/51754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix (benign) data race in `transform` [#51755](https://github.com/ClickHouse/ClickHouse/pull/51755) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky KeeperMap test [#51764](https://github.com/ClickHouse/ClickHouse/pull/51764) ([Antonio Andelic](https://github.com/antonio2368)). +* Version mypy=1.4.1 falsly reports unused ignore comment [#51769](https://github.com/ClickHouse/ClickHouse/pull/51769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid keeping lock Context::getLock() while calculating access rights [#51772](https://github.com/ClickHouse/ClickHouse/pull/51772) ([Vitaly Baranov](https://github.com/vitlibar)). +* Making stateless tests with timeout less flaky [#51774](https://github.com/ClickHouse/ClickHouse/pull/51774) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix after [#51000](https://github.com/ClickHouse/ClickHouse/issues/51000) [#51790](https://github.com/ClickHouse/ClickHouse/pull/51790) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Add assert in ThreadStatus destructor for correct current_thread [#51800](https://github.com/ClickHouse/ClickHouse/pull/51800) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix broken parts handling in `ReplicatedMergeTree` [#51801](https://github.com/ClickHouse/ClickHouse/pull/51801) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix tsan signal-unsafe call [#51802](https://github.com/ClickHouse/ClickHouse/pull/51802) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix for parallel replicas not completely disabled by granule count threshold [#51805](https://github.com/ClickHouse/ClickHouse/pull/51805) ([Alexander Gololobov](https://github.com/davenger)). +* Make sure that we don't attempt to serialize/deserialize block with 0 columns and non-zero rows [#51807](https://github.com/ClickHouse/ClickHouse/pull/51807) ([Alexander Gololobov](https://github.com/davenger)). +* Fix rare bug in `DROP COLUMN` and enabled sparse columns [#51809](https://github.com/ClickHouse/ClickHouse/pull/51809) ([Anton Popov](https://github.com/CurtizJ)). +* Fix flaky `test_multiple_disks` [#51821](https://github.com/ClickHouse/ClickHouse/pull/51821) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow up to [#51547](https://github.com/ClickHouse/ClickHouse/issues/51547) [#51822](https://github.com/ClickHouse/ClickHouse/pull/51822) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly grep archives in stress tests [#51824](https://github.com/ClickHouse/ClickHouse/pull/51824) ([Antonio Andelic](https://github.com/antonio2368)). +* Update analyzer_tech_debt.txt [#51836](https://github.com/ClickHouse/ClickHouse/pull/51836) ([Alexander Tokmakov](https://github.com/tavplubix)). +* remove unused code [#51837](https://github.com/ClickHouse/ClickHouse/pull/51837) ([flynn](https://github.com/ucasfl)). +* Fix disk config for upgrade tests [#51839](https://github.com/ClickHouse/ClickHouse/pull/51839) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove Coverity from workflows, but leave in the code [#51842](https://github.com/ClickHouse/ClickHouse/pull/51842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Many fixes [3] [#51848](https://github.com/ClickHouse/ClickHouse/pull/51848) ([Ilya Yatsishin](https://github.com/qoega)). +* Change misleading name in joins: addJoinedBlock -> addBlockToJoin [#51852](https://github.com/ClickHouse/ClickHouse/pull/51852) ([Igor Nikonov](https://github.com/devcrafter)). +* fix: correct exception messages on policies comparison [#51854](https://github.com/ClickHouse/ClickHouse/pull/51854) ([Feng Kaiyu](https://github.com/fky2015)). +* Update 02439_merge_selecting_partitions.sql [#51862](https://github.com/ClickHouse/ClickHouse/pull/51862) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove useless packages [#51863](https://github.com/ClickHouse/ClickHouse/pull/51863) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless logs [#51865](https://github.com/ClickHouse/ClickHouse/pull/51865) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect log level = warning [#51867](https://github.com/ClickHouse/ClickHouse/pull/51867) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_replicated_table_attach [#51868](https://github.com/ClickHouse/ClickHouse/pull/51868) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Better usability of a test [#51869](https://github.com/ClickHouse/ClickHouse/pull/51869) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless code [#51873](https://github.com/ClickHouse/ClickHouse/pull/51873) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix upgrade check script [#51878](https://github.com/ClickHouse/ClickHouse/pull/51878) ([Antonio Andelic](https://github.com/antonio2368)). +* Sqlloogic improvements [#51883](https://github.com/ClickHouse/ClickHouse/pull/51883) ([Ilya Yatsishin](https://github.com/qoega)). +* Disable ThinLTO on non-Linux [#51897](https://github.com/ClickHouse/ClickHouse/pull/51897) ([Robert Schulze](https://github.com/rschu1ze)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix build [#51909](https://github.com/ClickHouse/ClickHouse/pull/51909) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix build [#51910](https://github.com/ClickHouse/ClickHouse/pull/51910) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `00175_partition_by_ignore` and move it to correct location [#51913](https://github.com/ClickHouse/ClickHouse/pull/51913) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool [#51914](https://github.com/ClickHouse/ClickHouse/pull/51914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe better tests [#51916](https://github.com/ClickHouse/ClickHouse/pull/51916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert system drop filesystem cache by key [#51917](https://github.com/ClickHouse/ClickHouse/pull/51917) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `detach_attach_partition_race` [#51920](https://github.com/ClickHouse/ClickHouse/pull/51920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix for `02481_async_insert_race_long` [#51925](https://github.com/ClickHouse/ClickHouse/pull/51925) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault caused by `ThreadStatus` [#51931](https://github.com/ClickHouse/ClickHouse/pull/51931) ([Antonio Andelic](https://github.com/antonio2368)). +* Print short fault info only from safe fields [#51932](https://github.com/ClickHouse/ClickHouse/pull/51932) ([Alexander Gololobov](https://github.com/davenger)). +* Fix typo in integration tests [#51944](https://github.com/ClickHouse/ClickHouse/pull/51944) ([Ilya Yatsishin](https://github.com/qoega)). +* Better logs on shutdown [#51951](https://github.com/ClickHouse/ClickHouse/pull/51951) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Filter databases list before querying potentially slow fields [#51955](https://github.com/ClickHouse/ClickHouse/pull/51955) ([Alexander Gololobov](https://github.com/davenger)). +* Fix some issues with transactions [#51959](https://github.com/ClickHouse/ClickHouse/pull/51959) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix unrelated messages from LSan in clickhouse-client [#51966](https://github.com/ClickHouse/ClickHouse/pull/51966) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow OOM in AST Fuzzer with Sanitizers [#51967](https://github.com/ClickHouse/ClickHouse/pull/51967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Disable one test under Analyzer [#51968](https://github.com/ClickHouse/ClickHouse/pull/51968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Docker [#51969](https://github.com/ClickHouse/ClickHouse/pull/51969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01825_type_json_from_map` [#51970](https://github.com/ClickHouse/ClickHouse/pull/51970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02354_distributed_with_external_aggregation_memory_usage` [#51971](https://github.com/ClickHouse/ClickHouse/pull/51971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix disaster in integration tests, part 2 [#51973](https://github.com/ClickHouse/ClickHouse/pull/51973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Cleanup remote_servers in dist config.xml [#51985](https://github.com/ClickHouse/ClickHouse/pull/51985) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.6.2.18-stable [#51986](https://github.com/ClickHouse/ClickHouse/pull/51986) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.8.20.11-lts [#51987](https://github.com/ClickHouse/ClickHouse/pull/51987) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix performance test for regexp cache [#51988](https://github.com/ClickHouse/ClickHouse/pull/51988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Move a test to the right place [#51989](https://github.com/ClickHouse/ClickHouse/pull/51989) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a check to validate that the stateful tests are stateful [#51990](https://github.com/ClickHouse/ClickHouse/pull/51990) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check that functional tests cleanup their tables [#51991](https://github.com/ClickHouse/ClickHouse/pull/51991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_extreme_deduplication [#51992](https://github.com/ClickHouse/ClickHouse/pull/51992) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Cleanup SymbolIndex after reload got removed [#51993](https://github.com/ClickHouse/ClickHouse/pull/51993) ([Azat Khuzhin](https://github.com/azat)). +* Update CompletedPipelineExecutor exception log name [#52028](https://github.com/ClickHouse/ClickHouse/pull/52028) ([xiao](https://github.com/nicelulu)). +* Fix `00502_custom_partitioning_replicated_zookeeper_long` [#52032](https://github.com/ClickHouse/ClickHouse/pull/52032) ([Antonio Andelic](https://github.com/antonio2368)). +* Prohibit send_metadata for s3_plain disks [#52038](https://github.com/ClickHouse/ClickHouse/pull/52038) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.4.6.25-stable [#52061](https://github.com/ClickHouse/ClickHouse/pull/52061) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Preparations for Trivial Support For Resharding (part1) [#52068](https://github.com/ClickHouse/ClickHouse/pull/52068) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.3.8.21-lts [#52077](https://github.com/ClickHouse/ClickHouse/pull/52077) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flakiness of test_keeper_s3_snapshot flakiness [#52083](https://github.com/ClickHouse/ClickHouse/pull/52083) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix test_extreme_deduplication flakiness [#52085](https://github.com/ClickHouse/ClickHouse/pull/52085) ([Azat Khuzhin](https://github.com/azat)). +* Small docs update for toYearWeek() function [#52090](https://github.com/ClickHouse/ClickHouse/pull/52090) ([Andrey Zvonov](https://github.com/zvonand)). +* Small docs update for DateTime, DateTime64 [#52094](https://github.com/ClickHouse/ClickHouse/pull/52094) ([Andrey Zvonov](https://github.com/zvonand)). +* Add missing --force for docker network prune (otherwise it is noop on CI) [#52095](https://github.com/ClickHouse/ClickHouse/pull/52095) ([Azat Khuzhin](https://github.com/azat)). +* tests: drop existing view in test_materialized_mysql_database [#52103](https://github.com/ClickHouse/ClickHouse/pull/52103) ([Azat Khuzhin](https://github.com/azat)). +* Update README.md [#52115](https://github.com/ClickHouse/ClickHouse/pull/52115) ([Tyler Hannan](https://github.com/tylerhannan)). +* Print Zxid in keeper stat command in hex (so as ZooKeeper) [#52122](https://github.com/ClickHouse/ClickHouse/pull/52122) ([Azat Khuzhin](https://github.com/azat)). +* Skip protection from double decompression if inode from maps cannot be obtained [#52138](https://github.com/ClickHouse/ClickHouse/pull/52138) ([Azat Khuzhin](https://github.com/azat)). +* There is no point in detecting flaky tests [#52142](https://github.com/ClickHouse/ClickHouse/pull/52142) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove default argument value [#52143](https://github.com/ClickHouse/ClickHouse/pull/52143) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the "kill_mutation" test [#52144](https://github.com/ClickHouse/ClickHouse/pull/52144) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ORDER BY tuple of WINDOW functions (and slightly more changes) [#52146](https://github.com/ClickHouse/ClickHouse/pull/52146) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible EADDRINUSE ("Address already in use") in integration tests [#52148](https://github.com/ClickHouse/ClickHouse/pull/52148) ([Azat Khuzhin](https://github.com/azat)). +* Fix test 02497_storage_file_reader_selection [#52154](https://github.com/ClickHouse/ClickHouse/pull/52154) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix unexpected AST Set [#52158](https://github.com/ClickHouse/ClickHouse/pull/52158) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix slow test `02317_distinct_in_order_optimization` [#52173](https://github.com/ClickHouse/ClickHouse/pull/52173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add comments for https://github.com/ClickHouse/ClickHouse/pull/52112 [#52175](https://github.com/ClickHouse/ClickHouse/pull/52175) ([李扬](https://github.com/taiyang-li)). +* Randomize timezone in tests across non-deterministic around 1970 and default [#52184](https://github.com/ClickHouse/ClickHouse/pull/52184) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_multiple_disks/test.py::test_start_stop_moves` [#52189](https://github.com/ClickHouse/ClickHouse/pull/52189) ([Antonio Andelic](https://github.com/antonio2368)). +* CMake: Simplify job limiting [#52196](https://github.com/ClickHouse/ClickHouse/pull/52196) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Fix self extracting binaries under qemu linux-user (qemu-$ARCH-static) [#52198](https://github.com/ClickHouse/ClickHouse/pull/52198) ([Azat Khuzhin](https://github.com/azat)). +* Fix `Integration tests flaky check (asan)` [#52201](https://github.com/ClickHouse/ClickHouse/pull/52201) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test test_lost_part [#52202](https://github.com/ClickHouse/ClickHouse/pull/52202) ([alesapin](https://github.com/alesapin)). +* MaterializedMySQL: Replace to_string by magic_enum::enum_name [#52204](https://github.com/ClickHouse/ClickHouse/pull/52204) ([Val Doroshchuk](https://github.com/valbok)). +* MaterializedMySQL: Add tests to parse db and table names from DDL [#52208](https://github.com/ClickHouse/ClickHouse/pull/52208) ([Val Doroshchuk](https://github.com/valbok)). +* Revert "Fixed several issues found by OSS-Fuzz" [#52216](https://github.com/ClickHouse/ClickHouse/pull/52216) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use one copy replication more agressively [#52218](https://github.com/ClickHouse/ClickHouse/pull/52218) ([alesapin](https://github.com/alesapin)). +* Fix flaky test `01076_parallel_alter_replicated_zookeeper` [#52221](https://github.com/ClickHouse/ClickHouse/pull/52221) ([alesapin](https://github.com/alesapin)). +* Fix 01889_key_condition_function_chains for analyzer. [#52223](https://github.com/ClickHouse/ClickHouse/pull/52223) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Inhibit settings randomization in the test `json_ghdata` [#52226](https://github.com/ClickHouse/ClickHouse/pull/52226) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better diagnostics in a test [#52227](https://github.com/ClickHouse/ClickHouse/pull/52227) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable no-upgrade-check for 02273_full_sort_join [#52235](https://github.com/ClickHouse/ClickHouse/pull/52235) ([vdimir](https://github.com/vdimir)). +* Fix network manager for integration tests [#52237](https://github.com/ClickHouse/ClickHouse/pull/52237) ([Azat Khuzhin](https://github.com/azat)). +* List replication queue only for current test database [#52238](https://github.com/ClickHouse/ClickHouse/pull/52238) ([Alexander Gololobov](https://github.com/davenger)). +* Attempt to fix assert in tsan with fibers [#52241](https://github.com/ClickHouse/ClickHouse/pull/52241) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix undefined behaviour in fuzzer [#52256](https://github.com/ClickHouse/ClickHouse/pull/52256) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow-up to [#51959](https://github.com/ClickHouse/ClickHouse/issues/51959) [#52261](https://github.com/ClickHouse/ClickHouse/pull/52261) ([Alexander Tokmakov](https://github.com/tavplubix)). +* More fair queue for `drop table sync` [#52276](https://github.com/ClickHouse/ClickHouse/pull/52276) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `02497_trace_events_stress_long` [#52279](https://github.com/ClickHouse/ClickHouse/pull/52279) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix test `01111_create_drop_replicated_db_stress` [#52283](https://github.com/ClickHouse/ClickHouse/pull/52283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ugly code [#52284](https://github.com/ClickHouse/ClickHouse/pull/52284) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add missing replica syncs in test_backup_restore_on_cluster [#52306](https://github.com/ClickHouse/ClickHouse/pull/52306) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix test_replicated_database 'node doesn't exist' flakiness [#52307](https://github.com/ClickHouse/ClickHouse/pull/52307) ([Michael Kolupaev](https://github.com/al13n321)). +* Minor: Update description of events "QueryCacheHits/Misses" [#52309](https://github.com/ClickHouse/ClickHouse/pull/52309) ([Robert Schulze](https://github.com/rschu1ze)). +* Beautify pretty-printing of the query string in SYSTEM.QUERY_CACHE [#52312](https://github.com/ClickHouse/ClickHouse/pull/52312) ([Robert Schulze](https://github.com/rschu1ze)). +* Reduce dependencies for skim by avoid using default features [#52316](https://github.com/ClickHouse/ClickHouse/pull/52316) ([Azat Khuzhin](https://github.com/azat)). +* Fix 02725_memory-for-merges [#52317](https://github.com/ClickHouse/ClickHouse/pull/52317) ([alesapin](https://github.com/alesapin)). +* Skip unsupported disks in Keeper [#52321](https://github.com/ClickHouse/ClickHouse/pull/52321) ([Antonio Andelic](https://github.com/antonio2368)). +* Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed" [#52322](https://github.com/ClickHouse/ClickHouse/pull/52322) ([Kruglov Pavel](https://github.com/Avogar)). +* Resubmit [#51716](https://github.com/ClickHouse/ClickHouse/issues/51716) [#52323](https://github.com/ClickHouse/ClickHouse/pull/52323) ([Kruglov Pavel](https://github.com/Avogar)). +* Add logging about all found workflows for merge_pr.py [#52324](https://github.com/ClickHouse/ClickHouse/pull/52324) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Minor: Less awkward IAST::FormatSettings [#52332](https://github.com/ClickHouse/ClickHouse/pull/52332) ([Robert Schulze](https://github.com/rschu1ze)). +* Mark test 02125_many_mutations_2 as no-parallel to avoid flakiness [#52338](https://github.com/ClickHouse/ClickHouse/pull/52338) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix capabilities installed via systemd service (fixes netlink/IO priorities) [#52357](https://github.com/ClickHouse/ClickHouse/pull/52357) ([Azat Khuzhin](https://github.com/azat)). +* Update 01606_git_import.sh [#52360](https://github.com/ClickHouse/ClickHouse/pull/52360) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update ci-slack-bot.py [#52372](https://github.com/ClickHouse/ClickHouse/pull/52372) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `test_keeper_session` [#52373](https://github.com/ClickHouse/ClickHouse/pull/52373) ([Antonio Andelic](https://github.com/antonio2368)). +* Update ci-slack-bot.py [#52374](https://github.com/ClickHouse/ClickHouse/pull/52374) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable analyzer setting in backward_compatibility integration tests. [#52375](https://github.com/ClickHouse/ClickHouse/pull/52375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* New metric - Filesystem cache size limit [#52378](https://github.com/ClickHouse/ClickHouse/pull/52378) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Fix `test_replicated_merge_tree_encrypted_disk ` [#52379](https://github.com/ClickHouse/ClickHouse/pull/52379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `02122_parallel_formatting_XML ` [#52380](https://github.com/ClickHouse/ClickHouse/pull/52380) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
+* Follow up to [#49698](https://github.com/ClickHouse/ClickHouse/issues/49698) [#52381](https://github.com/ClickHouse/ClickHouse/pull/52381) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Rename TaskStatsInfoGetter into NetlinkMetricsProvider [#52392](https://github.com/ClickHouse/ClickHouse/pull/52392) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_keeper_force_recovery` [#52408](https://github.com/ClickHouse/ClickHouse/pull/52408) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky gtest_lru_file_cache.cpp [#52418](https://github.com/ClickHouse/ClickHouse/pull/52418) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix: remove redundant distinct with views [#52438](https://github.com/ClickHouse/ClickHouse/pull/52438) ([Igor Nikonov](https://github.com/devcrafter)). +* Add 02815_range_dict_no_direct_join to analyzer_tech_debt.txt [#52464](https://github.com/ClickHouse/ClickHouse/pull/52464) ([vdimir](https://github.com/vdimir)). +* do not throw exception in OptimizedRegularExpressionImpl::analyze [#52467](https://github.com/ClickHouse/ClickHouse/pull/52467) ([Han Fei](https://github.com/hanfei1991)). +* Remove skip_startup_tables from IDatabase::loadStoredObjects() [#52491](https://github.com/ClickHouse/ClickHouse/pull/52491) ([Azat Khuzhin](https://github.com/azat)). +* Fix test_insert_same_partition_and_merge by increasing wait time [#52497](https://github.com/ClickHouse/ClickHouse/pull/52497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Try to fix asan wanring in HashJoin [#52499](https://github.com/ClickHouse/ClickHouse/pull/52499) ([Igor Nikonov](https://github.com/devcrafter)). +* Replace with three way comparison [#52509](https://github.com/ClickHouse/ClickHouse/pull/52509) ([flynn](https://github.com/ucasfl)). +* Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop [#52514](https://github.com/ClickHouse/ClickHouse/pull/52514) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_throttling` [#52515](https://github.com/ClickHouse/ClickHouse/pull/52515) ([Antonio Andelic](https://github.com/antonio2368)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `toDecimalString` function [#52520](https://github.com/ClickHouse/ClickHouse/pull/52520) ([Andrey Zvonov](https://github.com/zvonand)). +* Remove unused code [#52527](https://github.com/ClickHouse/ClickHouse/pull/52527) ([Raúl Marín](https://github.com/Algunenano)). +* Cancel execution in PipelineExecutor in case of exception in graph->updateNode [#52533](https://github.com/ClickHouse/ClickHouse/pull/52533) ([Kruglov Pavel](https://github.com/Avogar)). +* Make 01951_distributed_push_down_limit analyzer agnostic [#52534](https://github.com/ClickHouse/ClickHouse/pull/52534) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix disallow_concurrency test for backup and restore [#52536](https://github.com/ClickHouse/ClickHouse/pull/52536) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update 02136_scalar_subquery_metrics.sql [#52537](https://github.com/ClickHouse/ClickHouse/pull/52537) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* tests: fix 01035_avg_weighted_long flakiness [#52556](https://github.com/ClickHouse/ClickHouse/pull/52556) ([Azat Khuzhin](https://github.com/azat)). +* tests: increase throttling for 01923_network_receive_time_metric_insert [#52557](https://github.com/ClickHouse/ClickHouse/pull/52557) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 00719_parallel_ddl_table flakiness in debug builds [#52558](https://github.com/ClickHouse/ClickHouse/pull/52558) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix 01821_join_table_race_long flakiness [#52559](https://github.com/ClickHouse/ClickHouse/pull/52559) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky `00995_exception_while_insert` [#52568](https://github.com/ClickHouse/ClickHouse/pull/52568) ([Antonio Andelic](https://github.com/antonio2368)). +* MaterializedMySQL: Fix typos in tests [#52575](https://github.com/ClickHouse/ClickHouse/pull/52575) ([Val Doroshchuk](https://github.com/valbok)). +* Fix `02497_trace_events_stress_long` again [#52587](https://github.com/ClickHouse/ClickHouse/pull/52587) ([Antonio Andelic](https://github.com/antonio2368)). +* Revert "Remove `mmap/mremap/munmap` from Allocator.h" [#52589](https://github.com/ClickHouse/ClickHouse/pull/52589) ([Nikita Taranov](https://github.com/nickitat)). +* Remove peak memory usage from the final message in the client [#52598](https://github.com/ClickHouse/ClickHouse/pull/52598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* GinIndexStore: fix a bug when files are finalizated after first write, [#52602](https://github.com/ClickHouse/ClickHouse/pull/52602) ([Sema Checherinda](https://github.com/CheSema)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix build with clang-15 [#52627](https://github.com/ClickHouse/ClickHouse/pull/52627) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix style [#52647](https://github.com/ClickHouse/ClickHouse/pull/52647) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix logging level of a noisy message [#52648](https://github.com/ClickHouse/ClickHouse/pull/52648) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Added field `refcount` to `system.remote_data_paths` table" [#52657](https://github.com/ClickHouse/ClickHouse/pull/52657) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 8b535e3d897..1eabc65a10f 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.7.1.2470-stable 2023-07-27 v23.6.2.18-stable 2023-07-09 v23.6.1.1524-stable 2023-06-30 v23.5.4.25-stable 2023-06-29 From ce38d3c5ea45507696430e9c7f39f9ab7b9de394 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:11:08 +0200 Subject: [PATCH 441/478] address comment --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 53481ab06a0..dbb4f7f0d8e 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,11 +10,17 @@ #include #include +#include "Common/Exception.h" #include namespace DB { +namespace ErrorCodes +{ + extern const int ABORTED; +} + namespace { @@ -271,8 +277,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf if (stopped) { - LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we drop this part."); - return; + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); } chassert(current_writer != nullptr); From f5dfb70f5c2f4b94a54e9fdb97737a70b28362ad Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:12:20 +0200 Subject: [PATCH 442/478] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index dbb4f7f0d8e..80e94b2fd39 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -10,7 +10,7 @@ #include #include -#include "Common/Exception.h" +#include #include namespace DB From 0d44d527ef590a5471ea577c132edb42f0c99c70 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:27:04 +0200 Subject: [PATCH 443/478] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp Co-authored-by: Alexander Tokmakov --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 80e94b2fd39..25b93160d27 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -241,8 +241,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: if (stopped) { - LOG_ERROR(&Poco::Logger::get("MergeTreeDeduplicationLog"), "Storage has been shutdown when we add this part."); - return {}; + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); } chassert(current_writer != nullptr); From f6ca013c536d76ca6c1403db5f84d792f6bd8864 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:28:00 +0200 Subject: [PATCH 444/478] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 
25b93160d27..22dabc43a8c 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -11,7 +11,6 @@ #include #include -#include namespace DB { From 9488567bf6be7e2b751917a179222478fcb46f5e Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 27 Jul 2023 16:28:08 +0200 Subject: [PATCH 445/478] Update src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp --- src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 22dabc43a8c..548b61ce422 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -240,7 +240,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: if (stopped) { - throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we drop this part."); + throw Exception(ErrorCodes::ABORTED, "Storage has been shutdown when we add this part."); } chassert(current_writer != nullptr); From 5611b2fff484d74c70c8ad6b62ba8d66c0b63589 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 27 Jul 2023 15:45:55 +0200 Subject: [PATCH 446/478] Add a note about not working _table filter for Merge with analyzer Signed-off-by: Azat Khuzhin --- src/Storages/StorageMerge.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b0ed242d14d..272f35303bd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -795,6 +795,10 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( bool filter_by_database_virtual_column /* = false */, bool filter_by_table_virtual_column /* = false */) const { + /// FIXME: filtering does not work with allow_experimental_analyzer due to + /// different column names there (it has "table_name._table" not just + /// "_table") + assert(!filter_by_database_virtual_column || !filter_by_table_virtual_column || query); const Settings & settings = query_context->getSettingsRef(); From 68aed0d16e331a6ba6b592243f10ce2a816152db Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 26 Jul 2023 20:25:48 +0200 Subject: [PATCH 447/478] RFC: Fix filtering by virtual columns with OR expression Virtual columns did not support queries with OR; for example, a query like this (here `m` is the `Merge` table, see the test): select key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1'); will always lead to: Cannot find column `value` in source stream, there are only columns ... The reason for this is that it actually executes the following queries: SELECT key, value FROM default.d1 WHERE ((value = 10) AND ('v1' = 'v1')) OR ((value = 20) AND ('v1' = 'v1')); SELECT key FROM default.d2 WHERE 0; And this kind of filtering is used not only for the `Merge` table but also for: - `_table` for `Merge` (already mentioned) - `_file` for `File` - `_idx` for `S3` - filtering `system.*` tables by `database`/`table`/... 
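For illustration, here is a minimal, self-contained sketch of the scenario above. It reuses the `m`/`d1`/`d2`/`key`/`value` names from the example, but it is not the content of the referenced test (02840_merge__table_or_filter.sql.j2); the example above filters on `_table = 'v1'`, while this sketch reads the tables `d1`/`d2` directly, so the virtual column takes the values 'd1'/'d2' here:

```sql
-- Hypothetical setup mirroring the example above (not the verbatim test).
CREATE TABLE d1 (key Int64, value Int64) ENGINE = MergeTree ORDER BY key;
CREATE TABLE d2 (key Int64, value Int64) ENGINE = MergeTree ORDER BY key;
CREATE TABLE m (key Int64, value Int64) ENGINE = Merge(currentDatabase(), '^(d1|d2)$');

INSERT INTO d1 VALUES (1, 10), (2, 20);
INSERT INTO d2 VALUES (3, 10);

-- Filtering by the virtual column _table inside an OR expression.
-- Previously this failed with "Cannot find column `value` in source stream";
-- with this change it should return keys 1 and 2 (both rows come from d1).
SELECT key FROM m
WHERE (value = 10 AND _table = 'd1') OR (value = 20 AND _table = 'd1')
ORDER BY key;
```

The intent of the change is that an OR branch is kept for the virtual-column pre-filter only when both of its arguments produce usable conditions; otherwise the whole OR is dropped from the pre-filter, which should only mean reading more tables than strictly necessary rather than producing wrong results.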
Signed-off-by: Azat Khuzhin --- src/Storages/VirtualColumnUtils.cpp | 36 +++++++++++++----- .../02840_merge__table_or_filter.reference | 38 +++++++++++++++++++ .../02840_merge__table_or_filter.sql.j2 | 34 +++++++++++++++++ 3 files changed, 99 insertions(+), 9 deletions(-) create mode 100644 tests/queries/0_stateless/02840_merge__table_or_filter.reference create mode 100644 tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 907fc0cd22c..79be1f98a0f 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -63,14 +64,31 @@ bool isValidFunction(const ASTPtr & expression, const std::function & is_constant, ASTs & result) { const auto * function = expression->as(); - if (function && (function->name == "and" || function->name == "indexHint")) + + if (function) { - bool ret = true; - for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, result); - return ret; + if (function->name == "and" || function->name == "indexHint") + { + bool ret = true; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, result); + return ret; + } + else if (function->name == "or") + { + bool ret = true; + ASTs or_args; + for (const auto & child : function->arguments->children) + ret &= extractFunctions(child, is_constant, or_args); + /// We can keep condition only if it still OR condition (i.e. we + /// have dependent conditions for columns at both sides) + if (or_args.size() == 2) + result.push_back(makeASTForLogicalOr(std::move(or_args))); + return ret; + } } - else if (isValidFunction(expression, is_constant)) + + if (isValidFunction(expression, is_constant)) { result.push_back(expression->clone()); return true; @@ -80,13 +98,13 @@ bool extractFunctions(const ASTPtr & expression, const std::function Date: Thu, 27 Jul 2023 09:49:34 +0000 Subject: [PATCH 448/478] Add query cache metrics to system.asynchronous_metrics Cf. https://github.com/ClickHouse/ClickHouse/pull/52384#issuecomment-1653241216 --- docs/en/operations/query-cache.md | 11 ++++++----- .../operations/system-tables/asynchronous_metrics.md | 12 ++++++++++++ docs/en/operations/system-tables/events.md | 2 ++ docs/en/operations/system-tables/metrics.md | 2 +- src/Interpreters/Cache/QueryCache.cpp | 10 ++++++++++ src/Interpreters/Cache/QueryCache.h | 5 ++++- src/Interpreters/ServerAsynchronousMetrics.cpp | 6 ++++++ 7 files changed, 41 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 547105c65cc..d0b785d8fda 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -61,11 +61,12 @@ use_query_cache = true`) but one should keep in mind that all `SELECT` queries i may return cached results then. The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table -`system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table -[system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = -true". Other queries do not affect the cache miss counter. 
Field `query_log_usage` in system table -[system.query_log](system-tables/query_log.md) shows for each ran query whether the query result was written into or read from the query -cache. +`system.query_cache`. The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and +"QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run +with setting `use_query_cache = true`, other queries do not affect "QueryCacheMisses". Field `query_log_usage` in system table +[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the +query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table +[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains. The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be changed (see below) but doing so is not recommended for security reasons. diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index f357341da67..e46b495239c 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 └─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` + + ## Metric descriptions @@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b Number of threads in the server of the PostgreSQL compatibility protocol. +### QueryCacheBytes + +Total size of the query cache cache in bytes. + +### QueryCacheEntries + +Total number of entries in the query cache. + ### ReplicasMaxAbsoluteDelay Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index ba5602ee292..7846fe4be5d 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -11,6 +11,8 @@ Columns: - `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. - `description` ([String](../../sql-reference/data-types/string.md)) — Event description. +You can find all supported events in source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp). + **Example** ``` sql diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index 5a7dfd03eb4..b1dcea5500f 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -11,7 +11,7 @@ Columns: - `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value. - `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. 
-The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse. +You can find all supported metrics in source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp). **Example** diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index a6c509e8bb1..5982a5ade50 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -496,6 +496,16 @@ void QueryCache::reset() cache_size_in_bytes = 0; } +size_t QueryCache::weight() const +{ + return cache.weight(); +} + +size_t QueryCache::count() const +{ + return cache.count(); +} + size_t QueryCache::recordQueryRun(const Key & key) { std::lock_guard lock(mutex); diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c2de8ca22dd..eaa54c503fa 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -186,6 +186,9 @@ public: void reset(); + size_t weight() const; + size_t count() const; + /// Record new execution of query represented by key. Returns number of executions so far. size_t recordQueryRun(const Key & key); @@ -193,7 +196,7 @@ public: std::vector dump() const; private: - Cache cache; + Cache cache; /// has its own locking --> not protected by mutex mutable std::mutex mutex; TimesExecuted times_executed TSA_GUARDED_BY(mutex); diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 0fbcfc9e6a1..68411e80755 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -92,6 +92,12 @@ void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values " The files opened with `mmap` are kept in the cache to avoid costly TLB flushes."}; } + if (auto query_cache = getContext()->getQueryCache()) + { + new_values["QueryCacheBytes"] = { query_cache->weight(), "Total size of the query cache in bytes." }; + new_values["QueryCacheEntries"] = { query_cache->count(), "Total number of entries in the query cache." 
}; + } + { auto caches = FileCacheFactory::instance().getAll(); size_t total_bytes = 0; From 380da315121078fc3e88a1e038e5aacd296853c2 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 27 Jul 2023 19:03:44 +0200 Subject: [PATCH 449/478] Improvements to backup restore disallow_concurrency test --- .../test_disallow_concurrency.py | 102 +++++++++++++----- 1 file changed, 73 insertions(+), 29 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index a863a6e2047..af1b2656227 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -133,21 +133,31 @@ def test_concurrent_backups_on_same_node(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - try: - error = nodes[0].query_and_get_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) - except Exception as e: + result, error = nodes[0].query_and_get_answer_with_error( + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + ) + + if not error: status = ( nodes[0] .query(f"SELECT status FROM system.backups WHERE id == '{id}'") .rstrip("\n") ) # It is possible that the second backup was picked up first, and then the async backup - if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + if status == "BACKUP_FAILED": + return + elif status == "CREATING_BACKUP": + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE id = '{id}'", + "BACKUP_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent backups both passed, when one is expected to fail") + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -191,20 +201,31 @@ def test_concurrent_backups_on_different_nodes(): ) assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] - try: - error = nodes[0].query_and_get_error( + result, error = nodes[0].query_and_get_answer_with_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" - ) - except Exception as e: + ) + + if not error: status = ( nodes[1] .query(f"SELECT status FROM system.backups WHERE id == '{id}'") .rstrip("\n") ) - if status == "CREATING_BACKUP" or status == "BACKUP_FAILED": + # It is possible that the second backup was picked up first, and then the async backup + if status == "BACKUP_FAILED": + return + elif status == "CREATING_BACKUP": + assert_eq_with_retry( + nodes[1], + f"SELECT status FROM system.backups WHERE id = '{id}'", + "BACKUP_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent backups both passed, when one is expected to fail") + expected_errors = [ "Concurrent backups not supported", f"Backup {backup_name} already exists", @@ -247,20 +268,32 @@ def test_concurrent_restores_on_same_node(): ) assert status in ["RESTORING", "RESTORED"] - try: - error = nodes[0].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) - except Exception as e: + result, error = nodes[0].query_and_get_answer_with_error( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" + ) + + if not error: status = ( nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") .rstrip("\n") ) - if status == "RESTORING" or status == "RESTORE_FAILED": + # It is possible that 
the second backup was picked up first, and then the async backup + if status == "RESTORE_FAILED": + return + elif status == "RESTORING": + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE id == '{restore_id}'", + "RESTORE_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent restores both passed, when one is expected to fail") + + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", @@ -303,20 +336,31 @@ def test_concurrent_restores_on_different_node(): ) assert status in ["RESTORING", "RESTORED"] - try: - error = nodes[1].query_and_get_error( - f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" - ) - except Exception as e: + result, error = nodes[1].query_and_get_answer_with_error( + f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" + ) + + if not error: status = ( nodes[0] - .query(f"SELECT status FROM system.backups WHERE id == '{id}'") + .query(f"SELECT status FROM system.backups WHERE id == '{restore_id}'") .rstrip("\n") ) - if status == "RESTORING" or status == "RESTORE_FAILED": + # It is possible that the second backup was picked up first, and then the async backup + if status == "RESTORE_FAILED": + return + elif status == "RESTORING": + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE id == '{restore_id}'", + "RESTORE_FAILED", + sleep_time=2, + retry_count=50, + ) return else: - raise e + raise Exception("Concurrent restores both passed, when one is expected to fail") + expected_errors = [ "Concurrent restores not supported", "Cannot restore the table default.tbl because it already contains some data", From b95745d916330abb3306016d512b40d3d24616dd Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 17:52:44 +0000 Subject: [PATCH 450/478] fix: check positional options --- src/Client/ClientBase.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 496fc8fce0a..06dabf96c28 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2624,6 +2624,10 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, throw Exception(ErrorCodes::UNRECOGNIZED_ARGUMENTS, "Unrecognized option '{}'", unrecognized_options[0]); } + /// Check positional options. 
+ if (std::ranges::count_if(parsed.options, [](const auto & op){ return !op.unregistered && op.string_key.empty() && !op.original_tokens[0].starts_with("--"); }) > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Positional options are not supported."); + po::store(parsed, options); } From 9340f02d26ae7f170611ea9b19a11e720b41b765 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 27 Jul 2023 18:33:07 +0000 Subject: [PATCH 451/478] Silence spell check --- .../aspell-ignore/en/aspell-dict.txt | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index a314815e2c4..80aeadd8738 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -211,7 +211,6 @@ Decrypted Deduplicate Deduplication DelayedInserts -delim DeliveryTag DeltaLake Denormalize @@ -699,6 +698,8 @@ PyCharm QEMU QTCreator Quantile +QueryCacheBytes +QueryCacheEntries QueryCacheHits QueryCacheMisses QueryPreempted @@ -761,9 +762,9 @@ RoaringBitmap RocksDB Rollup RowBinary +RowBinaryWithDefaults RowBinaryWithNames RowBinaryWithNamesAndTypes -RowBinaryWithDefaults Runtime SATA SELECTs @@ -776,7 +777,6 @@ SMALLINT SPNEGO SQEs SQLAlchemy -SquaredDistance SQLConsoleDetail SQLInsert SQLSTATE @@ -811,6 +811,7 @@ Smirnov'test Soundex SpanKind Spearman's +SquaredDistance StartTLS StartTime StartupSystemTables @@ -838,8 +839,6 @@ Subexpression Submodules Subqueries Substrings -substringIndex -substringIndexUTF SummingMergeTree SuperSet Superset @@ -1272,6 +1271,7 @@ cryptographic csv csvwithnames csvwithnamesandtypes +curdate currentDatabase currentProfiles currentRoles @@ -1331,6 +1331,7 @@ defaultProfiles defaultRoles defaultValueOfArgumentType defaultValueOfTypeName +delim deltaLake deltaSum deltaSumTimestamp @@ -1542,13 +1543,13 @@ hadoop halfMD halfday hardlinks +hasAll +hasAny +hasColumnInTable hasSubsequence hasSubsequenceCaseInsensitive hasSubsequenceCaseInsensitiveUTF hasSubsequenceUTF -hasAll -hasAny -hasColumnInTable hasSubstr hasToken hasTokenCaseInsensitive @@ -1590,10 +1591,10 @@ incrementing indexHint indexOf infi -initialQueryID -initializeAggregation initcap initcapUTF +initialQueryID +initializeAggregation injective innogames inodes @@ -2131,9 +2132,9 @@ routineley rowNumberInAllBlocks rowNumberInBlock rowbinary +rowbinarywithdefaults rowbinarywithnames rowbinarywithnamesandtypes -rowbinarywithdefaults rsync rsyslog runnable @@ -2185,8 +2186,8 @@ sleepEachRow snowflakeToDateTime socketcache soundex -sparkbar sparkBar +sparkbar sparsehash speedscope splitByChar @@ -2256,6 +2257,8 @@ subreddits subseconds subsequence substring +substringIndex +substringIndexUTF substringUTF substrings subtitiles @@ -2556,4 +2559,3 @@ znode znodes zookeeperSessionUptime zstd -curdate From 5942c80faed38febea5394526b5e5c670b03bd4d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Thu, 27 Jul 2023 19:11:44 +0000 Subject: [PATCH 452/478] fix test: '--option' now is allowed after terminating '--' --- .../02096_bad_options_in_client_and_local.reference | 2 -- .../0_stateless/02096_bad_options_in_client_and_local.sh | 4 ---- 2 files changed, 6 deletions(-) diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference index c4c0901b9df..432299e9556 100644 --- a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference +++ 
b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.reference @@ -8,5 +8,3 @@ OK OK OK OK -OK -OK diff --git a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh index d37155e8506..753d56fb424 100755 --- a/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh +++ b/tests/queries/0_stateless/02096_bad_options_in_client_and_local.sh @@ -9,8 +9,6 @@ ${CLICKHOUSE_LOCAL} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" ${CLICKHOUSE_LOCAL} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" -${CLICKHOUSE_LOCAL} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" - ${CLICKHOUSE_LOCAL} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" ${CLICKHOUSE_LOCAL} -f 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" @@ -22,8 +20,6 @@ ${CLICKHOUSE_CLIENT} --unknown-option 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" ${CLICKHOUSE_CLIENT} --unknown-option-1 --unknown-option-2 2>&1 | grep -F -q "UNRECOGNIZED_ARGUMENTS" && echo "OK" || echo "FAIL" -${CLICKHOUSE_CLIENT} -- --unknown-option 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" - ${CLICKHOUSE_CLIENT} -- 'positional-argument' 2>&1 | grep -F -q "BAD_ARGUMENTS" && echo "OK" || echo "FAIL" ${CLICKHOUSE_CLIENT} --j 2>&1 | grep -F -q "Bad arguments" && echo "OK" || echo "FAIL" From dba8b445bd37b2fb9fb4983e0a3f740649dcbb5b Mon Sep 17 00:00:00 2001 From: Jai Jhala Date: Thu, 27 Jul 2023 12:32:53 -0700 Subject: [PATCH 453/478] Update default output_format_arrow_compression.md Updates the default parameter of output_format_arrow_compression_method from "none" to "lz4_frame". --- docs/en/operations/settings/settings-formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index ee8e0d547b8..fb10ff7f61b 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1164,7 +1164,7 @@ Enabled by default. Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) -Default value: `none`. +Default value: `lz4_frame`. 
## ORC format settings {#orc-format-settings} From 7d8dc92ed0522e309760037720f6fd8fb3f2542d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 27 Jul 2023 21:07:54 +0000 Subject: [PATCH 454/478] Automatic style fix --- .../test_disallow_concurrency.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index af1b2656227..5c3f06a9d9d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -156,7 +156,9 @@ def test_concurrent_backups_on_same_node(): ) return else: - raise Exception("Concurrent backups both passed, when one is expected to fail") + raise Exception( + "Concurrent backups both passed, when one is expected to fail" + ) expected_errors = [ "Concurrent backups not supported", @@ -202,7 +204,7 @@ def test_concurrent_backups_on_different_nodes(): assert status in ["CREATING_BACKUP", "BACKUP_CREATED"] result, error = nodes[0].query_and_get_answer_with_error( - f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" + f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) if not error: @@ -224,7 +226,9 @@ def test_concurrent_backups_on_different_nodes(): ) return else: - raise Exception("Concurrent backups both passed, when one is expected to fail") + raise Exception( + "Concurrent backups both passed, when one is expected to fail" + ) expected_errors = [ "Concurrent backups not supported", @@ -291,8 +295,9 @@ def test_concurrent_restores_on_same_node(): ) return else: - raise Exception("Concurrent restores both passed, when one is expected to fail") - + raise Exception( + "Concurrent restores both passed, when one is expected to fail" + ) expected_errors = [ "Concurrent restores not supported", @@ -359,7 +364,9 @@ def test_concurrent_restores_on_different_node(): ) return else: - raise Exception("Concurrent restores both passed, when one is expected to fail") + raise Exception( + "Concurrent restores both passed, when one is expected to fail" + ) expected_errors = [ "Concurrent restores not supported", From 18c1fd6f08cc2be964ed15604c26a70d7d168561 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 27 Jul 2023 21:24:39 +0000 Subject: [PATCH 455/478] Refactor InDepthQueryTreeVisitorWithContext --- src/Analyzer/InDepthQueryTreeVisitor.h | 158 +++--------------- ...egateFunctionsArithmericOperationsPass.cpp | 8 +- src/Analyzer/Passes/ArrayExistsToHasPass.cpp | 2 +- src/Analyzer/Passes/AutoFinalOnQueryPass.cpp | 2 +- .../Passes/ConvertOrLikeChainPass.cpp | 2 +- src/Analyzer/Passes/ConvertQueryToCNFPass.cpp | 2 +- src/Analyzer/Passes/CountDistinctPass.cpp | 34 ++-- src/Analyzer/Passes/CrossToInnerJoinPass.cpp | 2 +- .../Passes/FunctionToSubcolumnsPass.cpp | 2 +- src/Analyzer/Passes/FuseFunctionsPass.cpp | 2 +- .../Passes/GroupingFunctionsResolvePass.cpp | 2 +- src/Analyzer/Passes/IfChainToMultiIfPass.cpp | 2 +- .../Passes/IfTransformStringsToEnumPass.cpp | 2 +- .../Passes/LogicalExpressionOptimizerPass.cpp | 2 +- src/Analyzer/Passes/MultiIfToIfPass.cpp | 2 +- .../Passes/NormalizeCountVariantsPass.cpp | 2 +- .../OptimizeGroupByFunctionKeysPass.cpp | 2 +- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 2 +- .../RewriteAggregateFunctionWithIfPass.cpp | 2 +- .../Passes/ShardNumColumnToFunctionPass.cpp | 2 +- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 2 +- 
.../UniqInjectiveFunctionsEliminationPass.cpp | 2 +- src/Storages/buildQueryTreeForShard.cpp | 2 +- 23 files changed, 55 insertions(+), 185 deletions(-) diff --git a/src/Analyzer/InDepthQueryTreeVisitor.h b/src/Analyzer/InDepthQueryTreeVisitor.h index be3a760d4e6..59ee57996c4 100644 --- a/src/Analyzer/InDepthQueryTreeVisitor.h +++ b/src/Analyzer/InDepthQueryTreeVisitor.h @@ -91,26 +91,25 @@ private: template using ConstInDepthQueryTreeVisitor = InDepthQueryTreeVisitor; -/** Same as InDepthQueryTreeVisitor and additionally keeps track of current scope context. +/** Same as InDepthQueryTreeVisitor (but has a different interface) and additionally keeps track of current scope context. * This can be useful if your visitor has special logic that depends on current scope context. + * + * To specify behavior of the visitor you can implement following methods in derived class: + * 1. needChildVisit – This methods allows to skip subtree. + * 2. enterImpl – This method is called before children are processed. + * 3. leaveImpl – This method is called after children are processed. */ template class InDepthQueryTreeVisitorWithContext { public: - using VisitQueryTreeNodeType = std::conditional_t; + using VisitQueryTreeNodeType = QueryTreeNodePtr; explicit InDepthQueryTreeVisitorWithContext(ContextPtr context, size_t initial_subquery_depth = 0) : current_context(std::move(context)) , subquery_depth(initial_subquery_depth) {} - /// Return true if visitor should traverse tree top to bottom, false otherwise - bool shouldTraverseTopToBottom() const - { - return true; - } - /// Return true if visitor should visit child, false otherwise bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) { @@ -147,18 +146,16 @@ public: ++subquery_depth; - bool traverse_top_to_bottom = getDerived().shouldTraverseTopToBottom(); - if (!traverse_top_to_bottom) - visitChildren(query_tree_node); + getDerived().enterImpl(query_tree_node); - getDerived().visitImpl(query_tree_node); - - if (traverse_top_to_bottom) - visitChildren(query_tree_node); + visitChildren(query_tree_node); getDerived().leaveImpl(query_tree_node); } + void enterImpl(VisitQueryTreeNodeType & node [[maybe_unused]]) + {} + void leaveImpl(VisitQueryTreeNodeType & node [[maybe_unused]]) {} private: @@ -172,85 +169,15 @@ private: return *static_cast(this); } - void visitChildren(VisitQueryTreeNodeType & expression) + bool shouldSkipSubtree( + VisitQueryTreeNodeType & parent, + VisitQueryTreeNodeType & child, + size_t subtree_index) { - for (auto & child : expression->getChildren()) - { - if (!child) - continue; + bool need_visit_child = getDerived().needChildVisit(parent, child); + if (!need_visit_child) + return true; - bool need_visit_child = getDerived().needChildVisit(expression, child); - - if (need_visit_child) - visit(child); - } - } - - ContextPtr current_context; - size_t subquery_depth = 0; -}; - -template -using ConstInDepthQueryTreeVisitorWithContext = InDepthQueryTreeVisitorWithContext; - -/** Visitor that use another visitor to visit node only if condition for visiting node is true. - * For example, your visitor need to visit only query tree nodes or union nodes. 
- * - * Condition interface: - * struct Condition - * { - * bool operator()(VisitQueryTreeNodeType & node) - * { - * return shouldNestedVisitorVisitNode(node); - * } - * } - */ -template -class InDepthQueryTreeConditionalVisitor : public InDepthQueryTreeVisitor, const_visitor> -{ -public: - using Base = InDepthQueryTreeVisitor, const_visitor>; - using VisitQueryTreeNodeType = typename Base::VisitQueryTreeNodeType; - - explicit InDepthQueryTreeConditionalVisitor(Visitor & visitor_, Condition & condition_) - : visitor(visitor_) - , condition(condition_) - { - } - - bool shouldTraverseTopToBottom() const - { - return visitor.shouldTraverseTopToBottom(); - } - - void visitImpl(VisitQueryTreeNodeType & query_tree_node) - { - if (condition(query_tree_node)) - visitor.visit(query_tree_node); - } - - Visitor & visitor; - Condition & condition; -}; - -template -using ConstInDepthQueryTreeConditionalVisitor = InDepthQueryTreeConditionalVisitor; - -template -class QueryTreeVisitor -{ -public: - explicit QueryTreeVisitor(ContextPtr context_) - : current_context(std::move(context_)) - {} - - bool needApply(QueryTreeNodePtr & node) - { - return getImpl().needApply(node); - } - - bool shouldSkipSubtree(QueryTreeNodePtr & parent, size_t subtree_index) - { if (auto * table_function_node = parent->as()) { const auto & unresolved_indexes = table_function_node->getUnresolvedArgumentIndexes(); @@ -259,58 +186,19 @@ public: return false; } - void visit(QueryTreeNodePtr & node) - { - auto current_scope_context_ptr = current_context; - SCOPE_EXIT( - current_context = std::move(current_scope_context_ptr); - ); - - if (auto * query_node = node->template as()) - current_context = query_node->getContext(); - else if (auto * union_node = node->template as()) - current_context = union_node->getContext(); - - if (!TOP_TO_BOTTOM) - visitChildren(node); - - if (needApply(node)) - getImpl().apply(node); - - if (TOP_TO_BOTTOM) - visitChildren(node); - } - - const ContextPtr & getContext() const - { - return current_context; - } - - const Settings & getSettings() const - { - return current_context->getSettingsRef(); - } -private: - - Impl & getImpl() - { - return *static_cast(this); - } - - void visitChildren(QueryTreeNodePtr & node) + void visitChildren(VisitQueryTreeNodeType & expression) { size_t index = 0; - for (auto & child : node->getChildren()) + for (auto & child : expression->getChildren()) { - if (child && !shouldSkipSubtree(node, index)) + if (child && !shouldSkipSubtree(expression, child, index)) visit(child); ++index; } } - static constexpr bool TOP_TO_BOTTOM = Impl::TOP_TO_BOTTOM; - ContextPtr current_context; + size_t subquery_depth = 0; }; } diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 1476a66c892..3615a632374 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -51,13 +51,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - /// Traverse tree bottom to top - static bool shouldTraverseTopToBottom() - { - return false; - } - - void visitImpl(QueryTreeNodePtr & node) + void leaveImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions) return; diff --git a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp index c0f958588f1..a95bcea4fac 100644 --- 
a/src/Analyzer/Passes/ArrayExistsToHasPass.cpp +++ b/src/Analyzer/Passes/ArrayExistsToHasPass.cpp @@ -22,7 +22,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_rewrite_array_exists_to_has) return; diff --git a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp index 15326ca1dc8..2c89ec9dc20 100644 --- a/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp +++ b/src/Analyzer/Passes/AutoFinalOnQueryPass.cpp @@ -20,7 +20,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().final) return; diff --git a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp index 7d7362fb742..1fada88a21c 100644 --- a/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp +++ b/src/Analyzer/Passes/ConvertOrLikeChainPass.cpp @@ -50,7 +50,7 @@ public: && settings.max_hyperscan_regexp_total_length == 0; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); if (!function_node || function_node->getFunctionName() != "or") diff --git a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp index 4d32c96b845..724448ad742 100644 --- a/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp +++ b/src/Analyzer/Passes/ConvertQueryToCNFPass.cpp @@ -688,7 +688,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * query_node = node->as(); if (!query_node) diff --git a/src/Analyzer/Passes/CountDistinctPass.cpp b/src/Analyzer/Passes/CountDistinctPass.cpp index 38f7d07d052..dc58747221e 100644 --- a/src/Analyzer/Passes/CountDistinctPass.cpp +++ b/src/Analyzer/Passes/CountDistinctPass.cpp @@ -16,17 +16,16 @@ namespace DB namespace { -class CountDistinctVisitor : public QueryTreeVisitor +class CountDistinctVisitor : public InDepthQueryTreeVisitorWithContext { public: - using QueryTreeVisitor::QueryTreeVisitor; + using Base = InDepthQueryTreeVisitorWithContext; + using Base::Base; - static constexpr bool TOP_TO_BOTTOM = true; - - bool needApply(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().count_distinct_optimization) - return false; + return; auto * query_node = node->as(); @@ -34,43 +33,32 @@ public: if (!query_node || (query_node->hasWith() || query_node->hasPrewhere() || query_node->hasWhere() || query_node->hasGroupBy() || query_node->hasHaving() || query_node->hasWindow() || query_node->hasOrderBy() || query_node->hasLimitByLimit() || query_node->hasLimitByOffset() || query_node->hasLimitBy() || query_node->hasLimit() || query_node->hasOffset())) - return false; + return; /// Check that query has only single table expression auto join_tree_node_type = query_node->getJoinTree()->getNodeType(); if (join_tree_node_type == QueryTreeNodeType::JOIN || join_tree_node_type == QueryTreeNodeType::ARRAY_JOIN) - return false; + return; /// Check that query has only single node in projection auto & projection_nodes = query_node->getProjection().getNodes(); if (projection_nodes.size() != 1) - return false; + return; /// Check that query single projection node is `countDistinct` function auto & projection_node = projection_nodes[0]; auto * function_node 
= projection_node->as(); if (!function_node) - return false; + return; auto lower_function_name = Poco::toLower(function_node->getFunctionName()); if (lower_function_name != "countdistinct" && lower_function_name != "uniqexact") - return false; + return; /// Check that `countDistinct` function has single COLUMN argument auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); if (count_distinct_arguments_nodes.size() != 1 && count_distinct_arguments_nodes[0]->getNodeType() != QueryTreeNodeType::COLUMN) - return false; - - return true; - } - - void apply(QueryTreeNodePtr & node) - { - auto * query_node = node->as(); - auto & projection_nodes = query_node->getProjection().getNodes(); - auto * function_node = projection_nodes[0]->as(); - - auto & count_distinct_arguments_nodes = function_node->getArguments().getNodes(); + return; auto & count_distinct_argument_column = count_distinct_arguments_nodes[0]; auto & count_distinct_argument_column_typed = count_distinct_argument_column->as(); diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index d4877d23f28..b5ece1a4c49 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -193,7 +193,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!isEnabled()) return; diff --git a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp index 696483862e0..cd635f87e0e 100644 --- a/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp +++ b/src/Analyzer/Passes/FunctionToSubcolumnsPass.cpp @@ -29,7 +29,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { if (!getSettings().optimize_functions_to_subcolumns) return; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 14082697955..2cb7afa4ad6 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -37,7 +37,7 @@ public: , names_to_collect(names_to_collect_) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_syntax_fuse_functions) return; diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp index 0cf5310a3ad..577bca8d1ae 100644 --- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp +++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp @@ -46,7 +46,7 @@ public: { } - void visitImpl(const QueryTreeNodePtr & node) + void enterImpl(const QueryTreeNodePtr & node) { auto * function_node = node->as(); if (!function_node || function_node->getFunctionName() != "grouping") diff --git a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp index 1f97e012331..b0018d474d5 100644 --- a/src/Analyzer/Passes/IfChainToMultiIfPass.cpp +++ b/src/Analyzer/Passes/IfChainToMultiIfPass.cpp @@ -23,7 +23,7 @@ public: , multi_if_function_ptr(std::move(multi_if_function_ptr_)) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_if_chain_to_multiif) return; diff --git a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp index 562aff4cf05..901867b8889 100644 --- 
a/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp +++ b/src/Analyzer/Passes/IfTransformStringsToEnumPass.cpp @@ -113,7 +113,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_if_transform_strings_to_enum) return; diff --git a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp index 13f8025f5ea..46056aeaf6f 100644 --- a/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp +++ b/src/Analyzer/Passes/LogicalExpressionOptimizerPass.cpp @@ -19,7 +19,7 @@ public: : Base(std::move(context)) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); diff --git a/src/Analyzer/Passes/MultiIfToIfPass.cpp b/src/Analyzer/Passes/MultiIfToIfPass.cpp index 4672351bcfb..85dd33af8bb 100644 --- a/src/Analyzer/Passes/MultiIfToIfPass.cpp +++ b/src/Analyzer/Passes/MultiIfToIfPass.cpp @@ -21,7 +21,7 @@ public: , if_function_ptr(std::move(if_function_ptr_)) {} - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_multiif_to_if) return; diff --git a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp index d36be98751c..c85b863a203 100644 --- a/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp +++ b/src/Analyzer/Passes/NormalizeCountVariantsPass.cpp @@ -20,7 +20,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_normalize_count_variants) return; diff --git a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp index 5ed52f1210b..2e3f207fdeb 100644 --- a/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByFunctionKeysPass.cpp @@ -26,7 +26,7 @@ public: return !child->as(); } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_group_by_function_keys) return; diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index c6d312d0ecf..875d0c8b5fb 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -28,7 +28,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_redundant_functions_in_order_by) return; diff --git a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp index de264948d4c..38f2fbfa274 100644 --- a/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp +++ b/src/Analyzer/Passes/RewriteAggregateFunctionWithIfPass.cpp @@ -26,7 +26,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_rewrite_aggregate_function_with_if) return; diff --git a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp index b28816e8ff3..52c30b7b35d 100644 --- a/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp +++ 
b/src/Analyzer/Passes/ShardNumColumnToFunctionPass.cpp @@ -24,7 +24,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) const + void enterImpl(QueryTreeNodePtr & node) const { auto * column_node = node->as(); if (!column_node) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index d55af278152..cff9ba1111c 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -26,7 +26,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_rewrite_sum_if_to_count_if) return; diff --git a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp index 5c4484457e8..179bd1c38e4 100644 --- a/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp +++ b/src/Analyzer/Passes/UniqInjectiveFunctionsEliminationPass.cpp @@ -31,7 +31,7 @@ public: using Base = InDepthQueryTreeVisitorWithContext; using Base::Base; - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { if (!getSettings().optimize_injective_functions_inside_uniq) return; diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 1ee7d747fcc..9929b5bb39b 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -130,7 +130,7 @@ public: return true; } - void visitImpl(QueryTreeNodePtr & node) + void enterImpl(QueryTreeNodePtr & node) { auto * function_node = node->as(); auto * join_node = node->as(); From 6573ba537819ce03dd644ff02bdf7341bcc26d58 Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 27 Jul 2023 19:37:28 -0400 Subject: [PATCH 456/478] Temporary returning metadata_cache.xml into tests config --- tests/config/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/install.sh b/tests/config/install.sh index 9aaadbc74a5..50f2627d37c 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -34,6 +34,7 @@ ln -sf $SRC_PATH/config.d/keeper_port.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/logging_no_rotate.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/merge_tree.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/lost_forever_check.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/metadata_cache.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/tcp_with_proxy.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/prometheus.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/top_level_domains_lists.xml $DEST_SERVER_PATH/config.d/ From b3351bb547b8753b405d820925f8f4270be6132d Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Fri, 28 Jul 2023 03:36:23 +0000 Subject: [PATCH 457/478] partially fixed 01747_system_session_log_long test --- src/Core/PostgreSQLProtocol.h | 49 +++-- tests/config/users.d/session_log_test.xml | 2 +- .../01747_system_session_log_long.reference | 198 ++++++++++++------ .../01747_system_session_log_long.sh | 119 ++++++----- 4 files changed, 224 insertions(+), 144 deletions(-) rename tests/queries/{bugs => 0_stateless}/01747_system_session_log_long.reference (73%) rename tests/queries/{bugs => 0_stateless}/01747_system_session_log_long.sh (78%) diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index 8c0654b559f..b0d7646a5f7 
100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -805,20 +805,9 @@ protected: const String & user_name, const String & password, Session & session, - Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - try - { - session.authenticate(user_name, password, address); - } - catch (const Exception &) - { - mt.send( - Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), - true); - throw; - } + session.authenticate(user_name, password, address); } public: @@ -839,10 +828,10 @@ public: void authenticate( const String & user_name, Session & session, - Messaging::MessageTransport & mt, + [[maybe_unused]] Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) override { - return setPassword(user_name, "", session, mt, address); + return setPassword(user_name, "", session, address); } AuthenticationType getType() const override @@ -866,7 +855,7 @@ public: if (type == Messaging::FrontMessageType::PASSWORD_MESSAGE) { std::unique_ptr password = mt.receive(); - return setPassword(user_name, password->password, session, mt, address); + return setPassword(user_name, password->password, session, address); } else throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, @@ -901,20 +890,30 @@ public: Messaging::MessageTransport & mt, const Poco::Net::SocketAddress & address) { - const AuthenticationType user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); - if (type_to_method.find(user_auth_type) != type_to_method.end()) + AuthenticationType user_auth_type; + try { - type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); - mt.send(Messaging::AuthenticationOk(), true); - LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); - return; + user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name); + if (type_to_method.find(user_auth_type) != type_to_method.end()) + { + type_to_method[user_auth_type]->authenticate(user_name, session, mt, address); + mt.send(Messaging::AuthenticationOk(), true); + LOG_DEBUG(log, "Authentication for user {} was successful.", user_name); + return; + } + } + catch (const Exception&) + { + mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "28P01", "Invalid user or password"), + true); + + throw; } - mt.send( - Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), - true); + mt.send(Messaging::ErrorOrNoticeResponse(Messaging::ErrorOrNoticeResponse::ERROR, "0A000", "Authentication method is not supported"), + true); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication type {} is not supported.", user_auth_type); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Authentication method is not supported: {}", user_auth_type); } }; } diff --git a/tests/config/users.d/session_log_test.xml b/tests/config/users.d/session_log_test.xml index daddaa6e4b9..cc2c2c5fcde 100644 --- a/tests/config/users.d/session_log_test.xml +++ b/tests/config/users.d/session_log_test.xml @@ -17,7 +17,7 @@ - + ::1 127.0.0.1 diff --git a/tests/queries/bugs/01747_system_session_log_long.reference b/tests/queries/0_stateless/01747_system_session_log_long.reference similarity index 73% rename from tests/queries/bugs/01747_system_session_log_long.reference rename to tests/queries/0_stateless/01747_system_session_log_long.reference index 9ecf7e05421..e4f0b6f6076 100644 --- 
a/tests/queries/bugs/01747_system_session_log_long.reference +++ b/tests/queries/0_stateless/01747_system_session_log_long.reference @@ -4,215 +4,291 @@ TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - No profiles no roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - Two profiles, no roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # no_password - Two profiles and two simple roles TCP endpoint TCP 'wrong password' case is skipped for no_password. HTTP endpoint HTTP 'wrong password' case is skipped for no_password. -MySQL endpoint +HTTP endpoint with named session +HTTP 'wrong password' case is skipped for no_password. +MySQL endpoint no_password +Wrong username +Wrong password MySQL 'wrong password' case is skipped for no_password. +PostrgreSQL endpoint +PostgreSQL 'wrong password' case is skipped for no_password. # plaintext_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # plaintext_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # plaintext_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint plaintext_password +Wrong username +Wrong password +PostrgreSQL endpoint # sha256_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # sha256_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # sha256_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint sha256_password MySQL 'successful login' case is skipped for sha256_password. 
+Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for sha256_password # double_sha1_password - No profiles no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password # double_sha1_password - Two profiles, no roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password # double_sha1_password - Two profiles and two simple roles TCP endpoint HTTP endpoint -MySQL endpoint +HTTP endpoint with named session +MySQL endpoint double_sha1_password +Wrong username +Wrong password +PostrgreSQL endpoint +PostgreSQL tests are skipped for double_sha1_password ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP Logout many 
${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL Logout 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL Logout 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_no_profiles_no_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginSuccess many 
+${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_no_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginSuccess many +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginSuccess 1 ${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL Logout 1 +${BASE_USERNAME}_plaintext_password_two_profiles_two_roles PostgreSQL LoginFailure many ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginSuccess many +${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginSuccess 1 ${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP Logout 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess 1 -${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout 1 +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure many +${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginSuccess many 
+${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP Logout many ${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_no_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_double_sha1_password_two_profiles_two_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_no_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_no_password_two_profiles_two_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_no_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_no_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure 1 
+invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles MySQL LoginFailure many +invalid_${BASE_USERNAME}_plaintext_password_two_profiles_two_roles PostgreSQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_no_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_no_roles MySQL LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles TCP LoginFailure 1 -invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure 1 +invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles HTTP LoginFailure many invalid_${BASE_USERNAME}_sha256_password_two_profiles_two_roles MySQL LoginFailure many invalid_session_log_test_xml_user TCP LoginFailure 1 -invalid_session_log_test_xml_user HTTP LoginFailure 1 +invalid_session_log_test_xml_user HTTP LoginFailure many invalid_session_log_test_xml_user MySQL LoginFailure many +invalid_session_log_test_xml_user PostgreSQL LoginFailure many session_log_test_xml_user TCP LoginSuccess 1 session_log_test_xml_user TCP Logout 1 -session_log_test_xml_user HTTP LoginSuccess 1 -session_log_test_xml_user HTTP Logout 1 +session_log_test_xml_user HTTP LoginSuccess many +session_log_test_xml_user HTTP Logout many session_log_test_xml_user MySQL LoginSuccess 1 session_log_test_xml_user MySQL Logout 1 diff --git a/tests/queries/bugs/01747_system_session_log_long.sh b/tests/queries/0_stateless/01747_system_session_log_long.sh similarity index 78% rename from tests/queries/bugs/01747_system_session_log_long.sh rename to tests/queries/0_stateless/01747_system_session_log_long.sh index 9b127e0b48d..c6e93f4abd7 100755 --- a/tests/queries/bugs/01747_system_session_log_long.sh +++ b/tests/queries/0_stateless/01747_system_session_log_long.sh @@ -1,6 +1,5 @@ #!/usr/bin/env bash # Tags: long, no-parallel, no-fasttest -# Tag no-fasttest: Accesses CH via mysql table function (which is unavailable) ################################################################################################## # Verify that login, logout, and login failure events are properly stored in system.session_log @@ -11,9 +10,8 @@ # Using multiple protocols # * native TCP protocol with CH client # * HTTP with CURL -# * MySQL - CH server accesses itself via mysql table function, query typically fails (unrelated) -# but auth should be performed properly. -# * PostgreSQL - CH server accesses itself via postgresql table function (currently out of order). +# * MySQL - CH server accesses itself via mysql table function. +# * PostgreSQL - CH server accesses itself via postgresql table function, but can't execute query (No LOGIN SUCCESS entry). # * gRPC - not done yet # # There is way to control how many time a query (e.g. via mysql table function) is retried @@ -53,7 +51,7 @@ function reportError() function executeQuery() { - ## Execute query (provided via heredoc or herestring) and print query in case of error. 
+ # Execute query (provided via heredoc or herestring) and print query in case of error. trap 'rm -f ${TMP_QUERY_FILE}; trap - ERR RETURN' RETURN # Since we want to report with current values supplied to this function call # shellcheck disable=SC2064 @@ -82,7 +80,7 @@ trap "cleanup" EXIT function executeQueryExpectError() { cat - > "${TMP_QUERY_FILE}" - ! ${CLICKHOUSE_CLIENT} "${@}" --multiquery --queries-file "${TMP_QUERY_FILE}" 2>&1 | tee -a ${TMP_QUERY_FILE} + ! ${CLICKHOUSE_CLIENT} --multiquery --queries-file "${TMP_QUERY_FILE}" "${@}" 2>&1 | tee -a ${TMP_QUERY_FILE} } function createUser() @@ -121,6 +119,8 @@ function createUser() executeQuery < Date: Fri, 28 Jul 2023 07:08:11 +0000 Subject: [PATCH 458/478] use same executor for GET_PART and ATTACH_PART --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d62a1d960e6..2c2cea0af2b 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3311,7 +3311,7 @@ bool StorageReplicatedMergeTree::scheduleDataProcessingJob(BackgroundJobsAssigne auto job_type = selected_entry->log_entry->type; /// Depending on entry type execute in fetches (small) pool or big merge_mutate pool - if (job_type == LogEntry::GET_PART) + if (job_type == LogEntry::GET_PART || job_type == LogEntry::ATTACH_PART) { assignee.scheduleFetchTask(std::make_shared( [this, selected_entry] () mutable From 63b05da1f2da6cee086d1154ddc670329aba667d Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Fri, 28 Jul 2023 07:23:34 +0000 Subject: [PATCH 459/478] System logs improvements --- .../settings.md | 128 ++++++++++- docs/en/operations/system-tables/index.md | 4 + .../settings.md | 212 +++++++++++++++--- docs/ru/operations/system-tables/index.md | 4 + programs/server/config.xml | 53 +++++ src/Common/SystemLogBase.cpp | 81 ++++--- src/Common/SystemLogBase.h | 43 +++- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 4 + .../IO/AsynchronousBoundedReadBuffer.cpp | 2 +- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- .../IO/CachedOnDiskWriteBufferFromFile.cpp | 2 +- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.cpp | 4 +- src/Interpreters/Context.cpp | 6 + src/Interpreters/Context.h | 3 + src/Interpreters/CrashLog.cpp | 5 +- src/Interpreters/CrashLog.h | 5 + src/Interpreters/MetricLog.cpp | 2 +- src/Interpreters/PartLog.cpp | 2 +- src/Interpreters/ProcessorsProfileLog.cpp | 7 - src/Interpreters/ProcessorsProfileLog.h | 7 +- src/Interpreters/Session.cpp | 2 +- src/Interpreters/SessionLog.cpp | 6 +- src/Interpreters/SystemLog.cpp | 92 +++++--- src/Interpreters/SystemLog.h | 17 +- src/Interpreters/TextLog.cpp | 11 +- src/Interpreters/TextLog.h | 13 +- src/Interpreters/ThreadStatusExt.cpp | 4 +- src/Interpreters/TraceCollector.cpp | 2 +- src/Interpreters/TransactionLog.cpp | 2 +- src/Interpreters/TransactionsInfoLog.cpp | 2 +- src/Loggers/Loggers.cpp | 44 +++- src/Loggers/OwnSplitChannel.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- tests/integration/parallel_skip.json | 9 +- .../test_crash_log/configs/crash_log.xml | 16 ++ tests/integration/test_crash_log/test.py | 19 +- .../test_system_flush_logs/test.py | 99 +++++++- .../test_system_logs/test_system_logs.py | 50 +++++ 40 files changed, 794 insertions(+), 178 deletions(-) create mode 100644 
tests/integration/test_crash_log/configs/crash_log.xml diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a6ae517e401..e9f0f0dae00 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -512,7 +512,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc cache local_disk /tiny_local_cache/ - 10M + 10M 1M 1 0 @@ -1592,6 +1592,10 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1695,6 +1699,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1706,6 +1718,10 @@ Use the following parameters to configure logging: part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1773,6 +1789,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1786,6 +1810,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1831,6 +1859,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1844,6 +1880,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1861,6 +1901,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1874,6 +1922,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1890,6 +1942,14 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1901,13 +1961,16 @@ Parameters: system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day ``` - ## trace_log {#server_configuration_parameters-trace_log} Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. @@ -1920,6 +1983,12 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1931,6 +2000,10 @@ The default server configuration file `config.xml` contains the following settin trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1945,9 +2018,18 @@ Parameters: - `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) **Example** + ```xml @@ -1955,11 +2037,53 @@ Parameters: asynchronous_insert_log
7500 toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false
``` +## crash_log {#server_configuration_parameters-crash_log} + +Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation. + +Parameters: + +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximum size, in rows, for the logs. When the number of non-flushed log rows reaches max_size_rows, the logs are dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size, in rows, for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Number of rows at which the logs start to be flushed to the disk in the background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Whether the logs should be dumped to the disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional) +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). + +The default server configuration file `config.xml` contains the following settings section: + +``` xml + + system + crash_log
    <partition_by>toYYYYMM(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1024</max_size_rows>
+    <reserved_size_rows>1024</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+    <flush_on_crash>false</flush_on_crash>
+</crash_log>
+``` + ## query_masking_rules {#query-masking-rules} Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 1b720098fc7..a46f306f677 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -47,6 +47,10 @@ An example: ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 421df3fe3eb..81a696bcfc1 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1058,6 +1058,10 @@ ClickHouse использует потоки из глобального пул metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1155,12 +1159,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. - +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ``` xml @@ -1169,6 +1180,10 @@ ClickHouse использует потоки из глобального пул part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1218,11 +1233,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1234,6 +1257,10 @@ ClickHouse использует потоки из глобального пул query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1245,11 +1272,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1261,6 +1296,10 @@ ClickHouse использует потоки из глобального пул query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1272,11 +1311,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` – имя базы данных. -- `table` – имя системной таблицы, где будут логироваться запросы. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. -- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1288,6 +1335,10 @@ ClickHouse использует потоки из глобального пул query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1297,12 +1348,20 @@ ClickHouse использует потоки из глобального пул Параметры: -- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. -- `database` — имя базы данных для хранения таблицы. -- `table` — имя таблицы, куда будут записываться текстовые сообщения. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ```xml @@ -1312,6 +1371,10 @@ ClickHouse использует потоки из глобального пул system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day @@ -1323,13 +1386,21 @@ ClickHouse использует потоки из глобального пул Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. -Parameters: +Параметры: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. По умолчанию файл настроек сервера `config.xml` содержит следующие настройки: @@ -1339,9 +1410,84 @@ Parameters: trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 ``` +## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log} + +Настройки для asynchronous_insert_log Система для логирования ассинхронных вставок. + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. + +**Пример** + +```xml + + + system + asynchronous_insert_log
+ 7500 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + +
+
+``` + +## crash_log {#server_configuration_parameters-crash_log} + +Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md). + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: true. + +**Пример** + +``` xml + + system + crash_log
    <partition_by>toYYYYMM(event_date)</partition_by>
+    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    <max_size_rows>1024</max_size_rows>
+    <reserved_size_rows>1024</reserved_size_rows>
+    <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+    <flush_on_crash>true</flush_on_crash>
+</crash_log>
+``` + ## query_masking_rules {#query-masking-rules} Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере, diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 7ff368b1910..24f79cae212 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы" ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024 --> 7500 + 1048576 + 8192 + 524288 + false ``` diff --git a/programs/server/config.xml b/programs/server/config.xml index 2a7dc1e576a..153cb728bb4 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1026,6 +1026,14 @@ 7500 + + 1048576 + + 8192 + + 524288 + + false @@ -1039,6 +1047,11 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + + false @@ -1084,7 +1109,11 @@ system metric_log
7500 + 1048576 + 8192 + 524288 1000 + false @@ -1151,6 +1196,10 @@ toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false