From 488767e601e95cdf81b30b34c70e4e4ba18510e0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 1 Jul 2024 02:38:42 +0200
Subject: [PATCH 01/47] Uninteresting changes

---
 programs/local/LocalServer.cpp |  1 +
 src/Client/LocalConnection.h   |  1 +
 src/Interpreters/Squashing.cpp | 96 ++++++++++++++++++----------------
 src/Interpreters/Squashing.h   | 10 +---
 4 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index b33e1595056..310c366b023 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -422,6 +422,7 @@ void LocalServer::connect()
 {
     connection_parameters = ConnectionParameters(getClientConfiguration(), "localhost");
 
+    /// This is needed for table function input(...).
     ReadBuffer * in;
     auto table_file = getClientConfiguration().getString("table-file", "-");
     if (table_file == "-" || table_file == "stdin")
diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h
index fb6fa1b55eb..b424c5b5aa3 100644
--- a/src/Client/LocalConnection.h
+++ b/src/Client/LocalConnection.h
@@ -175,4 +175,5 @@ private:
 
     ReadBuffer * in;
 };
+
 }
diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp
index f8b6a6542cc..858debacf0c 100644
--- a/src/Interpreters/Squashing.cpp
+++ b/src/Interpreters/Squashing.cpp
@@ -5,11 +5,60 @@
 namespace DB
 {
+
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
 }
 
+namespace
+{
+
+Chunk squashImpl(std::vector<Chunk> & input_chunks)
+{
+    Chunk accumulated_chunk;
+    std::vector<IColumn::MutablePtr> mutable_columns = {};
+    size_t rows = 0;
+    for (const Chunk & chunk : input_chunks)
+        rows += chunk.getNumRows();
+
+    {
+        auto & first_chunk = input_chunks[0];
+        Columns columns = first_chunk.detachColumns();
+        for (auto & column : columns)
+        {
+            mutable_columns.push_back(IColumn::mutate(std::move(column)));
+            mutable_columns.back()->reserve(rows);
+        }
+    }
+
+    for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above
+    {
+        Columns columns = input_chunks[i].detachColumns();
+        for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
+        {
+            const auto source_column = columns[j];
+
+            mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
+        }
+    }
+    accumulated_chunk.setColumns(std::move(mutable_columns), rows);
+    return accumulated_chunk;
+}
+
+const ChunksToSquash * getInfoFromChunk(const Chunk & chunk)
+{
+    const auto & info = chunk.getChunkInfo();
+    const auto * agg_info = typeid_cast<const ChunksToSquash *>(info.get());
+
+    if (!agg_info)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
+
+    return agg_info;
+}
+
+}
+
 Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
     : header(header_)
     , min_block_size_rows(min_block_size_rows_)
@@ -27,8 +76,8 @@ Chunk Squashing::squash(Chunk && input_chunk)
     if (!input_chunk.hasChunkInfo())
         return Chunk();
 
-    const auto *info = getInfoFromChunk(input_chunk);
-    return squash(info->chunks);
+    const auto * info = getInfoFromChunk(input_chunk);
+    return squashImpl(info->chunks);
 }
 
 Chunk Squashing::add(Chunk && input_chunk)
@@ -95,49 +144,6 @@ Chunk Squashing::convertToChunk(std::vector<Chunk> && chunks) const
     return Chunk(header.cloneEmptyColumns(), 0, info);
 }
 
-Chunk Squashing::squash(std::vector<Chunk> & input_chunks)
-{
-    Chunk accumulated_chunk;
-    std::vector<IColumn::MutablePtr> mutable_columns = {};
-    size_t rows = 0;
-    for (const Chunk & chunk : input_chunks)
-        rows += chunk.getNumRows();
-
-    {
-        auto & first_chunk = input_chunks[0];
-        Columns columns = first_chunk.detachColumns();
-        for (auto & column : columns)
-        {
-            mutable_columns.push_back(IColumn::mutate(std::move(column)));
-            mutable_columns.back()->reserve(rows);
-        }
-    }
-
-    for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above
-    {
-        Columns columns = input_chunks[i].detachColumns();
-        for (size_t j = 0, size = mutable_columns.size(); j < size; ++j)
-        {
-            const auto source_column = columns[j];
-
-            mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size());
-        }
-    }
-    accumulated_chunk.setColumns(std::move(mutable_columns), rows);
-    return accumulated_chunk;
-}
-
-const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk)
-{
-    const auto& info = chunk.getChunkInfo();
-    const auto * agg_info = typeid_cast<const ChunksToSquash *>(info.get());
-
-    if (!agg_info)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
-
-    return agg_info;
-}
-
 void Squashing::expandCurrentSize(size_t rows, size_t bytes)
 {
     accumulated_size.rows += rows;
diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h
index d76cca60e41..bb4cd7bed14 100644
--- a/src/Interpreters/Squashing.h
+++ b/src/Interpreters/Squashing.h
@@ -36,12 +36,8 @@ public:
     static Chunk squash(Chunk && input_chunk);
     Chunk flush();
 
-    bool isDataLeft()
-    {
-        return !chunks_to_merge_vec.empty();
-    }
-
     Block header;
+
 private:
     struct CurrentSize
     {
@@ -55,10 +51,6 @@ private:
 
     CurrentSize accumulated_size;
 
-    static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk);
-
-    static Chunk squash(std::vector<Chunk> & input_chunks);
-
     void expandCurrentSize(size_t rows, size_t bytes);
     void changeCurrentSize(size_t rows, size_t bytes);
     bool isEnoughSize(size_t rows, size_t bytes) const;
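Patch 01 only moves `squash` and `getInfoFromChunk` into an anonymous namespace; the public surface of `Squashing` (construct with a header and row/byte thresholds, `add` chunks, `flush` the remainder) is unchanged. A minimal sketch of how a caller drives that interface — the driver loop and the `output.push` sink are assumptions for illustration, not code from the patch:

```cpp
/// Sketch: accumulate-then-squash usage of DB::Squashing (illustrative only).
/// `header`, `input_chunks` and `output` are assumed to exist in the caller.
Squashing squashing(header, /*min_block_size_rows=*/65536, /*min_block_size_bytes=*/1 << 20);

for (auto & chunk : input_chunks)
{
    /// add() buffers rows until a threshold is crossed, then returns a
    /// "planned" chunk whose ChunkInfo lists the chunks to merge.
    Chunk planned = squashing.add(std::move(chunk));
    if (planned.hasChunkInfo())
        output.push(Squashing::squash(std::move(planned))); /// merge into one chunk
}

/// flush() hands back whatever is still buffered below the thresholds.
Chunk rest = squashing.flush();
if (rest.hasChunkInfo())
    output.push(Squashing::squash(std::move(rest)));
```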
From 2d1f45bb985debfb20037cb5f7dba6fb4f6903d8 Mon Sep 17 00:00:00 2001
From: Igor Nikonov
Date: Thu, 11 Jul 2024 08:32:00 +0000
Subject: [PATCH 02/47] Remove redundant code

---
 src/Client/IConnections.h                   |  2 --
 src/Client/MultiplexedConnections.cpp       | 11 +++++------
 src/Interpreters/ClientInfo.cpp             |  4 ++--
 src/Interpreters/ClientInfo.h               |  2 +-
 src/Interpreters/Context.cpp                |  7 -------
 src/Interpreters/Context.h                  |  1 -
 src/Processors/QueryPlan/ReadFromRemote.cpp | 13 ++++++-------
 7 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/src/Client/IConnections.h b/src/Client/IConnections.h
index ebc71511834..09211de53b0 100644
--- a/src/Client/IConnections.h
+++ b/src/Client/IConnections.h
@@ -54,8 +54,6 @@ public:
 
     struct ReplicaInfo
     {
-        bool collaborate_with_initiator{false};
-        size_t all_replicas_count{0};
         size_t number_of_current_replica{0};
     };
 
diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp
index bcef286ecbc..866338bee8d 100644
--- a/src/Client/MultiplexedConnections.cpp
+++ b/src/Client/MultiplexedConnections.cpp
@@ -141,13 +141,12 @@ void MultiplexedConnections::sendQuery(
             modified_settings.group_by_two_level_threshold = 0;
             modified_settings.group_by_two_level_threshold_bytes = 0;
         }
+    }
 
-        if (replica_info)
-        {
-            client_info.collaborate_with_initiator = true;
-            client_info.count_participating_replicas = replica_info->all_replicas_count;
-            client_info.number_of_current_replica = replica_info->number_of_current_replica;
-        }
+    if (replica_info)
+    {
+        client_info.collaborate_with_initiator = true;
+        client_info.number_of_current_replica = replica_info->number_of_current_replica;
     }
 
     /// FIXME: Remove once we will make `allow_experimental_analyzer` obsolete setting.
diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp
index ce1efb61cc0..39fdef23baa 100644
--- a/src/Interpreters/ClientInfo.cpp
+++ b/src/Interpreters/ClientInfo.cpp
@@ -95,7 +95,7 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const
     if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS)
     {
         writeVarUInt(static_cast<UInt64>(collaborate_with_initiator), out);
-        writeVarUInt(count_participating_replicas, out);
+        writeVarUInt(obsolete_count_participating_replicas, out);
         writeVarUInt(number_of_current_replica, out);
     }
 }
@@ -185,7 +185,7 @@ void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision)
         UInt64 value;
         readVarUInt(value, in);
         collaborate_with_initiator = static_cast<bool>(value);
-        readVarUInt(count_participating_replicas, in);
+        readVarUInt(obsolete_count_participating_replicas, in);
         readVarUInt(number_of_current_replica, in);
     }
 }
diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h
index 3054667e264..ca32b4c5cfa 100644
--- a/src/Interpreters/ClientInfo.h
+++ b/src/Interpreters/ClientInfo.h
@@ -127,7 +127,7 @@ public:
 
     /// For parallel processing on replicas
     bool collaborate_with_initiator{false};
-    UInt64 count_participating_replicas{0};
+    UInt64 obsolete_count_participating_replicas{0};
     UInt64 number_of_current_replica{0};
 
     enum class BackgroundOperationType : uint8_t
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index f2626696492..0d60f7dc5f4 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -4941,13 +4941,6 @@ void Context::setConnectionClientVersion(UInt64 client_version_major, UInt64 cli
     client_info.connection_tcp_protocol_version = client_tcp_protocol_version;
 }
 
-void Context::setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica)
-{
-    client_info.collaborate_with_initiator = collaborate_with_initiator;
-    client_info.count_participating_replicas = all_replicas_count;
-    client_info.number_of_current_replica = number_of_current_replica;
-}
-
 void Context::increaseDistributedDepth()
 {
     ++client_info.distributed_depth;
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 8c5492bcbc8..5cd1153ab44 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -696,7 +696,6 @@ public:
     void setInitialQueryStartTime(std::chrono::time_point<std::chrono::system_clock> initial_query_start_time);
     void setQuotaClientKey(const String & quota_key);
     void setConnectionClientVersion(UInt64 client_version_major, UInt64 client_version_minor, UInt64 client_version_patch, unsigned client_tcp_protocol_version);
-    void setReplicaInfo(bool collaborate_with_initiator, size_t all_replicas_count, size_t number_of_current_replica);
    void increaseDistributedDepth();
     const OpenTelemetry::TracingContext & getClientTraceContext() const { return client_info.client_trace_context; }
     OpenTelemetry::TracingContext & getClientTraceContext() { return client_info.client_trace_context; }
diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp
index 84c2515e8ca..8ebd95f27e5 100644
--- a/src/Processors/QueryPlan/ReadFromRemote.cpp
+++ b/src/Processors/QueryPlan/ReadFromRemote.cpp
@@ -411,8 +411,8 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
     auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings);
     const auto & shard = cluster->getShardsInfo().at(0);
 
-    size_t all_replicas_count = current_settings.max_parallel_replicas;
-    if (all_replicas_count > shard.getAllNodeCount())
+    size_t max_replicas_to_use = current_settings.max_parallel_replicas;
+    if (max_replicas_to_use > shard.getAllNodeCount())
     {
         LOG_INFO(
             getLogger("ReadFromParallelRemoteReplicasStep"),
@@ -420,14 +420,14 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
             "Will use the latter number to execute the query.",
             current_settings.max_parallel_replicas,
             shard.getAllNodeCount());
-        all_replicas_count = shard.getAllNodeCount();
+        max_replicas_to_use = shard.getAllNodeCount();
     }
 
     std::vector shuffled_pool;
-    if (all_replicas_count < shard.getAllNodeCount())
+    if (max_replicas_to_use < shard.getAllNodeCount())
     {
         shuffled_pool = shard.pool->getShuffledPools(current_settings);
-        shuffled_pool.resize(all_replicas_count);
+        shuffled_pool.resize(max_replicas_to_use);
     }
     else
     {
@@ -437,11 +437,10 @@ void ReadFromParallelRemoteReplicasStep::initializePipeline(QueryPipelineBuilder
         shuffled_pool = shard.pool->getShuffledPools(current_settings, priority_func);
     }
 
-    for (size_t i=0; i < all_replicas_count; ++i)
+    for (size_t i=0; i < max_replicas_to_use; ++i)
     {
         IConnections::ReplicaInfo replica_info
         {
-            .all_replicas_count = all_replicas_count,
             /// we should use this number specifically because efficiency of data distribution by consistent hash depends on it.
             .number_of_current_replica = i,
         };

From f85be2b452e9d62f766cee3d170dacd1906e04b7 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 11 Jul 2024 12:17:59 +0000
Subject: [PATCH 03/47] Add --memory-usage option to client in non interactive mode

---
 src/Client/ClientBase.cpp       | 22 ++++++++++++++++++++--
 src/Common/ProgressIndication.h | 10 +++++-----
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 5d472ba99b9..b5de348d583 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -2069,9 +2069,18 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
             progress_indication.writeFinalProgress();
         output_stream << std::endl << std::endl;
     }
-    else if (getClientConfiguration().getBool("print-time-to-stderr", false))
+    else
     {
-        error_stream << progress_indication.elapsedSeconds() << "\n";
+        const auto & config = getClientConfiguration();
+        if (config.getBool("print-time-to-stderr", false))
+            error_stream << progress_indication.elapsedSeconds() << "\n";
+
+        const auto & print_memory_mode = config.getString("print-memory-to-stderr", "");
+        auto peak_memory_usage = std::max(progress_indication.getMemoryUsage().peak, 0);
+        if (print_memory_mode == "default")
+            error_stream << peak_memory_usage << "\n";
+        else if (print_memory_mode == "readable")
+            error_stream << formatReadableSizeWithBinarySuffix(peak_memory_usage) << "\n";
     }
 
     if (!is_interactive && getClientConfiguration().getBool("print-num-processed-rows", false))
@@ -3035,6 +3044,7 @@ void ClientBase::init(int argc, char ** argv)
        ("disable_suggestion,A", "Disable loading suggestion data. Note that suggestion data is loaded asynchronously through a second connection to ClickHouse server. Also it is reasonable to disable suggestion if you want to paste a query with TAB characters. Shorthand option -A is for those who get used to mysql client.")
         ("wait_for_suggestions_to_load", "Load suggestion data synchonously.")
         ("time,t", "print query execution time to stderr in non-interactive mode (for benchmarks)")
+        ("memory-usage", po::value<std::string>()->implicit_value("default")->default_value("none"), "print memory usage to stderr in non-interactive mode (for benchmarks). Values: 'none', 'default', 'readable'")
 
         ("echo", "in batch mode, print query before execution")
@@ -3120,6 +3130,14 @@ void ClientBase::init(int argc, char ** argv)
     /// Output execution time to stderr in batch mode.
     if (options.count("time"))
         getClientConfiguration().setBool("print-time-to-stderr", true);
+    if (options.count("memory-usage"))
+    {
+        const auto & memory_usage_mode = options["memory-usage"].as<std::string>();
+        if (memory_usage_mode != "none" && memory_usage_mode != "default" && memory_usage_mode != "readable")
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown memory-usage mode: {}", memory_usage_mode);
+        getClientConfiguration().setString("print-memory-to-stderr", memory_usage_mode);
+    }
+
     if (options.count("query"))
         queries = options["query"].as<std::vector<std::string>>();
     if (options.count("query_id"))
diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h
index ae39fb49bcc..c7f61a7fba9 100644
--- a/src/Common/ProgressIndication.h
+++ b/src/Common/ProgressIndication.h
@@ -71,11 +71,6 @@ public:
     /// How much seconds passed since query execution start.
     double elapsedSeconds() const { return getElapsedNanoseconds() / 1e9; }
 
-    void updateThreadEventData(HostToTimesMap & new_hosts_data);
-
-private:
-    double getCPUUsage();
-
     struct MemoryUsage
     {
         UInt64 total = 0;
@@ -85,6 +80,11 @@
 
     MemoryUsage getMemoryUsage() const;
 
+    void updateThreadEventData(HostToTimesMap & new_hosts_data);
+
+private:
+    double getCPUUsage();
+
     UInt64 getElapsedNanoseconds() const;
 
     /// This flag controls whether to show the progress bar. We start showing it after
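Before the documentation update that follows, a quick illustration of the new flag; the query is arbitrary and the numbers on stderr are only an example of the output shape, not fixed values:

```bash
# Elapsed seconds come from --time, peak memory from --memory-usage;
# both go to stderr, so the query result on stdout stays clean.
clickhouse-client --time --memory-usage=readable \
    -q "SELECT sum(number) FROM numbers(100000000) FORMAT Null"
# stderr (values vary by machine and version):
#   0.103
#   1.25 MiB
```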
From b9cd1bddd7c8612e3b43172ea8670a7e904fe237 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 11 Jul 2024 12:20:37 +0000
Subject: [PATCH 04/47] upd doc

---
 docs/en/interfaces/cli.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index e18ff6f1a3f..63b7353d092 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -185,6 +185,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
 - `--format, -f` – Use the specified default format to output the result.
 - `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `–format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
 - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode.
+- `--memory-usage` – If specified, print the query memory usage to ‘stderr’ in non-interactive mode.
 - `--stacktrace` – If specified, also print the stack trace if an exception occurs.
 - `--config-file` – The name of the configuration file.
 - `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
@@ -339,7 +340,7 @@ clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000
 Connect to one of provides hosts: `192.168.1.15`, `192.168.1.25`.
 
 ``` bash
-clickhouse-client clickhouse://192.168.1.15,192.168.1.25 
+clickhouse-client clickhouse://192.168.1.15,192.168.1.25
 ```
 
 ### Configuration Files {#configuration_files}
@@ -367,7 +368,7 @@ Example of a config file:
 ```
 
 Or the same config in a YAML format:
- 
+
 ```yaml
 user: username
 password: 'password'

From 05c3692e1dfcacffc9de62bac9a53a0e28f4bea7 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 11 Jul 2024 12:22:03 +0000
Subject: [PATCH 05/47] add 03203_client_benchmark_options

---
 .../03203_client_benchmark_options.reference      |  6 ++++++
 .../0_stateless/03203_client_benchmark_options.sh | 13 +++++++++++++
 2 files changed, 19 insertions(+)
 create mode 100644 tests/queries/0_stateless/03203_client_benchmark_options.reference
 create mode 100755 tests/queries/0_stateless/03203_client_benchmark_options.sh

diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.reference b/tests/queries/0_stateless/03203_client_benchmark_options.reference
new file mode 100644
index 00000000000..fd2996b1c78
--- /dev/null
+++ b/tests/queries/0_stateless/03203_client_benchmark_options.reference
@@ -0,0 +1,6 @@
+Ok
+Ok
+Ok
+Ok
+Ok
+Ok
diff --git a/tests/queries/0_stateless/03203_client_benchmark_options.sh b/tests/queries/0_stateless/03203_client_benchmark_options.sh
new file mode 100755
index 00000000000..a9b9d69822b
--- /dev/null
+++ b/tests/queries/0_stateless/03203_client_benchmark_options.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+${CLICKHOUSE_CLIENT} -t -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail"
+${CLICKHOUSE_CLIENT} --time -q "SELECT sleepEachRow(2) FORMAT Null" 2>&1 | grep -q "^2\." && echo "Ok" || echo "Fail"
+${CLICKHOUSE_CLIENT} --memory-usage -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail"
+${CLICKHOUSE_CLIENT} --memory-usage=none -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" # expected no output
+${CLICKHOUSE_CLIENT} --memory-usage=default -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9]\+$" && echo "Ok" || echo "Fail"
+${CLICKHOUSE_CLIENT} --memory-usage=readable -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "^[0-9].*B$" && echo "Ok" || echo "Fail"
+${CLICKHOUSE_CLIENT} --memory-usage=unknown -q "SELECT sum(number) FROM numbers(10_000) FORMAT Null" 2>&1 | grep -q "BAD_ARGUMENTS" && echo "Ok" || echo "Fail"

From fa2270fd59c0de91d45ac50f2064d31f568935e4 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky
Date: Thu, 11 Jul 2024 22:35:39 +0000
Subject: [PATCH 06/47] Fix VALID UNTIL clause resetting after restart

---
 .../Access/InterpreterCreateUserQuery.cpp     | 17 ++++++++++++++-
 .../integration/test_user_valid_until/test.py | 21 ++++++++++++++++++-
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
index 32c51b745c7..a09de7e688a 100644
--- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
+++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
@@ -259,7 +259,22 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat
     if (query.auth_data)
         auth_data = AuthenticationData::fromAST(*query.auth_data, {}, !query.attach);
 
-    updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, {}, allow_no_password, allow_plaintext_password, true);
+    std::optional<time_t> valid_until;
+    if (query.valid_until)
+    {
+        const String valid_until_str = checkAndGetLiteralArgument<String>(query.valid_until, "valid_until");
+        time_t time = 0;
+
+        if (valid_until_str != "infinity")
+        {
+            ReadBufferFromString in(valid_until_str);
+            readDateTimeText(time, in);
+        }
+
+        valid_until = time;
+    }
+
+    updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, valid_until, allow_no_password, allow_plaintext_password, true);
 }
 
 void registerInterpreterCreateUserQuery(InterpreterFactory & factory)
diff --git a/tests/integration/test_user_valid_until/test.py b/tests/integration/test_user_valid_until/test.py
index d6d5bf8b18e..39ca5997067 100644
--- a/tests/integration/test_user_valid_until/test.py
+++ b/tests/integration/test_user_valid_until/test.py
@@ -5,7 +5,7 @@ from time import sleep
 from helpers.cluster import ClickHouseCluster
 
 cluster = ClickHouseCluster(__file__)
-node = cluster.add_instance("node")
+node = cluster.add_instance("node", stay_alive=True)
 
 
 @pytest.fixture(scope="module")
@@ -84,3 +84,22 @@ def test_details(started_cluster):
         node.query("SHOW CREATE USER user_details_time_only")
         == f"CREATE USER user_details_time_only VALID UNTIL \\'{until_year}-01-01 22:03:40\\'\n"
     )
+
+
+def test_restart(started_cluster):
+    node.query("CREATE USER user_restart VALID UNTIL '06/11/2010 08:03:20 Z+3'")
+
+    assert (
+        node.query("SHOW CREATE USER user_restart")
+        == "CREATE USER user_restart VALID UNTIL \\'2010-11-06 05:03:20\\'\n"
+    )
+
+    node.restart_clickhouse()
+
+    assert (
+        node.query("SHOW CREATE USER user_restart")
+        == "CREATE USER user_restart VALID UNTIL \\'2010-11-06 05:03:20\\'\n"
+    )
+
+    error = "Authentication failed"
+    assert error in node.query_and_get_error("SELECT 1", user="user_restart")
From 705134413ffd321a59c1c2c82af11a5edddc962c Mon Sep 17 00:00:00 2001
From: vdimir
Date: Fri, 12 Jul 2024 12:00:39 +0200
Subject: [PATCH 07/47] Update docs/en/interfaces/cli.md

---
 docs/en/interfaces/cli.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index 63b7353d092..849b3d4b486 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -185,7 +185,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
 - `--format, -f` – Use the specified default format to output the result.
 - `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `–format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
 - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode.
-- `--memory-usage` – If specified, print the query memory usage to ‘stderr’ in non-interactive mode.
+- `--memory-usage` – If specified, print memory usage to ‘stderr’ in non-interactive mode. Possible values: 'none' - do not print memory usage, 'default' - print number of bytes, 'readable' - print memory usage in human-readable format.
 - `--stacktrace` – If specified, also print the stack trace if an exception occurs.
 - `--config-file` – The name of the configuration file.
 - `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).

From 15ba7534bff6cbd35ae42d42100a3910fe856882 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jul 2024 02:24:41 +0200
Subject: [PATCH 08/47] Fix double whitespace in EXPLAIN AST CREATE

---
 src/Parsers/ASTCreateQuery.cpp | 10 ++++++++++
 src/Parsers/ASTCreateQuery.h   |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp
index d56a2724914..6dc009da9a8 100644
--- a/src/Parsers/ASTCreateQuery.cpp
+++ b/src/Parsers/ASTCreateQuery.cpp
@@ -265,6 +265,16 @@ ASTPtr ASTCreateQuery::clone() const
     return res;
 }
 
+String ASTCreateQuery::getID(char delim) const
+{
+    String res = attach ? "AttachQuery" : "CreateQuery";
+    String database = getDatabase();
+    if (!database.empty())
+        res += (delim + getDatabase());
+    res += (delim + getTable());
+    return res;
+}
+
 void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const
 {
     frame.need_parens = false;
diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h
index 6fbf045915b..9e4364b1f25 100644
--- a/src/Parsers/ASTCreateQuery.h
+++ b/src/Parsers/ASTCreateQuery.h
@@ -136,7 +136,7 @@ public:
     bool create_or_replace{false};
 
     /** Get the text that identifies this element. */
-    String getID(char delim) const override { return (attach ? "AttachQuery" : "CreateQuery") + (delim + getDatabase()) + delim + getTable(); }
+    String getID(char delim) const override;
 
     ASTPtr clone() const override;

From 24545caa3a7898e754d7b1f002c95132dc6a2d8b Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky
Date: Mon, 15 Jul 2024 00:52:31 +0000
Subject: [PATCH 09/47] Move parsing into a separate function

---
 .../Access/InterpreterCreateUserQuery.cpp     | 59 ++++++++++---------
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
index a09de7e688a..855aa36b159 100644
--- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
+++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp
@@ -114,6 +114,34 @@ namespace
         else if (query.grantees)
             user.grantees = *query.grantees;
     }
+
+    time_t getValidUntilFromAST(ASTPtr valid_until, ContextPtr context)
+    {
+        if (context)
+            valid_until = evaluateConstantExpressionAsLiteral(valid_until, context);
+
+        const String valid_until_str = checkAndGetLiteralArgument<String>(valid_until, "valid_until");
+
+        if (valid_until_str == "infinity")
+            return 0;
+
+        time_t time = 0;
+        ReadBufferFromString in(valid_until_str);
+
+        if (context)
+        {
+            const auto & time_zone = DateLUT::instance("");
+            const auto & utc_time_zone = DateLUT::instance("UTC");
+
+            parseDateTimeBestEffort(time, in, time_zone, utc_time_zone);
+        }
+        else
+        {
+            readDateTimeText(time, in);
+        }
+
+        return time;
+    }
 }
 
 BlockIO InterpreterCreateUserQuery::execute()
@@ -134,23 +162,7 @@ BlockIO InterpreterCreateUserQuery::execute()
 
     std::optional<time_t> valid_until;
     if (query.valid_until)
-    {
-        const ASTPtr valid_until_literal = evaluateConstantExpressionAsLiteral(query.valid_until, getContext());
-        const String valid_until_str = checkAndGetLiteralArgument<String>(valid_until_literal, "valid_until");
-
-        time_t time = 0;
-
-        if (valid_until_str != "infinity")
-        {
-            const auto & time_zone = DateLUT::instance("");
-            const auto & utc_time_zone = DateLUT::instance("UTC");
-
-            ReadBufferFromString in(valid_until_str);
-            parseDateTimeBestEffort(time, in, time_zone, utc_time_zone);
-        }
-
-        valid_until = time;
-    }
+        valid_until = getValidUntilFromAST(query.valid_until, getContext());
 
     std::optional<RolesOrUsersSet> default_roles_from_query;
     if (query.default_roles)
@@ -261,18 +273,7 @@ void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreat
 
     std::optional<time_t> valid_until;
     if (query.valid_until)
-    {
-        const String valid_until_str = checkAndGetLiteralArgument<String>(query.valid_until, "valid_until");
-        time_t time = 0;
-
-        if (valid_until_str != "infinity")
-        {
-            ReadBufferFromString in(valid_until_str);
-            readDateTimeText(time, in);
-        }
-
-        valid_until = time;
-    }
+        valid_until = getValidUntilFromAST(query.valid_until, {});
 
     updateUserFromQueryImpl(user, query, auth_data, {}, {}, {}, {}, valid_until, allow_no_password, allow_plaintext_password, true);
 }

From 41327260bad798494fef0914b51d95fa85043057 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jul 2024 03:47:35 +0200
Subject: [PATCH 10/47] Fix MSan report in GRPC

---
 contrib/grpc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/grpc b/contrib/grpc
index f5b7fdc2dff..1716359d2e2 160000
--- a/contrib/grpc
+++ b/contrib/grpc
@@ -1 +1 @@
-Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df
+Subproject commit 1716359d2e28d304a250f9df0e6c0ccad03de8db

From d76139699178b5403255150b0774a12332728238 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 15 Jul 2024 04:08:10 +0200
Subject: [PATCH 11/47] Fix test

---
 .../0_stateless/02903_parameterized_view_explain_ast.reference | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference
index 6ee8d0c3d23..27222968b1a 100644
--- a/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference
+++ b/tests/queries/0_stateless/02903_parameterized_view_explain_ast.reference
@@ -1,4 +1,4 @@
-CreateQuery  numbers_pv (children 2)
+CreateQuery numbers_pv (children 2)
  Identifier numbers_pv
  SelectWithUnionQuery (children 1)
   ExpressionList (children 1)

From e346535641bff8998c126f5fb74f02d63014090a Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Mon, 15 Jul 2024 16:27:53 +0200
Subject: [PATCH 12/47] fix log in keeper tcp handler

---
 src/Server/KeeperTCPHandler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp
index 4849f5827c1..aa79d8814a7 100644
--- a/src/Server/KeeperTCPHandler.cpp
+++ b/src/Server/KeeperTCPHandler.cpp
@@ -623,7 +623,7 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response
         LOG_INFO(
             log,
             "Total time to process a request took too long ({}ms).\nRequest info: {}",
-            elapsed,
+            elapsed_ms,
             request->toString(/*short_format=*/true));
     }

From dec5931262ab4dfe541abfca3fb1e7b67f3d9a5e Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Mon, 15 Jul 2024 16:43:41 +0200
Subject: [PATCH 13/47] log session id

---
 src/Server/KeeperTCPHandler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp
index aa79d8814a7..b61df45133a 100644
--- a/src/Server/KeeperTCPHandler.cpp
+++ b/src/Server/KeeperTCPHandler.cpp
@@ -622,7 +622,8 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response
     {
         LOG_INFO(
             log,
-            "Total time to process a request took too long ({}ms).\nRequest info: {}",
+            "Total time to process a request in session {} took too long ({}ms).\nRequest info: {}",
+            session_id,
             elapsed_ms,
             request->toString(/*short_format=*/true));
     }

From 38027abbecfc7b60594ed1073ce9104133152aa9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Mon, 8 Jul 2024 13:06:50 +0000
Subject: [PATCH 14/47] Fix bug in `numbers` when both limit and offset is used but the index cannot be used

---
 src/Storages/System/StorageSystemNumbers.cpp              | 5 +++++
 .../03203_system_numbers_limit_and_offset.reference       | 3 +++
 .../0_stateless/03203_system_numbers_limit_and_offset.sql | 5 +++++
 3 files changed, 13 insertions(+)
 create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
 create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql

diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp
index da700a7a4e9..6ddfb5e3213 100644
--- a/src/Storages/System/StorageSystemNumbers.cpp
+++ b/src/Storages/System/StorageSystemNumbers.cpp
@@ -38,6 +38,11 @@ void StorageSystemNumbers::read(
     size_t max_block_size,
     size_t num_streams)
 {
+    if (limit && *limit < max_block_size)
+    {
+        max_block_size = static_cast<size_t>(*limit);
+        multithreaded = false;
+    }
     query_plan.addStep(std::make_unique<ReadFromSystemNumbersStep>(
         column_names, query_info, storage_snapshot, context, shared_from_this(), max_block_size, num_streams));
 }
diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
new file mode 100644
index 00000000000..d55fa7c71fc
--- /dev/null
+++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
@@ -0,0 +1,3 @@
+9900
+9910
+9920
diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
new file mode 100644
index 00000000000..6b7f998c4d4
--- /dev/null
+++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
@@ -0,0 +1,5 @@
+SELECT number FROM numbers_mt(10000)
+WHERE (number % 10) = 0
+ORDER BY number ASC
+LIMIT 990, 3;
+

From a177bb63077353e15f6780ae775c9519f2795695 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Mon, 8 Jul 2024 16:14:20 +0000
Subject: [PATCH 15/47] Really fix the bug

---
 .../QueryPlan/ReadFromSystemNumbersStep.cpp   | 35 +++++++------------
 src/Storages/System/StorageSystemNumbers.cpp  |  5 ---
 ..._system_numbers_limit_and_offset.reference |  4 +++
 .../03203_system_numbers_limit_and_offset.sql | 34 +++++++++++++++++-
 4 files changed, 49 insertions(+), 29 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index a3ae035afdd..049fe762640 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -39,15 +39,10 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step
 class NumbersSource : public ISource
 {
 public:
-    NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional<UInt64> limit_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_)
-        : ISource(createHeader(column_name))
-        , block_size(block_size_)
-        , next(offset_)
-        , chunk_step(chunk_step_)
-        , step(step_)
+    NumbersSource(
+        UInt64 block_size_, UInt64 offset_, std::optional<UInt64> end_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_)
+        : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), chunk_step(chunk_step_), end(end_), step(step_)
     {
-        if (limit_.has_value())
-            end = limit_.value() + offset_;
     }
 
     String getName() const override { return "Numbers"; }
@@ -79,7 +74,6 @@ protected:
         next += chunk_step;
 
         progress(column->size(), column->byteSize());
-
        return {Columns{std::move(column)}, real_block_size};
     }
@@ -549,13 +543,21 @@ Pipe ReadFromSystemNumbersStep::makePipe()
         return pipe;
     }
 
+    const auto end = std::invoke(
+        [&]() -> std::optional<UInt64>
+        {
+            if (numbers_storage.limit.has_value())
+                return *(numbers_storage.limit) + numbers_storage.offset;
+            return {};
+        });
+
     /// Fall back to NumbersSource
     for (size_t i = 0; i < num_streams; ++i)
     {
         auto source = std::make_shared<NumbersSource>(
             max_block_size,
             numbers_storage.offset + i * max_block_size * numbers_storage.step,
-            numbers_storage.limit,
+            end,
             num_streams * max_block_size * numbers_storage.step,
             numbers_storage.column_name,
             numbers_storage.step);
@@ -571,19 +573,6 @@ Pipe ReadFromSystemNumbersStep::makePipe()
         pipe.addSource(std::move(source));
     }
 
-    if (numbers_storage.limit)
-    {
-        size_t i = 0;
-        auto storage_limit = (*numbers_storage.limit - 1) / numbers_storage.step + 1;
-        /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly.
-        pipe.addSimpleTransform(
-            [&](const Block & header)
-            {
-                ++i;
-                return std::make_shared<LimitTransform>(header, storage_limit * i / num_streams - storage_limit * (i - 1) / num_streams, 0);
-            });
-    }
-
     return pipe;
 }
diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp
index 6ddfb5e3213..da700a7a4e9 100644
--- a/src/Storages/System/StorageSystemNumbers.cpp
+++ b/src/Storages/System/StorageSystemNumbers.cpp
@@ -38,11 +38,6 @@ void StorageSystemNumbers::read(
     size_t max_block_size,
     size_t num_streams)
 {
-    if (limit && *limit < max_block_size)
-    {
-        max_block_size = static_cast<size_t>(*limit);
-        multithreaded = false;
-    }
     query_plan.addStep(std::make_unique<ReadFromSystemNumbersStep>(
         column_names, query_info, storage_snapshot, context, shared_from_this(), max_block_size, num_streams));
 }
diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
index d55fa7c71fc..05deae711c6 100644
--- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
+++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
@@ -1,3 +1,7 @@
+case 1
 9900
 9910
 9920
+case 2
+9990
+18679 31
diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
index 6b7f998c4d4..1a1133c8a7f 100644
--- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
+++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
@@ -1,5 +1,37 @@
+SELECT 'case 1';
 SELECT number FROM numbers_mt(10000)
 WHERE (number % 10) = 0
 ORDER BY number ASC
 LIMIT 990, 3;
-
+SELECT 'case 2';
+SELECT number FROM numbers_mt(10000)
+WHERE (number % 10) = 0
+ORDER BY number ASC
+LIMIT 999, 20 SETTINGS max_block_size = 31;
+--- The following query was buggy before, so let's use it as a test case
+WITH
+    toUInt64(-1) AS umax,
+    toUInt8(ceil(log10(umax))) AS max_digits,
+    9 * max_digits AS max_digits_sum,
+    (x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0))) AS is_prime_slow
+SELECT
+    num,
+    ds
+FROM
+(
+    WITH x -> arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(x)))) AS digits_sum
+    SELECT
+        1 + (number * 2) AS num,
+        digits_sum(num) AS ds
+    FROM numbers_mt(10000)
+    WHERE ds IN (
+        WITH x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0)) AS is_prime_slow
+        SELECT number
+        FROM numbers(max_digits_sum + 1)
+        WHERE is_prime_slow(number)
+    )
+)
+WHERE is_prime_slow(num)
+ORDER BY num ASC
+LIMIT 998, 1
+SETTINGS max_block_size = 64, max_threads=16;

From 8b78cf1c6139931132ff84bdf1ae495eef9714be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Mon, 8 Jul 2024 16:22:12 +0000
Subject: [PATCH 16/47] Add one more test case

---
 .../03203_system_numbers_limit_and_offset.reference | 12 ++++++++++++
 .../03203_system_numbers_limit_and_offset.sql       |  8 ++++++++
 2 files changed, 20 insertions(+)

diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
index 05deae711c6..e95864a6fc9 100644
--- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
+++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference
@@ -5,3 +5,15 @@ case 1
 case 2
 9990
 18679 31
+0
+10
+20
+30
+40
+50
+60
+70
+80
+90
+100
+110
diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
index 1a1133c8a7f..dc9e06b806f 100644
--- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
+++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql
@@ -3,11 +3,13 @@ SELECT number FROM numbers_mt(10000)
 WHERE (number % 10) = 0
 ORDER BY number ASC
 LIMIT 990, 3;
+
 SELECT 'case 2';
 SELECT number FROM numbers_mt(10000)
 WHERE (number % 10) = 0
 ORDER BY number ASC
 LIMIT 999, 20 SETTINGS max_block_size = 31;
+
 --- The following query was buggy before, so let's use it as a test case
 WITH
@@ -35,3 +37,9 @@
 SETTINGS max_block_size = 64, max_threads=16;
+
+SELECT number
+FROM numbers_mt(120)
+WHERE (number % 10) = 0
+ORDER BY number ASC
+SETTINGS max_block_size = 31, max_threads = 11

From 2442473b253c4d8cf8f4474065b196bd4be4782c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 10 Jul 2024 11:12:28 +0000
Subject: [PATCH 17/47] Fix `generate_series`

---
 .../QueryPlan/ReadFromSystemNumbersStep.cpp   | 50 +++++++++++++++----
 .../02970_generate_series.reference           |  1 +
 .../0_stateless/02970_generate_series.sql     |  1 +
 3 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index 049fe762640..67c228d43f4 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -36,12 +36,32 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step
         iotaWithStep(begin, count, first_value, step);
 }
 
+/// The range is defined as [start, end)
+UInt64 itemCountInRange(UInt64 start, UInt64 end, UInt64 step)
+{
+    const auto range_count = end - start;
+    if (step == 1)
+        return range_count;
+
+    return (range_count - 1) / step + 1;
+}
+
 class NumbersSource : public ISource
 {
 public:
-    NumbersSource(
-        UInt64 block_size_, UInt64 offset_, std::optional<UInt64> end_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_)
-        : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), chunk_step(chunk_step_), end(end_), step(step_)
+    NumbersSource(
+        UInt64 block_size_,
+        UInt64 offset_,
+        std::optional<UInt64> end_,
+        const std::string & column_name,
+        UInt64 step_in_chunk_,
+        UInt64 step_between_chunks_)
+        : ISource(createHeader(column_name))
+        , block_size(block_size_)
+        , next(offset_)
+        , end(end_)
+        , step_in_chunk(step_in_chunk_)
+        , step_between_chunks(step_between_chunks_)
     {
     }
     String getName() const override { return "Numbers"; }
@@ -59,7 +79,10 @@ protected:
         {
             if (end.value() <= next)
                 return {};
-            real_block_size = std::min(block_size, end.value() - next);
+
+            auto max_items_to_generate = itemCountInRange(next, *end, step_in_chunk);
+
+            real_block_size = std::min(block_size, max_items_to_generate);
         }
         auto column = ColumnUInt64::create(real_block_size);
         ColumnUInt64::Container & vec = column->getData();
@@ -69,9 +92,9 @@ protected:
 
         UInt64 * current_end = &vec[real_block_size];
 
-        iotaWithStepOptimized(pos, static_cast<size_t>(current_end - pos), curr, step);
+        iotaWithStepOptimized(pos, static_cast<size_t>(current_end - pos), curr, step_in_chunk);
 
-        next += chunk_step;
+        next += step_between_chunks;
 
         progress(column->size(), column->byteSize());
         return {Columns{std::move(column)}, real_block_size};
@@ -80,9 +103,9 @@ protected:
 private:
     UInt64 block_size;
     UInt64 next;
-    UInt64 chunk_step;
     std::optional<UInt64> end; /// not included
-    UInt64 step;
+    UInt64 step_in_chunk;
+    UInt64 step_between_chunks;
 };
 
 struct RangeWithStep
@@ -552,19 +575,24 @@ Pipe ReadFromSystemNumbersStep::makePipe()
         });
 
     /// Fall back to NumbersSource
+    /// Range in a single block
+    const auto block_range = max_block_size * numbers_storage.step;
+    /// Step between chunks in a single source.
+    /// It is bigger than block_range in case of multiple threads, because we have to account for other sources as well.
+    const auto step_between_chunks = num_streams * block_range;
     for (size_t i = 0; i < num_streams; ++i)
     {
         auto source = std::make_shared<NumbersSource>(
             max_block_size,
-            numbers_storage.offset + i * max_block_size * numbers_storage.step,
+            numbers_storage.offset + i * block_range,
             end,
-            num_streams * max_block_size * numbers_storage.step,
             numbers_storage.column_name,
-            numbers_storage.step);
+            numbers_storage.step,
+            step_between_chunks);
 
         if (numbers_storage.limit && i == 0)
         {
-            auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1;
+            auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step);
             if (limit > 0 && limit < rows_appr)
                 rows_appr = query_info_limit;
             source->addTotalRowsApprox(rows_appr);
diff --git a/tests/queries/0_stateless/02970_generate_series.reference b/tests/queries/0_stateless/02970_generate_series.reference
index 6e6f3c81587..f8a8e1891b7 100644
--- a/tests/queries/0_stateless/02970_generate_series.reference
+++ b/tests/queries/0_stateless/02970_generate_series.reference
@@ -5,6 +5,7 @@
 501
 50
 17928
+17928
 0
 10
 13
diff --git a/tests/queries/0_stateless/02970_generate_series.sql b/tests/queries/0_stateless/02970_generate_series.sql
index 0844714b3a6..edae884a561 100644
--- a/tests/queries/0_stateless/02970_generate_series.sql
+++ b/tests/queries/0_stateless/02970_generate_series.sql
@@ -5,6 +5,7 @@ SELECT count() FROM generate_series(7, 77, 10);
 SELECT count() FROM generate_series(0, 1000, 2);
 SELECT count() FROM generate_series(0, 999, 20);
 SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1;
+SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1 SETTINGS max_block_size = 71;
 SELECT * FROM generate_series(5, 4);
 SELECT * FROM generate_series(0, 0);

From a92eb1a4c38be7868fd0cb80261ae882c8fbb70e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 10 Jul 2024 11:20:05 +0000
Subject: [PATCH 18/47] Avoid creating unnecessary sources

---
 src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
index 67c228d43f4..da8a6933d6f 100644
--- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
+++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp
@@ -582,6 +582,10 @@ Pipe ReadFromSystemNumbersStep::makePipe()
     const auto step_between_chunks = num_streams * block_range;
     for (size_t i = 0; i < num_streams; ++i)
     {
+        const auto source_start = numbers_storage.offset + i * block_range;
+        if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_start)
+            break;
+
         auto source = std::make_shared<NumbersSource>(
             max_block_size,
             numbers_storage.offset + i * block_range,
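The `itemCountInRange` helper introduced in patch 17 carries the arithmetic that patches 18 and 19 build on: how many values of the sequence `start, start + step, …` fall into the half-open range `[start, end)`. A self-contained check of that contract (the function body is copied for illustration; the `main` with asserts is not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

/// Count of values start, start + step, ... lying in [start, end).
uint64_t itemCountInRange(uint64_t start, uint64_t end, uint64_t step)
{
    const auto range_count = end - start;
    if (step == 1)
        return range_count;

    return (range_count - 1) / step + 1;
}

int main()
{
    assert(itemCountInRange(0, 10, 1) == 10);    // 0..9
    assert(itemCountInRange(0, 10, 3) == 4);     // 0, 3, 6, 9
    assert(itemCountInRange(4, 1009, 4) == 252); // 4, 8, ..., 1008, as in generate_series(4, 1008, 4)
}
```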
19/47] Fix the case when offset is bigger than limit --- src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp | 8 +++++--- .../03203_system_numbers_limit_and_offset.reference | 1 + .../0_stateless/03203_system_numbers_limit_and_offset.sql | 8 +++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index da8a6933d6f..b070bbe739b 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -582,13 +582,15 @@ Pipe ReadFromSystemNumbersStep::makePipe() const auto step_between_chunks = num_streams * block_range; for (size_t i = 0; i < num_streams; ++i) { - const auto source_start = numbers_storage.offset + i * block_range; - if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_start) + const auto source_offset = i * block_range; + if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_offset) break; + const auto source_start = numbers_storage.offset + source_offset; + auto source = std::make_shared( max_block_size, - numbers_storage.offset + i * block_range, + source_start, end, numbers_storage.column_name, numbers_storage.step, diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference index e95864a6fc9..960459f42ac 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference @@ -17,3 +17,4 @@ case 2 90 100 110 +4250 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql index dc9e06b806f..e258b0bd2d2 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -42,4 +42,10 @@ SELECT number FROM numbers_mt(120) WHERE (number % 10) = 0 ORDER BY number ASC -SETTINGS max_block_size = 31, max_threads = 11 +SETTINGS max_block_size = 31, max_threads = 11; + +SELECT number +FROM numbers_mt(4242, 9) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11; From 6be9d7b2544ccb5642999b53409c0d9326163a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 11 Jul 2024 13:40:44 +0200 Subject: [PATCH 20/47] Make the tests works with old analyzer --- .../0_stateless/03203_system_numbers_limit_and_offset.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql index e258b0bd2d2..b72ab4a98fd 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -21,10 +21,10 @@ SELECT ds FROM ( - WITH x -> arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(x)))) AS digits_sum + WITH arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum SELECT 1 + (number * 2) AS num, - digits_sum(num) AS ds + digits_sum AS ds FROM numbers_mt(10000) WHERE ds IN ( WITH x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0)) AS is_prime_slow From 35eabcc9d1cf3a5339c20ea34d006edfc858ae68 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Sat, 13 Jul 2024 16:01:15 +0000 Subject: [PATCH 21/47] Split test cases --- ...ystem_numbers_limit_and_offset_complex.reference} | 6 ------ ...3203_system_numbers_limit_and_offset_complex.sql} | 12 ------------ ..._system_numbers_limit_and_offset_simple.reference | 6 ++++++ .../03203_system_numbers_limit_and_offset_simple.sql | 11 +++++++++++ 4 files changed, 17 insertions(+), 18 deletions(-) rename tests/queries/0_stateless/{03203_system_numbers_limit_and_offset.reference => 03203_system_numbers_limit_and_offset_complex.reference} (60%) rename tests/queries/0_stateless/{03203_system_numbers_limit_and_offset.sql => 03203_system_numbers_limit_and_offset_complex.sql} (82%) create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference similarity index 60% rename from tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference rename to tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference index 960459f42ac..d38b21d2d05 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference @@ -1,9 +1,3 @@ -case 1 -9900 -9910 -9920 -case 2 -9990 18679 31 0 10 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql similarity index 82% rename from tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql rename to tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql index b72ab4a98fd..8e8620361fa 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql @@ -1,15 +1,3 @@ -SELECT 'case 1'; -SELECT number FROM numbers_mt(10000) -WHERE (number % 10) = 0 -ORDER BY number ASC -LIMIT 990, 3; - -SELECT 'case 2'; -SELECT number FROM numbers_mt(10000) -WHERE (number % 10) = 0 -ORDER BY number ASC -LIMIT 999, 20 SETTINGS max_block_size = 31; - --- The following query was buggy before, so let's use it as a test case WITH toUInt64(-1) AS umax, diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference new file mode 100644 index 00000000000..6aad3ee0b4b --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference @@ -0,0 +1,6 @@ +case 1 +9900 +9910 +9920 +case 2 +9990 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql new file mode 100644 index 00000000000..df01ddf4312 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql @@ -0,0 +1,11 @@ +SELECT 'case 1'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 990, 3; + +SELECT 'case 2'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 999, 20 SETTINGS max_block_size = 31; From 8e469af2a41b444bbeb58b63fa0ece7d883553c0 Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Sat, 13 Jul 2024 20:36:01 +0000 Subject: [PATCH 22/47] Make query really work with old analyzer --- ...ystem_numbers_limit_and_offset_complex.sql | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql index 8e8620361fa..756e08da27d 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql @@ -1,27 +1,26 @@ --- The following query was buggy before, so let's use it as a test case WITH - toUInt64(-1) AS umax, - toUInt8(ceil(log10(umax))) AS max_digits, - 9 * max_digits AS max_digits_sum, - (x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0))) AS is_prime_slow + (num > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < num) AND ((num % y) = 0)), range(toUInt64(sqrt(num)) + 1))) = 0) AS is_prime_slow SELECT num, - ds + ds, FROM ( - WITH arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum + WITH + arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum SELECT 1 + (number * 2) AS num, digits_sum AS ds FROM numbers_mt(10000) WHERE ds IN ( - WITH x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0)) AS is_prime_slow + WITH + (number > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < number) AND ((number % y) = 0)), range(toUInt64(sqrt(number)) + 1))) = 0) AS is_prime_slow SELECT number - FROM numbers(max_digits_sum + 1) - WHERE is_prime_slow(number) + FROM numbers(180 + 1) + WHERE is_prime_slow ) ) -WHERE is_prime_slow(num) +WHERE is_prime_slow ORDER BY num ASC LIMIT 998, 1 SETTINGS max_block_size = 64, max_threads=16; From 26f8b81081dd75941bb72b00642391529b9d5bda Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 16 Jul 2024 10:54:00 +0800 Subject: [PATCH 23/47] update trusted contributors --- tests/ci/lambda_shared_package/lambda_shared/pr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index e981e28a454..e3aceaab0de 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -45,6 +45,7 @@ TRUSTED_CONTRIBUTORS = { "k-morozov", # Konstantin Morozov, Yandex Cloud "justindeguzman", # ClickHouse, Inc "jrdi", # ClickHouse contributor, TinyBird + "XuJia0210", # ClickHouse, Inc ] } From 2c62a6b5283c68aac604a97591571e05e6877fab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:10:26 +0200 Subject: [PATCH 24/47] Update pr.py --- .../lambda_shared_package/lambda_shared/pr.py | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index e3aceaab0de..d970662d38b 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -3,48 +3,29 @@ import re from typing import Tuple -# Individual trusted contirbutors who are not in any trusted organization. +# Individual trusted contributors who are not in any trusted organization. 
# Can be changed in runtime: we will append users that we learned to be in # a trusted org, to save GitHub API calls. TRUSTED_CONTRIBUTORS = { e.lower() for e in [ - "achimbab", # Kakao corp "Algunenano", # Raúl Marín, ClickHouse, Inc "amosbird", "azat", # SEMRush "bharatnc", # Many contributions. - "bobrik", # Seasoned contributor, CloudFlare "cwurm", # ClickHouse, Inc "den-crane", # Documentation contributor - "hagen1778", # Roman Khavronenko, seasoned contributor - "hczhcz", - "hexiaoting", # Seasoned contributor "ildus", # adjust, ex-pgpro - "javisantana", # a Spanish ClickHouse enthusiast, ex-Carto - "kreuzerkrieg", - "nikvas0", "nvartolomei", # Seasoned contributor, CloudFlare - "spongedu", # Seasoned contributor "taiyang-li", "ucasFL", # Amos Bird's friend "vdimir", # ClickHouse, Inc - "YiuRULE", - "zlobober", # Developer of YT - "ilejn", # Arenadata, responsible for Kerberized Kafka "thomoco", # ClickHouse, Inc - "BoloniniD", # Seasoned contributor, HSE "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse, Inc - "myrrc", # Mike Kot, DoubleCloud "thevar1able", # ClickHouse, Inc - "aalexfvk", - "MikhailBurdukov", "tsolodov", # ClickHouse, Inc - "kitaisreal", - "k-morozov", # Konstantin Morozov, Yandex Cloud "justindeguzman", # ClickHouse, Inc - "jrdi", # ClickHouse contributor, TinyBird "XuJia0210", # ClickHouse, Inc ] } From 5d96b684a640ef7cc0f911d25ab8fbde4db9f590 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 16 Jul 2024 11:30:13 +0800 Subject: [PATCH 25/47] update format --- tests/ci/lambda_shared_package/lambda_shared/pr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index d970662d38b..9320cc4ce8c 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -26,7 +26,7 @@ TRUSTED_CONTRIBUTORS = { "thevar1able", # ClickHouse, Inc "tsolodov", # ClickHouse, Inc "justindeguzman", # ClickHouse, Inc - "XuJia0210", # ClickHouse, Inc + "XuJia0210", # ClickHouse, Inc ] } From 1d2f3a1ed5274764a2a051994ebd69464b6b1212 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 05:56:26 +0200 Subject: [PATCH 26/47] Update pr.py --- tests/ci/lambda_shared_package/lambda_shared/pr.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/ci/lambda_shared_package/lambda_shared/pr.py b/tests/ci/lambda_shared_package/lambda_shared/pr.py index 9320cc4ce8c..95130fc2a0f 100644 --- a/tests/ci/lambda_shared_package/lambda_shared/pr.py +++ b/tests/ci/lambda_shared_package/lambda_shared/pr.py @@ -9,7 +9,6 @@ from typing import Tuple TRUSTED_CONTRIBUTORS = { e.lower() for e in [ - "Algunenano", # Raúl Marín, ClickHouse, Inc "amosbird", "azat", # SEMRush "bharatnc", # Many contributions. 
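The TRUSTED_CONTRIBUTORS set being trimmed in the trusted-contributors patches above and below is built with an `e.lower()` comprehension, so any consumer must lowercase the login before the membership test. A minimal sketch of that lookup, with a hypothetical helper name (the real check lives elsewhere in the lambda_shared package and is not shown in these diffs):

```python
from typing import Set

# Hypothetical helper for illustration only; lambda_shared does not
# necessarily expose a function with this name or signature.
def is_trusted_contributor(pr_user_login: str, trusted_contributors: Set[str]) -> bool:
    # TRUSTED_CONTRIBUTORS stores lowercased logins ({e.lower() for e in [...]}),
    # so the incoming GitHub login must be lowercased as well.
    return pr_user_login.lower() in trusted_contributors
```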
@@ -19,11 +18,9 @@ TRUSTED_CONTRIBUTORS = { "nvartolomei", # Seasoned contributor, CloudFlare "taiyang-li", "ucasFL", # Amos Bird's friend - "vdimir", # ClickHouse, Inc "thomoco", # ClickHouse, Inc "tonickkozlov", # Cloudflare "tylerhannan", # ClickHouse, Inc - "thevar1able", # ClickHouse, Inc "tsolodov", # ClickHouse, Inc "justindeguzman", # ClickHouse, Inc "XuJia0210", # ClickHouse, Inc From e292358ade930e2779792365407fa229cd37d079 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 06:12:42 +0200 Subject: [PATCH 27/47] Update run.sh --- docker/test/fasttest/run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 58c436b469b..57e7b6b2f56 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -11,9 +11,9 @@ stage=${stage:-} # Compiler version, normally set by Dockerfile export LLVM_VERSION=${LLVM_VERSION:-18} -# For some reason it is needed for tests with 'expect' but I don't know, why -groupadd --system --gid 1000 clickhouse -useradd --system --gid 1000 --uid 1000 clickhouse +# For some reason, it is needed for tests with 'expect', but I don't know, why. +sudo groupadd --system --gid 1000 clickhouse +sudo useradd --system --gid 1000 --uid 1000 clickhouse # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about From a9b8c2a29a69d85be827048f02ca85a84ec7afca Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 16 Jul 2024 10:48:43 +0200 Subject: [PATCH 28/47] CI: Do not block Tests_3 unless MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI --- tests/ci/functional_test_check.py | 28 +++++++++++- tests/ci/integration_test_check.py | 19 ++++++++ tests/ci/merge_pr.py | 70 ++++-------------------------- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 41c7ed963c9..74dd4d8fbd7 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -17,9 +17,19 @@ from download_release_packages import download_last_release from env_helper import REPO_COPY, REPORT_PATH, TEMP_PATH from get_robot_token import get_parameter_from_ssm from pr_info import PRInfo -from report import ERROR, SUCCESS, JobReport, StatusType, TestResults, read_test_results +from report import ( + ERROR, + SUCCESS, + JobReport, + StatusType, + TestResults, + read_test_results, + FAILURE, +) from stopwatch import Stopwatch from tee_popen import TeePopen +from ci_config import CI +from ci_utils import Utils NO_CHANGES_MSG = "Nothing to run" @@ -351,7 +361,23 @@ def main(): additional_files=additional_logs, ).dump(to_file=args.report_to_file if args.report_to_file else None) + should_block_ci = False if state != SUCCESS: + should_block_ci = True + + if state == FAILURE and CI.is_required(check_name): + failed_cnt = Utils.get_failed_tests_number(description) + print( + f"Job status is [{state}] with [{failed_cnt}] failed test cases. 
status description [{description}]" + ) + if ( + failed_cnt + and failed_cnt <= CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI + ): + print(f"Won't block the CI workflow") + should_block_ci = False + + if should_block_ci: sys.exit(1) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 80ac1935d95..6245f0490fc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -23,10 +23,13 @@ from report import ( TestResult, TestResults, read_test_results, + FAILURE, ) from stopwatch import Stopwatch import integration_tests_runner as runner +from ci_config import CI +from ci_utils import Utils def get_json_params_dict( @@ -233,7 +236,23 @@ def main(): additional_files=additional_logs, ).dump(to_file=args.report_to_file if args.report_to_file else None) + should_block_ci = False if state != SUCCESS: + should_block_ci = True + + if state == FAILURE and CI.is_required(check_name): + failed_cnt = Utils.get_failed_tests_number(description) + print( + f"Job status is [{state}] with [{failed_cnt}] failed test cases. status description [{description}]" + ) + if ( + failed_cnt + and failed_cnt <= CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI + ): + print(f"Won't block the CI workflow") + should_block_ci = False + + if should_block_ci: sys.exit(1) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 94456506879..4b57467ebdc 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -27,7 +27,6 @@ from report import SUCCESS, FAILURE from env_helper import GITHUB_UPSTREAM_REPOSITORY, GITHUB_REPOSITORY from synchronizer_utils import SYNC_BRANCH_PREFIX from ci_config import CI -from ci_utils import Utils # The team name for accepted approvals TEAM_NAME = getenv("GITHUB_TEAM_NAME", "core") @@ -249,74 +248,21 @@ def main(): repo = gh.get_repo(args.repo) if args.set_ci_status: + # set Mergeable check status and exit assert args.wf_status in (FAILURE, SUCCESS) - # set mergeable check status and exit commit = get_commit(gh, args.pr_info.sha) statuses = get_commit_filtered_statuses(commit) - max_failed_tests_per_job = 0 - job_name_with_max_failures = None - total_failed_tests = 0 - failed_to_get_info = False has_failed_statuses = False for status in statuses: - if not CI.is_required(status.context) or status.context in ( - CI.StatusNames.SYNC, - CI.StatusNames.PR_CHECK, - ): - # CI.StatusNames.SYNC or CI.StatusNames.PR_CHECK should not be checked - continue print(f"Check status [{status.context}], [{status.state}]") - if status.state == FAILURE: + if not CI.is_required(status.context) and status.state != SUCCESS: has_failed_statuses = True - failed_cnt = Utils.get_failed_tests_number(status.description) - if failed_cnt is None: - failed_to_get_info = True - print( - f"WARNING: failed to get number of failed tests from [{status.description}]" - ) - else: - if failed_cnt > max_failed_tests_per_job: - job_name_with_max_failures = status.context - max_failed_tests_per_job = failed_cnt - total_failed_tests += failed_cnt - print( - f"Failed test cases in [{status.context}] is [{failed_cnt}], total failures [{total_failed_tests}]" - ) - elif status.state != SUCCESS and status.context not in ( - CI.StatusNames.SYNC, - CI.StatusNames.PR_CHECK, - ): - # do not block CI on failures in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) - has_failed_statuses = True - print( - f"Unexpected status for [{status.context}]: [{status.state}] - block further testing" - ) - failed_to_get_info = True - - can_continue = True - if total_failed_tests > 
CI.MAX_TOTAL_FAILURES_BEFORE_BLOCKING_CI: - print( - f"Required check has [{total_failed_tests}] failed - block further testing" - ) - can_continue = False - if max_failed_tests_per_job > CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI: - print( - f"Job [{job_name_with_max_failures}] has [{max_failed_tests_per_job}] failures - block further testing" - ) - can_continue = False - if failed_to_get_info: - print("Unexpected commit status state - block further testing") - can_continue = False - if args.wf_status != SUCCESS and not has_failed_statuses: - # workflow failed but reason is unknown as no failed statuses present - can_continue = False - print( - "WARNING: Either the runner is faulty or the operating status is unknown. The first is self-healing, the second requires investigation." - ) if args.wf_status == SUCCESS or has_failed_statuses: - # do not set mergeable check status if args.wf_status == failure, apparently it has died runners and is to be restarted + # set Mergeable check if workflow is successful (green) + # or if we have GH statuses with failures (red) + # to avoid false-green on a died runner state = trigger_mergeable_check( commit, statuses, @@ -333,10 +279,10 @@ def main(): print( "Workflow failed but no failed statuses found (died runner?) - cannot set Mergeable Check status" ) - - if not can_continue: + if args.wf_status == SUCCESS and not has_failed_statuses: + sys.exit(0) + else: sys.exit(1) - sys.exit(0) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 From 2af361eb14617d067732795424c22a6f511c9f18 Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 16 Jul 2024 16:18:41 +0200 Subject: [PATCH 29/47] minor fix --- tests/ci/merge_pr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 4b57467ebdc..b005be472c7 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -256,7 +256,12 @@ def main(): has_failed_statuses = False for status in statuses: print(f"Check status [{status.context}], [{status.state}]") - if not CI.is_required(status.context) and status.state != SUCCESS: + if ( + not CI.is_required(status.context) + and status.context not in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) + and status.state != SUCCESS + ): + print(f"WARNING: Failed status [{status.context}], [{status.state}]") has_failed_statuses = True if args.wf_status == SUCCESS or has_failed_statuses: From db7fb78a04280179b7ca09f588c957d2e0eab6be Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 16 Jul 2024 19:49:21 +0200 Subject: [PATCH 30/47] major fix --- tests/ci/ci_config.py | 6 ++++-- tests/ci/merge_pr.py | 6 +----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 9b9ddee5326..f5e39f343b2 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -415,7 +415,8 @@ class CI: JobNames.INTEGRATION_TEST_FLAKY: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, - reference_job_name=JobNames.INTEGRATION_TEST_TSAN, + # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf + # reference_job_name=JobNames.INTEGRATION_TEST_TSAN, ), JobNames.COMPATIBILITY_TEST: CommonJobConfigs.COMPATIBILITY_TEST.with_properties( required_builds=[BuildNames.PACKAGE_RELEASE], @@ -460,7 +461,8 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], pr_only=True, timeout=3600, - 
reference_job_name=JobNames.STATELESS_TEST_RELEASE, + # TODO: approach with reference job names does not work because digest may not be calculated if job skipped in wf + # reference_job_name=JobNames.STATELESS_TEST_RELEASE, ), JobNames.JEPSEN_KEEPER: JobConfig( required_builds=[BuildNames.BINARY_RELEASE], diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index b005be472c7..4d8facafb84 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -256,11 +256,7 @@ def main(): has_failed_statuses = False for status in statuses: print(f"Check status [{status.context}], [{status.state}]") - if ( - not CI.is_required(status.context) - and status.context not in (CI.StatusNames.SYNC, CI.StatusNames.PR_CHECK) - and status.state != SUCCESS - ): + if CI.is_required(status.context) and status.state != SUCCESS: print(f"WARNING: Failed status [{status.context}], [{status.state}]") has_failed_statuses = True From ea3a333a0711a33ad9193e67939b5f8a970834ea Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 16 Jul 2024 22:25:45 +0200 Subject: [PATCH 31/47] Fix formatting error --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 260457b3be1..5e2802eb08b 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -86,7 +86,7 @@ Returns the fully qualified domain name of the ClickHouse server. fqdn(); ``` -Aliases: `fullHostName`, 'FQDN'. +Aliases: `fullHostName`, `FQDN`. **Returned value** From 8c0ade413cca3ea63b47909c54c80de76c1e2a26 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Tue, 16 Jul 2024 22:28:12 +0200 Subject: [PATCH 32/47] Update other-functions.md --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 5e2802eb08b..b7e4094f30e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -76,7 +76,7 @@ WHERE macro = 'test'; └───────┴──────────────┘ ``` -## FQDN +## fqdn Returns the fully qualified domain name of the ClickHouse server. 
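Stepping back to the CI-gating change in patches 28-30: a failed required job now blocks the workflow only when its failed-test count exceeds a per-job threshold. A hedged sketch of that decision in isolation (the description-parsing regex and the threshold value are assumptions; the real logic uses Utils.get_failed_tests_number and CI.MAX_TOTAL_FAILURES_PER_JOB_BEFORE_BLOCKING_CI from ci_config.py):

```python
import re
from typing import Optional

MAX_FAILURES_PER_JOB = 10  # assumed value; the real constant lives in ci_config.py

def failed_tests_from_description(description: str) -> Optional[int]:
    # Assumed status-description format such as "Tests failed: 3, passed: 100";
    # the real parser is Utils.get_failed_tests_number and may differ.
    match = re.search(r"failed: (\d+)", description)
    return int(match.group(1)) if match else None

def should_block_ci(state: str, description: str, is_required: bool) -> bool:
    if state == "success":
        return False
    failed_cnt = failed_tests_from_description(description)
    # A required job with only a handful of failed cases no longer blocks CI.
    if state == "failure" and is_required and failed_cnt and failed_cnt <= MAX_FAILURES_PER_JOB:
        return False
    return True
```

Note that an unparseable description leaves `failed_cnt` as None, so the job still blocks; the relaxation only applies when a small, known number of test cases failed.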
From 9ff6de4eba0e82ff6dc2c1d007d4866637e1d0b2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 23:17:45 +0200 Subject: [PATCH 33/47] Remove unused header --- src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index b9edff39b82..a904b29e12f 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -21,7 +21,6 @@ #include #include -#include #include #include #include From 5ca7241865f863d68b3a3b96011b9238e49ac3f8 Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 17 Jul 2024 00:23:14 +0200 Subject: [PATCH 34/47] CI: AWS ASG decrease capacity from runners --- tests/ci/worker/init_runner.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index ec7793db2aa..91bffa876b2 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -50,7 +50,7 @@ set -uo pipefail # set accordingly to a runner role # #################################### -echo "Running init script" +echo "Running init script v1" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner @@ -90,7 +90,6 @@ terminate_delayed() { # IF `sleep` IS CHANGED, CHANGE ANOTHER VALUE IN `pgrep` sleep=13.14159265358979323846 echo "Going to terminate the runner's instance in $sleep seconds" - INSTANCE_ID=$(ec2metadata --instance-id) # We execute it with `at` to not have it as an orphan process, but launched independently # GH Runners kill all remain processes echo "sleep '$sleep'; aws ec2 terminate-instances --instance-ids $INSTANCE_ID" | at now || \ @@ -111,11 +110,17 @@ declare -f terminate_delayed >> /tmp/actions-hooks/common.sh terminate_and_exit() { # Terminate instance and exit from the script instantly echo "Going to terminate the runner's instance" - INSTANCE_ID=$(ec2metadata --instance-id) aws ec2 terminate-instances --instance-ids "$INSTANCE_ID" exit 0 } +terminate_decrease_and_exit() { + # Terminate instance and exit from the script instantly + echo "Going to terminate the runner's instance and decrease asg capacity" + aws autoscaling terminate-instance-in-auto-scaling-group --instance-id "$INSTANCE_ID" --should-decrement-desired-capacity + exit 0 +} + declare -f terminate_and_exit >> /tmp/actions-hooks/common.sh check_spot_instance_is_old() { @@ -324,7 +329,7 @@ while true; do sudo -u ubuntu ./config.sh remove --token "$(get_runner_token)" \ || continue echo "Runner didn't launch or have assigned jobs after ${RUNNER_AGE} seconds, shutting down" - terminate_and_exit + terminate_decrease_and_exit fi fi else From a1a802cee9f1e75cfdaed7c86aac63fd4e3010db Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 17 Jul 2024 00:32:13 +0200 Subject: [PATCH 35/47] fix deployment script --- tests/ci/worker/deploy-runner-init.sh | 4 +++- tests/ci/worker/init_runner.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/ci/worker/deploy-runner-init.sh b/tests/ci/worker/deploy-runner-init.sh index 06edede48fa..96fbd82a99c 100755 --- a/tests/ci/worker/deploy-runner-init.sh +++ b/tests/ci/worker/deploy-runner-init.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +set -e + usage() { echo "Usage: $0 ENVIRONMENT" >&2 echo "Valid values for ENVIRONMENT: staging, production" >&2 @@ -55,7 +57,7 @@ EOF body() { local first_line - 
first_line=$(sed -n '/^# THE SCRIPT START$/{=;q}' "$SOURCE_SCRIPT") + first_line=$(sed -n '/^# THE SCRIPT START$/{=;q;}' "$SOURCE_SCRIPT") if [ -z "$first_line" ]; then echo "The pattern '# THE SCRIPT START' is not found in $SOURCE_SCRIPT" >&2 exit 1 diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index 91bffa876b2..d6cdb6d9c57 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -50,7 +50,7 @@ set -uo pipefail # set accordingly to a runner role # #################################### -echo "Running init script v1" +echo "Running init v1" export DEBIAN_FRONTEND=noninteractive export RUNNER_HOME=/home/ubuntu/actions-runner From 0d1789292bc480c33c0f2b2ee54058976dea74fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 02:11:09 +0200 Subject: [PATCH 36/47] Add sudo for convenience --- docker/test/util/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index f13bb576f79..babddbd228c 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -17,6 +17,7 @@ ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=18 RUN apt-get update \ && apt-get install \ + sudo \ apt-transport-https \ apt-utils \ ca-certificates \ From c913a36ed6580e87ebb641cb3024c7182725d8ea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 02:13:57 +0200 Subject: [PATCH 37/47] Minor modifications --- docker/test/fasttest/Dockerfile | 5 ++++- docker/test/fasttest/run.sh | 4 ---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 2512268be0f..72e33226f38 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -26,7 +26,10 @@ RUN apt-get update \ zstd \ --yes --no-install-recommends \ && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* + && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \ + && groupadd --system --gid 1000 clickhouse \ + && useradd --system --gid 1000 --uid 1000 clickhouse +# ^ For some reason, groupadd and useradd are needed for tests with 'expect', but I don't know, why. COPY requirements.txt / RUN pip3 install --no-cache-dir -r /requirements.txt diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 57e7b6b2f56..970bf12a81a 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -11,10 +11,6 @@ stage=${stage:-} # Compiler version, normally set by Dockerfile export LLVM_VERSION=${LLVM_VERSION:-18} -# For some reason, it is needed for tests with 'expect', but I don't know, why. -sudo groupadd --system --gid 1000 clickhouse -sudo useradd --system --gid 1000 --uid 1000 clickhouse - # A variable to pass additional flags to CMake. # Here we explicitly default it to nothing so that bash doesn't complain about # it being undefined. 
Also read it as array so that we can pass an empty list From a40edea5c99ac290a3b04a015065c0cad5266ab0 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 17 Jul 2024 09:37:28 +0800 Subject: [PATCH 38/47] trigger CI From 60f45cb993ccf15812605d51b6e7ce8489c98d05 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 03:52:35 +0200 Subject: [PATCH 39/47] A fix --- docker/test/fasttest/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 72e33226f38..4cac2ee6135 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -28,7 +28,7 @@ RUN apt-get update \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \ && groupadd --system --gid 1000 clickhouse \ - && useradd --system --gid 1000 --uid 1000 clickhouse + && useradd --system --gid 1000 --uid 1000 -m clickhouse # ^ For some reason, groupadd and useradd are needed for tests with 'expect', but I don't know, why. COPY requirements.txt / From 968d28f1f4bae02eae2f7b96c774ddfd9656c308 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 07:24:46 +0200 Subject: [PATCH 40/47] Remove outdated articles from the docs --- docs/en/sql-reference/data-types/lowcardinality.md | 3 +-- docs/zh/changelog/index.md | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 133ac2bd72e..a92f6f4dce6 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -56,7 +56,6 @@ Functions: ## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer) -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) - Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). 
[Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index c91d8bcf4d1..cd77a8c03cf 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -356,7 +356,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" #### 新功能 {#new-feature-1} -- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 这个新功能是由Altinity的特殊要求添加到错误修正版本中的。 +- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 [#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) ### ClickHouse版本v20.1.2.4,2020-01-22 {#clickhouse-release-v20-1-2-4-2020-01-22} From 005256429fb4d6a9aec88b3bb5662382166f6f9b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 17 Jul 2024 11:49:15 +0200 Subject: [PATCH 41/47] Don't run test 03198_dynamic_read_subcolumns in azure --- tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql index 27fca179580..155009cc2fe 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -1,4 +1,4 @@ --- Tags: no-random-settings, no-s3-storage +-- Tags: no-random-settings, no-object-storage SET allow_experimental_dynamic_type = 1; DROP TABLE IF EXISTS test_dynamic; From 2958803255f0f646147eca4f9432f72496318bbf Mon Sep 17 00:00:00 2001 From: Max K Date: Wed, 17 Jul 2024 11:26:38 +0200 Subject: [PATCH 42/47] do not decrease from lambda --- tests/ci/autoscale_runners_lambda/app.py | 77 ++++++++++--------- .../test_autoscale.py | 25 ++++-- 2 files changed, 60 insertions(+), 42 deletions(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index 520abcb41bb..aed5cf3a8dd 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -99,7 +99,8 @@ def set_capacity( continue raise ValueError("Queue status is not in ['in_progress', 'queued']") - scale_down, scale_up = get_scales(runner_type) + #scale_down, scale_up = get_scales(runner_type) + _, scale_up = get_scales(runner_type) # With lyfecycle hooks some instances are actually free because some of # them are in 'Terminating:Wait' state effective_capacity = max( @@ -110,7 +111,7 @@ def set_capacity( # How much nodes are free (positive) or need to be added (negative) capacity_reserve = effective_capacity - running - queued stop = False - if capacity_reserve < 0: + if capacity_reserve <= 0: # This part is about scaling up capacity_deficit = -capacity_reserve # It looks that we are still OK, since no queued jobs exist @@ -158,41 +159,43 @@ def set_capacity( ) return - # Now we will calculate if we need to scale down - stop = stop or asg["DesiredCapacity"] == asg["MinSize"] - new_capacity = asg["DesiredCapacity"] - (capacity_reserve // scale_down) - new_capacity = max(new_capacity, asg["MinSize"]) - new_capacity = min(new_capacity, asg["MaxSize"]) - stop = stop or asg["DesiredCapacity"] == new_capacity - if stop: - logging.info( - "Do not decrease ASG %s capacity, current capacity=%s, effective " - "capacity=%s, minimum capacity=%s, running jobs=%s, queue size=%s", - asg["AutoScalingGroupName"], - asg["DesiredCapacity"], - 
effective_capacity, - asg["MinSize"], - running, - queued, - ) - return - - logging.info( - "The ASG %s capacity will be decreased to %s, current capacity=%s, effective " - "capacity=%s, minimum capacity=%s, running jobs=%s, queue size=%s", - asg["AutoScalingGroupName"], - new_capacity, - asg["DesiredCapacity"], - effective_capacity, - asg["MinSize"], - running, - queued, - ) - if not dry_run: - client.set_desired_capacity( - AutoScalingGroupName=asg["AutoScalingGroupName"], - DesiredCapacity=new_capacity, - ) + # FIXME: try decreasing capacity from runners that finished their jobs and have no job assigned + # IMPORTANT: Runner init script must be of version that supports ASG decrease + # # Now we will calculate if we need to scale down + # stop = stop or asg["DesiredCapacity"] == asg["MinSize"] + # new_capacity = asg["DesiredCapacity"] - (capacity_reserve // scale_down) + # new_capacity = max(new_capacity, asg["MinSize"]) + # new_capacity = min(new_capacity, asg["MaxSize"]) + # stop = stop or asg["DesiredCapacity"] == new_capacity + # if stop: + # logging.info( + # "Do not decrease ASG %s capacity, current capacity=%s, effective " + # "capacity=%s, minimum capacity=%s, running jobs=%s, queue size=%s", + # asg["AutoScalingGroupName"], + # asg["DesiredCapacity"], + # effective_capacity, + # asg["MinSize"], + # running, + # queued, + # ) + # return + # + # logging.info( + # "The ASG %s capacity will be decreased to %s, current capacity=%s, effective " + # "capacity=%s, minimum capacity=%s, running jobs=%s, queue size=%s", + # asg["AutoScalingGroupName"], + # new_capacity, + # asg["DesiredCapacity"], + # effective_capacity, + # asg["MinSize"], + # running, + # queued, + # ) + # if not dry_run: + # client.set_desired_capacity( + # AutoScalingGroupName=asg["AutoScalingGroupName"], + # DesiredCapacity=new_capacity, + # ) def main(dry_run: bool = True) -> None: diff --git a/tests/ci/autoscale_runners_lambda/test_autoscale.py b/tests/ci/autoscale_runners_lambda/test_autoscale.py index 3aeab49ffc7..d1a1f9b358f 100644 --- a/tests/ci/autoscale_runners_lambda/test_autoscale.py +++ b/tests/ci/autoscale_runners_lambda/test_autoscale.py @@ -97,19 +97,34 @@ class TestSetCapacity(unittest.TestCase): ), TestCase("lower-min", 10, 5, 20, [Queue("queued", 5, "lower-min")], 10), # Decrease capacity - TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], 5), + # FIXME: Tests changed for lambda that can only scale up + # TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], 5), + TestCase("w/reserve", 1, 13, 20, [Queue("queued", 5, "w/reserve")], -1), + # TestCase( + # "style-checker", 1, 13, 20, [Queue("queued", 5, "style-checker")], 5 + # ), TestCase( - "style-checker", 1, 13, 20, [Queue("queued", 5, "style-checker")], 5 + "style-checker", 1, 13, 20, [Queue("queued", 5, "style-checker")], -1 ), - TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], 17), - TestCase("decrease", 1, 13, 20, [Queue("in_progress", 3, "decrease")], 3), + # TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], 17), + TestCase("w/reserve", 1, 23, 20, [Queue("queued", 17, "w/reserve")], -1), + # TestCase("decrease", 1, 13, 20, [Queue("in_progress", 3, "decrease")], 3), + TestCase("decrease", 1, 13, 20, [Queue("in_progress", 3, "decrease")], -1), + # TestCase( + # "style-checker", + # 1, + # 13, + # 20, + # [Queue("in_progress", 5, "style-checker")], + # 5, + # ), TestCase( "style-checker", 1, 13, 20, [Queue("in_progress", 5, "style-checker")], - 5, + -1, ), ) for t in test_cases: 
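To summarize the autoscaling behavior that patches 34, 42, and 43 converge on: runners now decrement the ASG themselves when they terminate, and the lambda only ever scales up. A simplified model of the surviving scale-up path (the rounding is an assumption; only the `capacity_reserve <= 0` trigger and the removal of lambda-side scale-down are taken from the diffs above):

```python
def plan_capacity(desired: int, max_size: int, effective_capacity: int,
                  running: int, queued: int, scale_up: int) -> int:
    # Free (positive) or missing (negative) slots for this runner type.
    capacity_reserve = effective_capacity - running - queued
    if capacity_reserve > 0:
        # Scale-down is delegated to the runners themselves via
        # terminate-instance-in-auto-scaling-group with capacity decrement.
        return desired
    capacity_deficit = -capacity_reserve
    # Assumed rounding: roughly one new instance per `scale_up` missing slots,
    # and at least one, since capacity_reserve <= 0 now triggers scaling.
    increase = max(1, capacity_deficit // scale_up)
    return min(desired + increase, max_size)
```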
From c40473ff80c0cdf09b85b7dda695a41ee2df0112 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 17 Jul 2024 10:50:52 +0000 Subject: [PATCH 43/47] Automatic style fix --- tests/ci/autoscale_runners_lambda/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/autoscale_runners_lambda/app.py b/tests/ci/autoscale_runners_lambda/app.py index aed5cf3a8dd..c790a5081dd 100644 --- a/tests/ci/autoscale_runners_lambda/app.py +++ b/tests/ci/autoscale_runners_lambda/app.py @@ -99,7 +99,7 @@ def set_capacity( continue raise ValueError("Queue status is not in ['in_progress', 'queued']") - #scale_down, scale_up = get_scales(runner_type) + # scale_down, scale_up = get_scales(runner_type) _, scale_up = get_scales(runner_type) # With lyfecycle hooks some instances are actually free because some of # them are in 'Terminating:Wait' state From 3ade6de95df356adfaf7f6a6e3fb22d2545c6c50 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 17 Jul 2024 13:13:51 +0200 Subject: [PATCH 44/47] Add description about no-object-storage tag --- tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql index 155009cc2fe..5d473064c68 100644 --- a/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql +++ b/tests/queries/0_stateless/03198_dynamic_read_subcolumns.sql @@ -1,4 +1,5 @@ -- Tags: no-random-settings, no-object-storage +-- Tag no-object-storage: this test relies on the number of opened files in MergeTree that can differ in object storages SET allow_experimental_dynamic_type = 1; DROP TABLE IF EXISTS test_dynamic; From 245bc587ae5d9cb6751153b7bf24dcf88487d5dd Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 17 Jul 2024 17:57:28 +0200 Subject: [PATCH 45/47] add 'fqdn' to list --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 4e7bf7f686e..b4b18f9bbf6 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1653,6 +1653,7 @@ formated formatschema formatter formatters +fqdn frac freezed fromDaysSinceYearZero From d5345c5aae8c8598a043311e58318d22c5396392 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 18:31:10 +0200 Subject: [PATCH 46/47] Update Squashing.cpp --- src/Interpreters/Squashing.cpp | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 4af7d57f5a0..49e95f202dd 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -17,38 +17,6 @@ namespace ErrorCodes namespace { -Chunk squashImpl(std::vector<Chunk> & input_chunks) -{ - Chunk accumulated_chunk; - std::vector<IColumn::MutablePtr> mutable_columns = {}; - size_t rows = 0; - for (const Chunk & chunk : input_chunks) - rows += chunk.getNumRows(); - - { - auto & first_chunk = input_chunks[0]; - Columns columns = first_chunk.detachColumns(); - for (auto & column : columns) - { - mutable_columns.push_back(IColumn::mutate(std::move(column))); - mutable_columns.back()->reserve(rows); - } - } - - for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above - { - Columns columns = 
input_chunks[i].detachColumns(); - for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) - { - const auto source_column = columns[j]; - - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); - } - } - accumulated_chunk.setColumns(std::move(mutable_columns), rows); - return accumulated_chunk; -} - const ChunksToSquash * getInfoFromChunk(const Chunk & chunk) { const auto & info = chunk.getChunkInfo(); From 37dd7756434c4a2fa780fc187505c67e92be6f45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 18:31:27 +0200 Subject: [PATCH 47/47] Update Squashing.cpp --- src/Interpreters/Squashing.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 49e95f202dd..3a7f28ed837 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -14,22 +14,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -namespace -{ - -const ChunksToSquash * getInfoFromChunk(const Chunk & chunk) -{ - const auto & info = chunk.getChunkInfo(); - const auto * agg_info = typeid_cast<const ChunksToSquash *>(info.get()); - - if (!agg_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return agg_info; -} - -} - Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_)
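As a closing illustration of what the Squashing cleanup in the last two patches removed: `squashImpl` concatenated the columns of many small chunks into one accumulated chunk, reserving the total row count up front. A didactic Python model of that column-wise concatenation (plain lists stand in for ClickHouse columns; this is a sketch of the idea, not ClickHouse code):

```python
from typing import List

Chunk = List[list]  # one value-list per column

def squash(input_chunks: List[Chunk]) -> Chunk:
    if not input_chunks:
        return []
    # Start from mutable copies of the first chunk's columns
    # (the C++ code did this with IColumn::mutate plus reserve(rows)).
    mutable_columns = [column[:] for column in input_chunks[0]]
    # Append every later chunk column by column (insertRangeFrom in C++).
    for chunk in input_chunks[1:]:
        for j, source_column in enumerate(chunk):
            mutable_columns[j].extend(source_column)
    return mutable_columns

# For example, squash([[[1, 2], ['a', 'b']], [[3], ['c']]])
# returns [[1, 2, 3], ['a', 'b', 'c']].
```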