From 0d46e7555b066298603b5e0cd4dc122e74863ebf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Tue, 20 Feb 2024 19:08:33 +0100
Subject: [PATCH 001/180] Fix unexpected behavior with FORMAT and SETTINGS
 parsing

---
 src/Parsers/ParserQueryWithOutput.cpp | 80 ++++++++++++++++++---------
 1 file changed, 54 insertions(+), 26 deletions(-)

diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp
index 7a627ae5f6a..340abf27c31 100644
--- a/src/Parsers/ParserQueryWithOutput.cpp
+++ b/src/Parsers/ParserQueryWithOutput.cpp
@@ -150,37 +150,65 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
     }
 
+    /// These two sections are allowed in an arbitrary order.
     ParserKeyword s_format("FORMAT");
-
-    if (s_format.ignore(pos, expected))
-    {
-        ParserIdentifier format_p;
-
-        if (!format_p.parse(pos, query_with_output.format, expected))
-            return false;
-        setIdentifierSpecial(query_with_output.format);
-
-        query_with_output.children.push_back(query_with_output.format);
-    }
-
-    // SETTINGS key1 = value1, key2 = value2, ...
     ParserKeyword s_settings("SETTINGS");
-    if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
-    {
-        ParserSetQuery parser_settings(true);
-        if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))
-            return false;
-        query_with_output.children.push_back(query_with_output.settings_ast);
 
-        // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery)
-        // Pass them manually, to apply in InterpreterSelectQuery::initSettings()
-        if (query->as<ASTSelectWithUnionQuery>())
+    /** Why: let's take the following example:
+      * SELECT 1 UNION ALL SELECT 2 FORMAT TSV
+      * Each subquery can be put in parentheses and have its own settings:
+      * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV
+      * And the whole query can have settings:
+      * (SELECT 1 SETTINGS a=b) UNION ALL (SELECT 2 SETTINGS c=d) FORMAT TSV SETTINGS e=f
+      * A single query with output is parsed in the same way as the UNION ALL chain:
+      * SELECT 1 SETTINGS a=b FORMAT TSV SETTINGS e=f
+      * So while these forms have a slightly different meaning, they both exist:
+      * SELECT 1 SETTINGS a=b FORMAT TSV
+      * SELECT 1 FORMAT TSV SETTINGS e=f
+      * And due to this effect, users expect that FORMAT and SETTINGS may go in an arbitrary order.
+      * But while this works:
+      * (SELECT 1) UNION ALL (SELECT 2) FORMAT TSV SETTINGS d=f
+      * This does not work automatically, unless we explicitly allow different orders:
+      * (SELECT 1) UNION ALL (SELECT 2) SETTINGS d=f FORMAT TSV
+      * Inevitably, we also allow this:
+      * SELECT 1 SETTINGS a=b SETTINGS d=f FORMAT TSV
+      * ^^^^^^^^^^^^^^^^^^^^^
+      * Because this part is consumed into ASTSelectWithUnionQuery
+      * and the rest into ASTQueryWithOutput.
+      */
+
+    for (size_t i = 0; i < 2; ++i)
+    {
+        if (!query_with_output.format && s_format.ignore(pos, expected))
         {
-            auto settings = query_with_output.settings_ast->clone();
-            assert_cast<ASTSetQuery *>(settings.get())->print_in_format = false;
-            QueryWithOutputSettingsPushDownVisitor::Data data{settings};
-            QueryWithOutputSettingsPushDownVisitor(data).visit(query);
+            ParserIdentifier format_p;
+
+            if (!format_p.parse(pos, query_with_output.format, expected))
+                return false;
+            setIdentifierSpecial(query_with_output.format);
+
+            query_with_output.children.push_back(query_with_output.format);
         }
+        else if (!query_with_output.settings_ast && s_settings.ignore(pos, expected))
+        {
+            // SETTINGS key1 = value1, key2 = value2, ...
+ ParserSetQuery parser_settings(true); + if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) + return false; + query_with_output.children.push_back(query_with_output.settings_ast); + + // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) + // Pass them manually, to apply in InterpreterSelectQuery::initSettings() + if (query->as()) + { + auto settings = query_with_output.settings_ast->clone(); + assert_cast(settings.get())->print_in_format = false; + QueryWithOutputSettingsPushDownVisitor::Data data{settings}; + QueryWithOutputSettingsPushDownVisitor(data).visit(query); + } + } + else + break; } node = std::move(query); From e91dd71d4e55fe80d1c230a87eee7ad84333d9c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:01:06 +0200 Subject: [PATCH 002/180] Settings normalization --- src/Interpreters/InterpreterSetQuery.cpp | 26 ++++++++++-------------- src/Parsers/ParserQueryWithOutput.cpp | 11 ---------- 2 files changed, 11 insertions(+), 26 deletions(-) diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index 7e68fc5c4c1..cac44c7747b 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -9,6 +9,7 @@ #include #include + namespace DB { @@ -45,9 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - { InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); - } } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -55,6 +54,16 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (!ast) return; + /// First apply the outermost settings. Then they could be overridden by deeper settings. 
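+    /// For example (values are illustrative; see tests/queries/0_stateless/03172_format_settings_clauses.sql added later in this series):
+    ///     SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3
+    /// first applies the outer SETTINGS (3), then the per-SELECT SETTINGS (1) overrides it, so the query sees 1.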
+ if (const auto * query_with_output = dynamic_cast(ast.get())) + { + if (query_with_output->settings_ast) + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + + if (const auto * create_query = ast->as(); create_query && create_query->select) + applySettingsFromSelectWithUnion(create_query->select->as(), context_); + } + if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) @@ -71,19 +80,6 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } - else if (const auto * query_with_output = dynamic_cast(ast.get())) - { - if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); - - if (const auto * create_query = ast->as()) - { - if (create_query->select) - { - applySettingsFromSelectWithUnion(create_query->select->as(), context_); - } - } - } else if (auto * insert_query = ast->as()) { context_->setInsertFormat(insert_query->format); diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 6d8a1258555..ac8f7d560e0 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -198,16 +197,6 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!parser_settings.parse(pos, query_with_output.settings_ast, expected)) return false; query_with_output.children.push_back(query_with_output.settings_ast); - - // SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery) - // Pass them manually, to apply in InterpreterSelectQuery::initSettings() - if (query->as()) - { - auto settings = query_with_output.settings_ast->clone(); - assert_cast(settings.get())->print_in_format = false; - QueryWithOutputSettingsPushDownVisitor::Data data{settings}; - QueryWithOutputSettingsPushDownVisitor(data).visit(query); - } } else break; From f5da9e424075fa755edd1a869e199f4861011be2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:01:44 +0200 Subject: [PATCH 003/180] Add a test --- ...QueryWithOutputSettingsPushDownVisitor.cpp | 56 ------------------- .../QueryWithOutputSettingsPushDownVisitor.h | 39 ------------- .../03172_format_settings_clauses.reference | 14 +++++ .../03172_format_settings_clauses.sql | 30 ++++++++++ 4 files changed, 44 insertions(+), 95 deletions(-) delete mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp delete mode 100644 src/Parsers/QueryWithOutputSettingsPushDownVisitor.h create mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.reference create mode 100644 tests/queries/0_stateless/03172_format_settings_clauses.sql diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp deleted file mode 100644 index 8cf0d0063ae..00000000000 --- a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace DB -{ - -bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) -{ - if (node->as()) - return true; - if (node->as()) - return true; - if (child->as()) - return true; - return false; -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data) 
-{ - if (auto * select_query = ast->as()) - visit(*select_query, ast, data); -} - -void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data) -{ - ASTPtr select_settings_ast = select_query.settings(); - if (!select_settings_ast) - { - select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone()); - return; - } - - SettingsChanges & select_settings = select_settings_ast->as().changes; - SettingsChanges & settings = data.settings_ast->as().changes; - - for (auto & setting : settings) - { - auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting) - { - return select_setting.name == setting.name; - }); - if (it == select_settings.end()) - select_settings.push_back(setting); - else - it->value = setting.value; - } -} - -} diff --git a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h b/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h deleted file mode 100644 index fde8a07b555..00000000000 --- a/src/Parsers/QueryWithOutputSettingsPushDownVisitor.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ASTSelectQuery; -struct SettingChange; -class SettingsChanges; - -/// Pushdown SETTINGS clause that goes after FORMAT to the SELECT query: -/// (since settings after FORMAT parsed separately not in the ParserSelectQuery but in ParserQueryWithOutput) -/// -/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 -> -/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1 -/// -/// Otherwise settings after FORMAT will not be applied. -class QueryWithOutputSettingsPushDownMatcher -{ -public: - using Visitor = InDepthNodeVisitor; - - struct Data - { - const ASTPtr & settings_ast; - }; - - static bool needChildVisit(ASTPtr & node, const ASTPtr & child); - static void visit(ASTPtr & ast, Data & data); - -private: - static void visit(ASTSelectQuery &, ASTPtr &, Data &); -}; - -using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor; - -} diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.reference b/tests/queries/0_stateless/03172_format_settings_clauses.reference new file mode 100644 index 00000000000..8a98b137f4b --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.reference @@ -0,0 +1,14 @@ +1 +2 +1 +2 +1 +2 +1 +1 +3 +3 +3 +3 +3 +1 diff --git a/tests/queries/0_stateless/03172_format_settings_clauses.sql b/tests/queries/0_stateless/03172_format_settings_clauses.sql new file mode 100644 index 00000000000..0d1aa4dcfbb --- /dev/null +++ b/tests/queries/0_stateless/03172_format_settings_clauses.sql @@ -0,0 +1,30 @@ +SET max_block_size = 10, max_threads = 1; + +-- Take the following example: +SELECT 1 UNION ALL SELECT 2 FORMAT TSV; + +-- Each subquery can be put in parentheses and have its own settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV; + +-- And the whole query can have settings: +(SELECT getSetting('max_block_size') SETTINGS max_block_size = 1) UNION ALL (SELECT getSetting('max_block_size') SETTINGS max_block_size = 2) FORMAT TSV SETTINGS max_block_size = 3; + +-- A single query with output is parsed in the same way as the UNION ALL chain: +SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV SETTINGS max_block_size = 3; + +-- So while these forms have a slightly different meaning, they both exist: 
+SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 FORMAT TSV;
+SELECT getSetting('max_block_size') FORMAT TSV SETTINGS max_block_size = 3;
+
+-- And due to this effect, users expect that FORMAT and SETTINGS may go in an arbitrary order.
+-- But while this works:
+(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) FORMAT TSV SETTINGS max_block_size = 3;
+
+-- This does not work automatically, unless we explicitly allow different orders:
+(SELECT getSetting('max_block_size')) UNION ALL (SELECT getSetting('max_block_size')) SETTINGS max_block_size = 3 FORMAT TSV;
+
+-- Inevitably, we allow this:
+SELECT getSetting('max_block_size') SETTINGS max_block_size = 1 SETTINGS max_block_size = 3 FORMAT TSV;
+/*^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^*/
+-- Because this part is consumed into ASTSelectWithUnionQuery
+-- and the rest into ASTQueryWithOutput.

From 778807a8883901debc1bfeb72e17b37eb06bcaf0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov <milovidov@clickhouse.com>
Date: Mon, 17 Jun 2024 08:22:36 +0200
Subject: [PATCH 004/180] Fix error; remove garbage

---
 src/Client/ClientBase.cpp | 33 +++------------------------------
 1 file changed, 3 insertions(+), 30 deletions(-)

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 854cc3fef8b..ad4964b4b7c 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -60,6 +60,7 @@
 #include
 #include
 #include
+#include <Interpreters/InterpreterSetQuery.h>
 #include
 #include
 #include
@@ -1937,41 +1938,13 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
             global_context->setSettings(*old_settings);
     });
 
-    auto apply_query_settings = [&](const IAST & settings_ast)
-    {
-        if (!old_settings)
-            old_settings.emplace(global_context->getSettingsRef());
-        global_context->applySettingsChanges(settings_ast.as<ASTSetQuery>()->changes);
-        global_context->resetSettingsToDefaultValue(settings_ast.as<ASTSetQuery>()->default_settings);
-    };
-
-    const auto * insert = parsed_query->as<ASTInsertQuery>();
-    if (const auto * select = parsed_query->as<ASTSelectQuery>(); select && select->settings())
-        apply_query_settings(*select->settings());
-    else if (const auto * select_with_union = parsed_query->as<ASTSelectWithUnionQuery>())
-    {
-        const ASTs & children = select_with_union->list_of_selects->children;
-        if (!children.empty())
-        {
-            // On the client it is enough to apply settings only for the
-            // last SELECT, since the only thing that is important to apply
-            // on the client is format settings.
- const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); From 87a2e5f39018ef16ab6fdd79ad934dce6c45aaf0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:30:55 +0200 Subject: [PATCH 005/180] Fix error --- src/Client/ClientBase.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index ad4964b4b7c..958a1f50813 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1932,10 +1932,9 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin { /// Temporarily apply query settings to context. - std::optional old_settings; + Settings old_settings = global_context->getSettings(); SCOPE_EXIT_SAFE({ - if (old_settings) - global_context->setSettings(*old_settings); + global_context->setSettings(old_settings); }); InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); From 67a539292e5e62f0bf470d11a5b224ef105bea02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 08:33:11 +0200 Subject: [PATCH 006/180] Update test --- tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference | 4 ++-- tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference index 22405bf1866..a8b99666654 100644 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.reference @@ -1,7 +1,7 @@ 1 1 1 -1 -1 +2 +1 2 2 diff --git a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh index b70c28422c9..173cc949500 100755 --- a/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh +++ b/tests/queries/0_stateless/01401_FORMAT_SETTINGS.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) FORMAT CSV SETTINGS max_block_size = 1' # push down append ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_compress_block_size = 1 FORMAT CSV SETTINGS max_block_size = 1' -# overwrite on push down (since these settings goes latest) +# not overwrite on push down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 2 FORMAT CSV SETTINGS max_block_size = 1' # on push-down ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT DISTINCT blockSize() FROM numbers(2) SETTINGS max_block_size = 1 FORMAT CSV' From 4817657375a37c374eb4be72793cba434c16a815 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 17 Jun 2024 23:43:03 +0200 Subject: [PATCH 
007/180] Revert "Revert "Fix AWS ECS"" --- contrib/aws-crt-cpp | 2 +- .../ProxyConfigurationResolverProvider.cpp | 35 +++++++------------ .../ProxyConfigurationResolverProvider.h | 5 ++- src/IO/ReadWriteBufferFromHTTP.cpp | 2 +- src/IO/S3/Client.cpp | 6 ++-- src/IO/S3/PocoHTTPClient.cpp | 20 +++++++++-- src/IO/S3/PocoHTTPClient.h | 14 ++++---- src/IO/S3/PocoHTTPClientFactory.cpp | 5 ++- .../0_stateless/03170_ecs_crash.reference | 4 +++ tests/queries/0_stateless/03170_ecs_crash.sh | 9 +++++ 10 files changed, 61 insertions(+), 41 deletions(-) create mode 100644 tests/queries/0_stateless/03170_ecs_crash.reference create mode 100755 tests/queries/0_stateless/03170_ecs_crash.sh diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index f532d6abc0d..0217761556a 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0 +Subproject commit 0217761556a7ba7ec537fe933d0ab1159096746e diff --git a/src/Common/ProxyConfigurationResolverProvider.cpp b/src/Common/ProxyConfigurationResolverProvider.cpp index b06073121e7..a46837bfdb9 100644 --- a/src/Common/ProxyConfigurationResolverProvider.cpp +++ b/src/Common/ProxyConfigurationResolverProvider.cpp @@ -112,9 +112,8 @@ namespace return configuration.has(config_prefix + ".uri"); } - /* - * New syntax requires protocol prefix " or " - * */ + /* New syntax requires protocol prefix " or " + */ std::optional getProtocolPrefix( ProxyConfiguration::Protocol request_protocol, const String & config_prefix, @@ -130,22 +129,18 @@ namespace return protocol_prefix; } - template std::optional calculatePrefixBasedOnSettingsSyntax( + bool new_syntax, ProxyConfiguration::Protocol request_protocol, const String & config_prefix, const Poco::Util::AbstractConfiguration & configuration ) { if (!configuration.has(config_prefix)) - { return std::nullopt; - } - if constexpr (new_syntax) - { + if (new_syntax) return getProtocolPrefix(request_protocol, config_prefix, configuration); - } return config_prefix; } @@ -155,24 +150,21 @@ std::shared_ptr ProxyConfigurationResolverProvider:: Protocol request_protocol, const Poco::Util::AbstractConfiguration & configuration) { - if (auto resolver = getFromSettings(request_protocol, "proxy", configuration)) - { + if (auto resolver = getFromSettings(true, request_protocol, "proxy", configuration)) return resolver; - } return std::make_shared( request_protocol, isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration)); } -template std::shared_ptr ProxyConfigurationResolverProvider::getFromSettings( + bool new_syntax, Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(request_protocol, config_prefix, configuration); + auto prefix_opt = calculatePrefixBasedOnSettingsSyntax(new_syntax, request_protocol, config_prefix, configuration); if (!prefix_opt) { @@ -195,20 +187,17 @@ std::shared_ptr ProxyConfigurationResolverProvider:: std::shared_ptr ProxyConfigurationResolverProvider::getFromOldSettingsFormat( Protocol request_protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration -) + const Poco::Util::AbstractConfiguration & configuration) { - /* - * First try to get it from settings only using the combination of config_prefix and configuration. + /* First try to get it from settings only using the combination of config_prefix and configuration. 
* This logic exists for backward compatibility with old S3 storage specific proxy configuration. * */ - if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(request_protocol, config_prefix + ".proxy", configuration)) + if (auto resolver = ProxyConfigurationResolverProvider::getFromSettings(false, request_protocol, config_prefix + ".proxy", configuration)) { return resolver; } - /* - * In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. + /* In case the combination of config_prefix and configuration does not provide a resolver, try to get it from general / new settings. * Falls back to Environment resolver if no configuration is found. * */ return ProxyConfigurationResolverProvider::get(request_protocol, configuration); diff --git a/src/Common/ProxyConfigurationResolverProvider.h b/src/Common/ProxyConfigurationResolverProvider.h index ebf22f7e92a..357b218e499 100644 --- a/src/Common/ProxyConfigurationResolverProvider.h +++ b/src/Common/ProxyConfigurationResolverProvider.h @@ -33,12 +33,11 @@ public: ); private: - template static std::shared_ptr getFromSettings( + bool is_new_syntax, Protocol protocol, const String & config_prefix, - const Poco::Util::AbstractConfiguration & configuration - ); + const Poco::Util::AbstractConfiguration & configuration); }; } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 303ffb744b5..4f883a9b4ed 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -221,7 +221,7 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( if (iter == http_header_entries.end()) { - http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}", VERSION_STRING)); + http_header_entries.emplace_back(user_agent, fmt::format("ClickHouse/{}{}", VERSION_STRING, VERSION_OFFICIAL)); } if (!delay_initialization && use_external_buffer) diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp index 9229342b8c1..cbb61deea9f 100644 --- a/src/IO/S3/Client.cpp +++ b/src/IO/S3/Client.cpp @@ -972,10 +972,10 @@ PocoHTTPClientConfiguration ClientFactory::createClientConfiguration( // NOLINT { auto context = Context::getGlobalContextInstance(); chassert(context); - auto proxy_configuration_resolver = DB::ProxyConfigurationResolverProvider::get(DB::ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); + auto proxy_configuration_resolver = ProxyConfigurationResolverProvider::get(ProxyConfiguration::protocolFromString(protocol), context->getConfigRef()); - auto per_request_configuration = [=] () { return proxy_configuration_resolver->resolve(); }; - auto error_report = [=] (const DB::ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; + auto per_request_configuration = [=]{ return proxy_configuration_resolver->resolve(); }; + auto error_report = [=](const ProxyConfiguration & req) { proxy_configuration_resolver->errorReport(req); }; auto config = PocoHTTPClientConfiguration( per_request_configuration, diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 1cef43530e0..04982f14f36 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -1,4 +1,5 @@ #include +#include #include "config.h" #if USE_AWS_S3 @@ -17,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include + static const int SUCCESS_RESPONSE_MIN = 200; static const int SUCCESS_RESPONSE_MAX = 299; @@ -84,7 +87,7 @@ namespace DB::S3 { 
PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -94,7 +97,7 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_) + std::function error_report_) : per_request_configuration(per_request_configuration_) , force_region(force_region_) , remote_host_filter(remote_host_filter_) @@ -107,6 +110,8 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration( , s3_use_adaptive_timeouts(s3_use_adaptive_timeouts_) , error_report(error_report_) { + /// This is used to identify configurations created by us. + userAgent = std::string(VERSION_FULL) + VERSION_OFFICIAL; } void PocoHTTPClientConfiguration::updateSchemeAndRegion() @@ -166,6 +171,17 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config { } +PocoHTTPClient::PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration) + : timeouts(ConnectionTimeouts() + .withConnectionTimeout(Poco::Timespan(client_configuration.connectTimeoutMs * 1000)) + .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) + .withTCPKeepAliveTimeout(Poco::Timespan( + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0))), + remote_host_filter(Context::getGlobalContextInstance()->getRemoteHostFilter()) +{ +} + std::shared_ptr PocoHTTPClient::MakeRequest( const std::shared_ptr & request, Aws::Utils::RateLimits::RateLimiterInterface * readLimiter, diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 88251b964e2..18a21649167 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -38,7 +38,7 @@ class PocoHTTPClient; struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration { - std::function per_request_configuration; + std::function per_request_configuration; String force_region; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; @@ -54,13 +54,13 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; - std::function error_report; + std::function error_report; void updateSchemeAndRegion(); private: PocoHTTPClientConfiguration( - std::function per_request_configuration_, + std::function per_request_configuration_, const String & force_region_, const RemoteHostFilter & remote_host_filter_, unsigned int s3_max_redirects_, @@ -70,8 +70,7 @@ private: bool s3_use_adaptive_timeouts_, const ThrottlerPtr & get_request_throttler_, const ThrottlerPtr & put_request_throttler_, - std::function error_report_ - ); + std::function error_report_); /// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization. 
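    /// That is also why the constructor is private: only ClientFactory (declared as a friend just below) may call it.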
friend ClientFactory; @@ -120,6 +119,7 @@ class PocoHTTPClient : public Aws::Http::HttpClient { public: explicit PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration); + explicit PocoHTTPClient(const Aws::Client::ClientConfiguration & client_configuration); ~PocoHTTPClient() override = default; std::shared_ptr MakeRequest( @@ -166,8 +166,8 @@ protected: static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request); void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const; - std::function per_request_configuration; - std::function error_report; + std::function per_request_configuration; + std::function error_report; ConnectionTimeouts timeouts; const RemoteHostFilter & remote_host_filter; unsigned int s3_max_redirects; diff --git a/src/IO/S3/PocoHTTPClientFactory.cpp b/src/IO/S3/PocoHTTPClientFactory.cpp index ef7af2d01ba..abec907778c 100644 --- a/src/IO/S3/PocoHTTPClientFactory.cpp +++ b/src/IO/S3/PocoHTTPClientFactory.cpp @@ -15,7 +15,10 @@ namespace DB::S3 std::shared_ptr PocoHTTPClientFactory::CreateHttpClient(const Aws::Client::ClientConfiguration & client_configuration) const { - return std::make_shared(static_cast(client_configuration)); + if (client_configuration.userAgent.starts_with("ClickHouse")) + return std::make_shared(static_cast(client_configuration)); + else /// This client is created inside the AWS SDK with default settings to obtain ECS credentials from localhost. + return std::make_shared(client_configuration); } std::shared_ptr PocoHTTPClientFactory::CreateHttpRequest( diff --git a/tests/queries/0_stateless/03170_ecs_crash.reference b/tests/queries/0_stateless/03170_ecs_crash.reference new file mode 100644 index 00000000000..acd7c60768b --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.reference @@ -0,0 +1,4 @@ +1 2 3 +4 5 6 +7 8 9 +0 0 0 diff --git a/tests/queries/0_stateless/03170_ecs_crash.sh b/tests/queries/0_stateless/03170_ecs_crash.sh new file mode 100755 index 00000000000..fa6870c4cf2 --- /dev/null +++ b/tests/queries/0_stateless/03170_ecs_crash.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# Previous versions crashed in attempt to use this authentication method (regardless of whether it was able to authenticate): +AWS_CONTAINER_CREDENTIALS_FULL_URI=http://localhost:1338/latest/meta-data/container/security-credentials $CLICKHOUSE_LOCAL -q "select * from s3('http://localhost:11111/test/a.tsv')" From 1df895f3dadcbb65d246927b79f42144e5fc1af2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jun 2024 14:04:40 +0200 Subject: [PATCH 008/180] Minor changes --- src/Access/SettingsConstraints.cpp | 8 ++++---- src/Interpreters/InterpreterSetQuery.cpp | 10 +++++----- src/Interpreters/InterpreterSetQuery.h | 2 +- .../03003_compatibility_setting_bad_value.sql | 1 - 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Access/SettingsConstraints.cpp b/src/Access/SettingsConstraints.cpp index a274f6b54f2..7506e365035 100644 --- a/src/Access/SettingsConstraints.cpp +++ b/src/Access/SettingsConstraints.cpp @@ -219,8 +219,8 @@ void SettingsConstraints::clamp(const Settings & current_settings, SettingsChang }); } -template -bool getNewValueToCheck(const T & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) +template +bool getNewValueToCheck(const SettingsT & current_settings, SettingChange & change, Field & new_value, bool throw_on_failure) { Field current_value; bool has_current_value = current_settings.tryGet(change.name, current_value); @@ -230,12 +230,12 @@ bool getNewValueToCheck(const T & current_settings, SettingChange & change, Fiel return false; if (throw_on_failure) - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); else { try { - new_value = T::castValueUtil(change.name, change.value); + new_value = SettingsT::castValueUtil(change.name, change.value); } catch (...) { diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp index cac44c7747b..15d4ba56d8d 100644 --- a/src/Interpreters/InterpreterSetQuery.cpp +++ b/src/Interpreters/InterpreterSetQuery.cpp @@ -46,7 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel // It is flattened later, when we process UNION ALL/DISTINCT. 
const auto * last_select = children.back()->as(); if (last_select && last_select->settings()) - InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(); + InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(false); } void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_) @@ -58,7 +58,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * query_with_output = dynamic_cast(ast.get())) { if (query_with_output->settings_ast) - InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(false); if (const auto * create_query = ast->as(); create_query && create_query->select) applySettingsFromSelectWithUnion(create_query->select->as(), context_); @@ -67,7 +67,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta if (const auto * select_query = ast->as()) { if (auto new_settings = select_query->settings()) - InterpreterSetQuery(new_settings, context_).executeForCurrentContext(); + InterpreterSetQuery(new_settings, context_).executeForCurrentContext(false); } else if (const auto * select_with_union_query = ast->as()) { @@ -76,7 +76,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta else if (const auto * explain_query = ast->as()) { if (explain_query->settings_ast) - InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(false); applySettingsFromQuery(explain_query->getExplainedQuery(), context_); } @@ -84,7 +84,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta { context_->setInsertFormat(insert_query->format); if (insert_query->settings_ast) - InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(); + InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(false); } } diff --git a/src/Interpreters/InterpreterSetQuery.h b/src/Interpreters/InterpreterSetQuery.h index 2438762f347..f50105c39f4 100644 --- a/src/Interpreters/InterpreterSetQuery.h +++ b/src/Interpreters/InterpreterSetQuery.h @@ -23,7 +23,7 @@ public: /** Set setting for current context (query context). * It is used for interpretation of SETTINGS clause in SELECT query. 
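      * For example, SELECT 1 SETTINGS max_threads = 1 changes max_threads for that
      * query only, leaving the session defaults intact (an illustrative example).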
*/ - void executeForCurrentContext(bool ignore_setting_constraints = false); + void executeForCurrentContext(bool ignore_setting_constraints); bool supportsTransactions() const override { return true; } diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 48e98798c51..b9fbfd917fc 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1,2 +1 @@ select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} - From 11d54f4809a8b58773f13664e6b842cb6c7dce48 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 18 Jun 2024 15:18:53 +0200 Subject: [PATCH 009/180] Pass-through settings from the client --- programs/client/Client.cpp | 3 +++ programs/server/Server.cpp | 2 +- src/Access/AccessControl.cpp | 8 +++++++- src/Access/AccessControl.h | 5 ++++- src/Client/ClientBase.cpp | 1 - .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 +- 6 files changed, 16 insertions(+), 5 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index efe23d57478..22a035fbd71 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1184,6 +1184,9 @@ void Client::processConfig() global_context->setQueryKindInitial(); global_context->setQuotaClientKey(config().getString("quota_key", "")); global_context->setQueryKind(query_kind); + + /// Allow to pass-through unknown settings to the server. + global_context->getAccessControl().allowAllSettings(); } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 6414f7f6ea5..2f1d07790e1 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1824,7 +1824,7 @@ try auto & access_control = global_context->getAccessControl(); try { - access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); + access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); }); } catch (...) { diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index c3bb42160ad..9831621d6ac 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -264,7 +264,7 @@ AccessControl::AccessControl() AccessControl::~AccessControl() = default; -void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, +void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_) { if (config_.has("custom_settings_prefixes")) @@ -852,4 +852,10 @@ const ExternalAuthenticators & AccessControl::getExternalAuthenticators() const return *external_authenticators; } + +void AccessControl::allowAllSettings() +{ + custom_settings_prefixes->registerPrefixes({""}); +} + } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index d1537219a06..f408f6dfb0d 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -54,7 +54,7 @@ public: ~AccessControl() override; /// Initializes access storage (user directories). 
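    /// (Called once at server startup; see the Server.cpp hunk above, which passes the merged configuration.)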
- void setUpFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, + void setupFromMainConfig(const Poco::Util::AbstractConfiguration & config_, const String & config_path_, const zkutil::GetZooKeeper & get_zookeeper_function_); /// Parses access entities from a configuration loaded from users.xml. @@ -235,6 +235,9 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); + /// Allow all setting names - this can be used in clients to pass-through unknown settings to the server. + void allowAllSettings(); + private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 958a1f50813..617a56cfd95 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2958,7 +2958,6 @@ void ClientBase::init(int argc, char ** argv) boost::replace_all(arg, "−", "--"); } - OptionsDescription options_description; options_description.main_description.emplace(createOptionsDescription("Main options", terminal_width)); diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index b9fbfd917fc..3a09eec7452 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1 +1 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} +select 42 settings compatibility=NULL; -- {clientError BAD_GET} From 16c3e36b5a2f3203eb87161f4320ea5e70865fc4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 19 Jun 2024 22:50:52 +0200 Subject: [PATCH 010/180] Simplification --- src/Client/ClientBase.cpp | 142 +++++++++++++++++------------------ src/Client/ClientBase.h | 1 + src/Interpreters/Session.cpp | 8 +- src/Interpreters/Session.h | 3 +- 4 files changed, 74 insertions(+), 80 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 617a56cfd95..490a560de2d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -439,7 +439,7 @@ void ClientBase::sendExternalTables(ASTPtr parsed_query) std::vector data; for (auto & table : external_tables) - data.emplace_back(table.getData(global_context)); + data.emplace_back(table.getData(query_context)); connection->sendExternalTablesData(data); } @@ -652,10 +652,10 @@ try /// intermixed with data with parallel formatting. /// It may increase code complexity significantly. if (!extras_into_stdout || select_only_into_file) - output_format = global_context->getOutputFormatParallelIfPossible( + output_format = query_context->getOutputFormatParallelIfPossible( current_format, out_file_buf ? *out_file_buf : *out_buf, block); else - output_format = global_context->getOutputFormat( + output_format = query_context->getOutputFormat( current_format, out_file_buf ? *out_file_buf : *out_buf, block); output_format->setAutoFlush(); @@ -949,7 +949,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
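    /// E.g. with async_insert = 1, the inline data of INSERT INTO t VALUES (1), (2)
    /// stays in the query text and is sent as-is (an illustrative sketch; the table name is arbitrary).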
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, global_context)) + if (insert && isSyncInsertWithData(*insert, query_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; @@ -1067,7 +1067,7 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa } } - const auto & settings = global_context->getSettingsRef(); + const auto & settings = query_context->getSettingsRef(); const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1; int retries_left = 10; @@ -1082,10 +1082,10 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + query_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &query_context->getSettingsRef(), + &query_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1275,7 +1275,7 @@ void ClientBase::onProgress(const Progress & value) void ClientBase::onTimezoneUpdate(const String & tz) { - global_context->setSetting("session_timezone", tz); + query_context->setSetting("session_timezone", tz); } @@ -1471,13 +1471,13 @@ bool ClientBase::receiveSampleBlock(Block & out, ColumnsDescription & columns_de void ClientBase::setInsertionTable(const ASTInsertQuery & insert_query) { - if (!global_context->hasInsertionTable() && insert_query.table) + if (!query_context->hasInsertionTable() && insert_query.table) { String table = insert_query.table->as().shortName(); if (!table.empty()) { String database = insert_query.database ? insert_query.database->as().shortName() : ""; - global_context->setInsertionTable(StorageID(database, table)); + query_context->setInsertionTable(StorageID(database, table)); } } } @@ -1528,7 +1528,7 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars const auto & parsed_insert_query = parsed_query->as(); if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && !isStdinNotEmptyAndValid(std_in)))) { - const auto & settings = global_context->getSettingsRef(); + const auto & settings = query_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else @@ -1542,10 +1542,10 @@ void ClientBase::processInsertQuery(const String & query_to_execute, ASTPtr pars connection_parameters.timeouts, query, query_parameters, - global_context->getCurrentQueryId(), + query_context->getCurrentQueryId(), query_processing_stage, - &global_context->getSettingsRef(), - &global_context->getClientInfo(), + &query_context->getSettingsRef(), + &query_context->getClientInfo(), true, [&](const Progress & progress) { onProgress(progress); }); @@ -1593,7 +1593,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des /// Set callback to be called on file progress. 
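    /// E.g. while streaming INSERT ... FROM INFILE 'data.csv' (an arbitrary file name),
    /// the progress bar is drawn into the terminal buffer.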
if (tty_buf) - progress_indication.setFileProgressCallback(global_context, *tty_buf); + progress_indication.setFileProgressCallback(query_context, *tty_buf); } /// If data fetched from file (maybe compressed file) @@ -1627,10 +1627,10 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des } StorageFile::CommonArguments args{ - WithContext(global_context), + WithContext(query_context), parsed_insert_query->table_id, current_format, - getFormatSettings(global_context), + getFormatSettings(query_context), compression_method, columns_for_storage_file, ConstraintsDescription{}, @@ -1638,7 +1638,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des {}, String{}, }; - StoragePtr storage = std::make_shared(in_file, global_context->getUserFilesPath(), args); + StoragePtr storage = std::make_shared(in_file, query_context->getUserFilesPath(), args); storage->startup(); SelectQueryInfo query_info; @@ -1647,18 +1647,18 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des auto metadata = storage->getInMemoryMetadataPtr(); QueryPlan plan; storage->read( - plan, - sample.getNames(), - storage->getStorageSnapshot(metadata, global_context), - query_info, - global_context, - {}, - global_context->getSettingsRef().max_block_size, - getNumberOfPhysicalCPUCores()); + plan, + sample.getNames(), + storage->getStorageSnapshot(metadata, query_context), + query_info, + query_context, + {}, + query_context->getSettingsRef().max_block_size, + getNumberOfPhysicalCPUCores()); auto builder = plan.buildQueryPipeline( - QueryPlanOptimizationSettings::fromContext(global_context), - BuildQueryPipelineSettings::fromContext(global_context)); + QueryPlanOptimizationSettings::fromContext(query_context), + BuildQueryPipelineSettings::fromContext(query_context)); QueryPlanResourceHolder resources; auto pipe = QueryPipelineBuilder::getPipe(std::move(*builder), resources); @@ -1719,14 +1719,14 @@ void ClientBase::sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDes current_format = insert->format; } - auto source = global_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); + auto source = query_context->getInputFormat(current_format, buf, sample, insert_format_max_block_size); Pipe pipe(source); if (columns_description.hasDefaults()) { pipe.addSimpleTransform([&](const Block & header) { - return std::make_shared(header, columns_description, *source, global_context); + return std::make_shared(header, columns_description, *source, query_context); }); } @@ -1872,6 +1872,9 @@ void ClientBase::cancelQuery() void ClientBase::processParsedSingleQuery(const String & full_query, const String & query_to_execute, ASTPtr parsed_query, std::optional echo_query_, bool report_error) { + query_context = Context::createCopy(global_context); + query_context->makeQueryContext(); + resetOutput(); have_error = false; cancelled = false; @@ -1888,12 +1891,12 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin if (is_interactive) { - global_context->setCurrentQueryId(""); + query_context->setCurrentQueryId(""); // Generate a new query_id for (const auto & query_id_format : query_id_formats) { writeString(query_id_format.first, std_out); - writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", global_context->getCurrentQueryId())), std_out); + writeString(fmt::format(fmt::runtime(query_id_format.second), fmt::arg("query_id", query_context->getCurrentQueryId())), 
std_out); writeChar('\n', std_out); std_out.next(); } @@ -1920,7 +1923,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin auto password = auth_data->getPassword(); if (password) - global_context->getAccessControl().checkPasswordComplexityRules(*password); + query_context->getAccessControl().checkPasswordComplexityRules(*password); } } } @@ -1930,47 +1933,40 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin progress_indication.resetProgress(); profile_events.watch.restart(); + /// Apply query settings to context, as they can affect the behavior on client-side. + InterpreterSetQuery::applySettingsFromQuery(parsed_query, query_context); + + if (!connection->checkConnected(connection_parameters.timeouts)) + connect(); + + ASTPtr input_function; + const auto * insert = parsed_query->as(); + if (insert && insert->select) + insert->tryFindInputFunction(input_function); + + bool is_async_insert_with_inlined_data = query_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); + + if (is_async_insert_with_inlined_data) { - /// Temporarily apply query settings to context. - Settings old_settings = global_context->getSettings(); - SCOPE_EXIT_SAFE({ - global_context->setSettings(old_settings); - }); + bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); + bool have_external_data = have_data_in_stdin || insert->infile; - InterpreterSetQuery::applySettingsFromQuery(parsed_query, global_context); - - if (!connection->checkConnected(connection_parameters.timeouts)) - connect(); - - ASTPtr input_function; - const auto * insert = parsed_query->as(); - if (insert && insert->select) - insert->tryFindInputFunction(input_function); - - bool is_async_insert_with_inlined_data = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData(); - - if (is_async_insert_with_inlined_data) - { - bool have_data_in_stdin = !is_interactive && !stdin_is_a_tty && isStdinNotEmptyAndValid(std_in); - bool have_external_data = have_data_in_stdin || insert->infile; - - if (have_external_data) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Processing async inserts with both inlined and external data (from stdin or infile) is not supported"); - } - - /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. - if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) - { - if (input_function && insert->format.empty()) - throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); - - processInsertQuery(query_to_execute, parsed_query); - } - else - processOrdinaryQuery(query_to_execute, parsed_query); + if (have_external_data) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Processing async inserts with both inlined and external data (from stdin or infile) is not supported"); } + /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. + if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) + { + if (input_function && insert->format.empty()) + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); + + processInsertQuery(query_to_execute, parsed_query); + } + else + processOrdinaryQuery(query_to_execute, parsed_query); + /// Do not change context (current DB, settings) in case of an exception. 
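    /// E.g. a failed "USE other_database" must leave the client on its previous database
    /// (a hypothetical example; the same holds for SET).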
if (!have_error) { @@ -2651,10 +2647,8 @@ bool ClientBase::processMultiQueryFromFile(const String & file_name) if (!has_log_comment) { - Settings settings = global_context->getSettings(); /// NOTE: cannot use even weakly_canonical() since it fails for /dev/stdin due to resolving of "pipe:[X]" - settings.log_comment = fs::absolute(fs::path(file_name)); - global_context->setSettings(settings); + global_context->setSetting("log_comment", String(fs::absolute(fs::path(file_name)))); } return executeMultiQuery(queries_from_file); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 220fcddc038..228a9d65ea7 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -198,6 +198,7 @@ protected: /// since other members can use them. SharedContextHolder shared_context; ContextMutablePtr global_context; + ContextMutablePtr query_context; bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..9dd686290db 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -532,7 +532,7 @@ ContextMutablePtr Session::makeSessionContext() session_context->checkSettingsConstraints(settings_from_auth_server, SettingSource::QUERY); session_context->applySettingsChanges(settings_from_auth_server); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -596,7 +596,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: { session_name_ }, max_sessions_for_user); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -672,13 +672,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t user = query_context->getUser(); /// Interserver does not create session context - recordLoginSucess(query_context); + recordLoginSuccess(query_context); return query_context; } -void Session::recordLoginSucess(ContextPtr login_context) const +void Session::recordLoginSuccess(ContextPtr login_context) const { if (notified_session_log_about_login) return; diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..fc41c78e666 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -102,8 +102,7 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; - void recordLoginSucess(ContextPtr login_context) const; - + void recordLoginSuccess(ContextPtr login_context) const; mutable bool notified_session_log_about_login = false; const UUID auth_id; From fa5d4cfea183c64e3ff088f922c0960a3c3951e0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 20 Jun 2024 00:41:14 +0200 Subject: [PATCH 011/180] Fix error --- src/Client/ClientBase.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 490a560de2d..8fcb9632be0 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -949,7 +949,7 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// But for asynchronous inserts we don't extract data, because it's needed /// to be done on server side in that case (for coalescing the data from multiple inserts on server side). 
const auto * insert = parsed_query->as(); - if (insert && isSyncInsertWithData(*insert, query_context)) + if (insert && isSyncInsertWithData(*insert, global_context)) query_to_execute = full_query.substr(0, insert->data - full_query.data()); else query_to_execute = full_query; From e064171a68fcac110d27ff36a51b7a6bb4fbb251 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 23 Jun 2024 03:58:11 +0200 Subject: [PATCH 012/180] Fix errors --- programs/client/Client.cpp | 10 ---------- programs/local/LocalServer.cpp | 3 --- src/Client/ClientBase.cpp | 4 +++- src/Client/LocalConnection.cpp | 1 - src/Client/LocalConnection.h | 2 -- .../0_stateless/00857_global_joinsavel_table_alias.sql | 1 - 6 files changed, 3 insertions(+), 18 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 22a035fbd71..ab02d9fac74 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -311,7 +310,6 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; - auto & thread_status = MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -326,14 +324,6 @@ try initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); - { - // All that just to set DB::CurrentThread::get().getGlobalContext() - // which is required for client timezone (pushed from server) to work. - auto thread_group = std::make_shared(); - const_cast(thread_group->global_context) = global_context; - thread_status.attachToGroup(thread_group, false); - } - /// Includes delayed_interactive. if (is_interactive) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb1c35743b2..e5f4bac852c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -27,10 +27,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -48,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 38aa0ed8b14..03f088f2b61 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1872,7 +1872,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin ASTPtr parsed_query, std::optional echo_query_, bool report_error) { query_context = Context::createCopy(global_context); - query_context->makeQueryContext(); + CurrentThread::QueryScope query_scope(query_context); resetOutput(); have_error = false; @@ -2926,6 +2926,8 @@ void ClientBase::init(int argc, char ** argv) /// Don't parse options with Poco library, we prefer neat boost::program_options. 
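    /// (stopOptionsProcessing() below tells Poco::Util::Application not to consume argv itself.)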
stopOptionsProcessing();
 
+    MainThreadStatus::getInstance();
+
     stdin_is_a_tty = isatty(STDIN_FILENO);
     stdout_is_a_tty = isatty(STDOUT_FILENO);
     stderr_is_a_tty = isatty(STDERR_FILENO);
diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp
index c7494e31605..e63e5793505 100644
--- a/src/Client/LocalConnection.cpp
+++ b/src/Client/LocalConnection.cpp
@@ -125,7 +125,6 @@ void LocalConnection::sendQuery(
     state->query_id = query_id;
     state->query = query;
-    state->query_scope_holder = std::make_unique<CurrentThread::QueryScope>(query_context);
     state->stage = QueryProcessingStage::Enum(stage);
     state->profile_queue = std::make_shared<InternalProfileEventsQueue>(std::numeric_limits<size_t>::max());
     CurrentThread::attachInternalProfileEventsQueue(state->profile_queue);
diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h
index 899d134cce5..bdd0b481529 100644
--- a/src/Client/LocalConnection.h
+++ b/src/Client/LocalConnection.h
@@ -61,8 +61,6 @@ struct LocalQueryState
 
     /// Time after the last check to stop the request and send the progress.
     Stopwatch after_send_progress;
     Stopwatch after_send_profile_events;
-
-    std::unique_ptr<CurrentThread::QueryScope> query_scope_holder;
 };
 
diff --git a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql
index 2044a9b8d22..092b071cb48 100644
--- a/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql
+++ b/tests/queries/0_stateless/00857_global_joinsavel_table_alias.sql
@@ -1,4 +1,3 @@
-
 DROP TABLE IF EXISTS local_table;
 DROP TABLE IF EXISTS other_table;
 

From 84f81c61853f34d765475309932d73af55e25d0f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 23 Jun 2024 17:18:32 +0200
Subject: [PATCH 013/180] Fix error

---
 src/Client/Suggest.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp
index 0188ebc8173..c1f163939e8 100644
--- a/src/Client/Suggest.cpp
+++ b/src/Client/Suggest.cpp
@@ -96,6 +96,10 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p
     loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this]
     {
         ThreadStatus thread_status;
+        my_context->makeQueryContext();
+        auto group = ThreadGroup::createForQuery(my_context);
+        CurrentThread::attachToGroup(group);
+
         for (size_t retry = 0; retry < 10; ++retry)
         {
             try

From 602fa5cbadba2924bc4e57f26f5f37b00d7b086e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 23 Jun 2024 17:28:54 +0200
Subject: [PATCH 014/180] Fix error

---
 programs/local/LocalServer.cpp | 1 -
 src/Client/ClientBase.h        | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index e5f4bac852c..45641b999b6 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -428,7 +428,6 @@ int LocalServer::main(const std::vector<String> & /*args*/)
 try
 {
     UseSSL use_ssl;
-    thread_status.emplace();
 
     StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
 
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h
index 228a9d65ea7..83b99696373 100644
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@@ -244,9 +244,6 @@ protected:
     Settings cmd_settings;
     MergeTreeSettings cmd_merge_tree_settings;
 
-    /// thread status should be destructed before shared context because it relies on process list.
-    std::optional<ThreadStatus> thread_status;
-
     ServerConnectionPtr connection;
     ConnectionParameters connection_parameters;
 

From e0ef26285a5cda87e5073c6a0aaecb67f9609a96 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 23 Jun 2024 19:46:00 +0200
Subject: [PATCH 015/180] Update submodule

---
 contrib/aws | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/aws b/contrib/aws
index 1c2946bfcb7..6463c9cbf47 160000
--- a/contrib/aws
+++ b/contrib/aws
@@ -1 +1 @@
-Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf
+Subproject commit 6463c9cbf47cab78e4a4fa97a866942f201c6a58

From 9948150b87c6ee4531e0130de095035e2f228ec1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 24 Jun 2024 00:19:20 +0200
Subject: [PATCH 016/180] Fix error

---
 src/IO/S3/PocoHTTPClient.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h
index 18a21649167..3b7ec4d1d9c 100644
--- a/src/IO/S3/PocoHTTPClient.h
+++ b/src/IO/S3/PocoHTTPClient.h
@@ -172,7 +172,7 @@ protected:
     const RemoteHostFilter & remote_host_filter;
     unsigned int s3_max_redirects;
     bool s3_use_adaptive_timeouts = true;
-    bool enable_s3_requests_logging;
+    bool enable_s3_requests_logging = false;
     bool for_disk_s3;
 
     /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler

From 3b377fabe70c447304fa36d04f0495c0e9432d9c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 25 Jun 2024 02:49:40 +0200
Subject: [PATCH 017/180] Update submodule

---
 contrib/aws | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/aws b/contrib/aws
index 1c2946bfcb7..d5450d76abd 160000
--- a/contrib/aws
+++ b/contrib/aws
@@ -1 +1 @@
-Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf
+Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59

From 49634db3ba2961dadbdc1689f0a4ef1ecdb8bea1 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Tue, 25 Jun 2024 02:51:57 +0200
Subject: [PATCH 018/180] Update submodule

---
 contrib/aws-crt-cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp
index f532d6abc0d..e5aa45cacfd 160000
--- a/contrib/aws-crt-cpp
+++ b/contrib/aws-crt-cpp
@@ -1 +1 @@
-Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0
+Subproject commit e5aa45cacfdcda7719ead38760e7c61076f5745f

From d7b3c3e8a97835c9f7987a5852ef9469770f8560 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 29 Jun 2024 21:59:04 +0200
Subject: [PATCH 019/180] Add review suggestion

---
 src/Interpreters/InterpreterSetQuery.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Interpreters/InterpreterSetQuery.cpp b/src/Interpreters/InterpreterSetQuery.cpp
index 15d4ba56d8d..2ae35c4313b 100644
--- a/src/Interpreters/InterpreterSetQuery.cpp
+++ b/src/Interpreters/InterpreterSetQuery.cpp
@@ -46,7 +46,7 @@ static void applySettingsFromSelectWithUnion(const ASTSelectWithUnionQuery & sel
     // It is flattened later, when we process UNION ALL/DISTINCT.
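     // (E.g. for "SELECT 1 SETTINGS a = 1 UNION ALL SELECT 2 SETTINGS b = 2",
     //  only the SETTINGS clause of the last SELECT in the chain is applied here.)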
const auto * last_select = children.back()->as<ASTSelectQuery>();
     if (last_select && last_select->settings())
-        InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(false);
+        InterpreterSetQuery(last_select->settings(), context).executeForCurrentContext(/* ignore_setting_constraints= */ false);
 }
 
 void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMutablePtr context_)
@@ -58,7 +58,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
     if (const auto * query_with_output = dynamic_cast<const ASTQueryWithOutput *>(ast.get()))
     {
         if (query_with_output->settings_ast)
-            InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(false);
+            InterpreterSetQuery(query_with_output->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
 
         if (const auto * create_query = ast->as<ASTCreateQuery>(); create_query && create_query->select)
             applySettingsFromSelectWithUnion(create_query->select->as<ASTSelectWithUnionQuery &>(), context_);
     }
 
     if (const auto * select_query = ast->as<ASTSelectQuery>())
     {
         if (auto new_settings = select_query->settings())
-            InterpreterSetQuery(new_settings, context_).executeForCurrentContext(false);
+            InterpreterSetQuery(new_settings, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
     }
     else if (const auto * select_with_union_query = ast->as<ASTSelectWithUnionQuery>())
     {
@@ -76,7 +76,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
     else if (const auto * explain_query = ast->as<ASTExplainQuery>())
     {
         if (explain_query->settings_ast)
-            InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(false);
+            InterpreterSetQuery(explain_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
 
         applySettingsFromQuery(explain_query->getExplainedQuery(), context_);
     }
@@ -84,7 +84,7 @@ void InterpreterSetQuery::applySettingsFromQuery(const ASTPtr & ast, ContextMuta
     {
         context_->setInsertFormat(insert_query->format);
         if (insert_query->settings_ast)
-            InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(false);
+            InterpreterSetQuery(insert_query->settings_ast, context_).executeForCurrentContext(/* ignore_setting_constraints= */ false);
     }
 }
 

From 3b45916470affeb555ae20a4d557ea9de5075f50 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 22 Jul 2024 05:46:53 +0200
Subject: [PATCH 020/180] What if we tighten limits for functional tests?
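
Set a per-query cap on memory usage and on the number of rows a query may read
in the functional test profile, so that a runaway test query fails quickly with
a clear error instead of exhausting the CI machine. A rough sketch of the
intended effect (exact error names may vary between versions):

    SET max_rows_to_read = 20000000;
    SELECT count() FROM system.numbers; -- now fails fast with TOO_MANY_ROWS
    -- a test that legitimately needs an unbounded read can opt out per query:
    SELECT sum(number) FROM numbers(100000000) SETTINGS max_rows_to_read = 0;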
---
 tests/config/install.sh         | 1 +
 tests/config/users.d/limits.xml | 8 ++++++++
 2 files changed, 9 insertions(+)
 create mode 100644 tests/config/users.d/limits.xml

diff --git a/tests/config/install.sh b/tests/config/install.sh
index 1b0edc5fc16..265b9248f4a 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -93,6 +93,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/
+ln -sf $SRC_PATH/users.d/limits.xml $DEST_SERVER_PATH/users.d/
 
 if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then
     ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/
diff --git a/tests/config/users.d/limits.xml b/tests/config/users.d/limits.xml
new file mode 100644
index 00000000000..f44c73241ab
--- /dev/null
+++ b/tests/config/users.d/limits.xml
@@ -0,0 +1,8 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <max_memory_usage>5G</max_memory_usage>
+            <max_rows_to_read>20000000</max_rows_to_read>
+        </default>
+    </profiles>
+</clickhouse>

From 8c264230e30bf97f6bac999401cd594b31e09977 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 22 Jul 2024 07:20:33 +0200
Subject: [PATCH 021/180] Loosen the limit

---
 docker/test/stateful/run.sh | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index 857385f4715..72a8f31ab71 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -118,8 +118,8 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
         clickhouse-client --query "CREATE TABLE test.hits AS datasets.hits_v1"
         clickhouse-client --query "CREATE TABLE test.visits AS datasets.visits_v1"
 
-        clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
-        clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
+        clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
+        clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
 
         clickhouse-client --query "DROP TABLE datasets.hits_v1"
         clickhouse-client --query "DROP TABLE datasets.visits_v1"
@@ -191,16 +191,16 @@ else
             ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
             SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
 
-        clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
-        clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+        clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
+        clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
 
         clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
         clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
     else
         clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
         clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
     fi
-    clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime,
EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, 
ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0" fi clickhouse-client --query "SHOW TABLES FROM test" From b42069cfa80e66ab59669bb7ccb93c2944d91170 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:27:11 +0200 Subject: [PATCH 022/180] Adjust some tests --- .../0_stateless/00111_shard_external_sort_distributed.sql | 6 +++--- .../0_stateless/00463_long_sessions_in_http_interface.sh | 2 +- tests/queries/0_stateless/00601_kill_running_query.sh | 2 +- tests/queries/0_stateless/00976_max_execution_speed.sql | 2 +- .../0_stateless/01119_optimize_trivial_insert_select.sql | 5 +++-- .../queries/0_stateless/01245_limit_infinite_sources.sql | 1 + tests/queries/0_stateless/01249_flush_interactive.sh | 4 ++-- tests/queries/0_stateless/01293_show_settings.reference | 1 + .../01301_aggregate_state_exception_memory_leak.sh | 2 +- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 2 +- .../0_stateless/02021_exponential_sum_shard.reference 
| 1 - tests/queries/0_stateless/02021_exponential_sum_shard.sql | 1 - tests/queries/0_stateless/02136_kill_scalar_queries.sh | 2 +- tests/queries/0_stateless/02293_ttest_large_samples.sql | 2 ++ .../02294_floating_point_second_in_settings.sh | 6 +++--- tests/queries/0_stateless/02343_aggregation_pipeline.sql | 8 +++----- .../0_stateless/02697_stop_reading_on_first_cancel.sh | 2 +- tests/queries/0_stateless/02700_s3_part_INT_MAX.sh | 4 +++- .../02896_max_execution_time_with_break_overflow_mode.sql | 2 ++ tests/queries/0_stateless/02915_sleep_large_uint.sql | 1 + ...p_virtual_columns_with_non_deterministic_functions.sql | 1 + 21 files changed, 32 insertions(+), 25 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 112f5edae36..88a05f59111 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,10 +1,10 @@ -- Tags: distributed -SET max_memory_usage = 300000000; -SET max_bytes_before_external_sort = 20000000; +SET max_memory_usage = 150000000; +SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; -CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 10000000; +CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; diff --git a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh index d41d6409315..bb77a88820a 100755 --- a/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh +++ b/tests/queries/0_stateless/00463_long_sessions_in_http_interface.sh @@ -74,7 +74,7 @@ ${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE t" echo "A session cannot be used by concurrent connections:" -${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9" --data-binary "SELECT count() FROM system.numbers" >/dev/null & +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&session_id=${CLICKHOUSE_DATABASE}_9&query_id=${CLICKHOUSE_DATABASE}_9&max_rows_to_read=0" --data-binary "SELECT count() FROM system.numbers" >/dev/null & # An infinite loop is required to make the test reliable. 
We will ensure that at least once the query on the line above has started before this check while true diff --git a/tests/queries/0_stateless/00601_kill_running_query.sh b/tests/queries/0_stateless/00601_kill_running_query.sh index 3163f8146d0..be0fff49129 100755 --- a/tests/queries/0_stateless/00601_kill_running_query.sh +++ b/tests/queries/0_stateless/00601_kill_running_query.sh @@ -11,7 +11,7 @@ function wait_for_query_to_start() while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done } -${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k)' > /dev/null & +${CLICKHOUSE_CURL_COMMAND} -q --max-time 30 -sS "$CLICKHOUSE_URL&query_id=test_00601_$CLICKHOUSE_DATABASE" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0' > /dev/null & wait_for_query_to_start "test_00601_$CLICKHOUSE_DATABASE" $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_00601_$CLICKHOUSE_DATABASE'" wait diff --git a/tests/queries/0_stateless/00976_max_execution_speed.sql b/tests/queries/0_stateless/00976_max_execution_speed.sql index 52c3f05ff43..41374712724 100644 --- a/tests/queries/0_stateless/00976_max_execution_speed.sql +++ b/tests/queries/0_stateless/00976_max_execution_speed.sql @@ -1,2 +1,2 @@ -SET max_execution_speed = 1, max_execution_time = 3; +SET max_execution_speed = 1, max_execution_time = 3, max_rows_to_read = 0; SELECT count() FROM system.numbers; -- { serverError TIMEOUT_EXCEEDED } diff --git a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql index a53b60a5ad3..2b301d7aced 100644 --- a/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01119_optimize_trivial_insert_select.sql @@ -1,8 +1,9 @@ drop table if exists t; create table t(n int, a Int64, s String) engine = MergeTree() order by a; -set enable_positional_arguments=0; -set optimize_trivial_insert_select=1; +set enable_positional_arguments = 0; +set optimize_trivial_insert_select = 1; +set max_rows_to_read = 0; -- due to aggregate functions, optimize_trivial_insert_select will not be applied insert into t select 1, sum(number) as c, getSetting('max_threads') from numbers_mt(100000000) settings max_insert_threads=4, max_threads=2; diff --git a/tests/queries/0_stateless/01245_limit_infinite_sources.sql b/tests/queries/0_stateless/01245_limit_infinite_sources.sql index 05680d86a33..69c93baf8a8 100644 --- a/tests/queries/0_stateless/01245_limit_infinite_sources.sql +++ b/tests/queries/0_stateless/01245_limit_infinite_sources.sql @@ -9,3 +9,4 @@ FROM ) WHERE number = 1 LIMIT 1 +SETTINGS max_rows_to_read = 0; diff --git a/tests/queries/0_stateless/01249_flush_interactive.sh b/tests/queries/0_stateless/01249_flush_interactive.sh index 551e11c8c8d..775b7825a16 100755 --- a/tests/queries/0_stateless/01249_flush_interactive.sh +++ b/tests/queries/0_stateless/01249_flush_interactive.sh @@ -14,10 +14,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function test() { - timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --query " + timeout 5 ${CLICKHOUSE_LOCAL} --max_execution_time 10 --max_rows_to_read 0 --query " SELECT DISTINCT number % 5 FROM 
system.numbers" ||: echo -e '---' - timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10" --data-binary " + timeout 5 ${CLICKHOUSE_CURL} -sS --no-buffer "${CLICKHOUSE_URL}&max_execution_time=10&max_rows_to_read=0" --data-binary " SELECT DISTINCT number % 5 FROM system.numbers" ||: echo -e '---' } diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 187f55697e4..9d326f16a3b 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -5,5 +5,6 @@ connect_timeout_with_failover_secure_ms Milliseconds 3000 external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 +max_memory_usage UInt64 5000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh index 47fe7a9c7d9..9dd800ceb09 100755 --- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh +++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) start=$SECONDS # If the memory leak exists, it will lead to OOM fairly quickly. for _ in {1..1000}; do - $CLICKHOUSE_CLIENT --max_memory_usage 1G <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; + $CLICKHOUSE_CLIENT --max_memory_usage 1G --max_rows_to_read 0 <<< "SELECT uniqExactState(number) FROM system.numbers_mt GROUP BY number % 10"; # NOTE: we cannot use timeout here since this will not guarantee that the query will be executed at least once. 
# (since graceful wait of clickhouse-client had been reverted) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 1cf52c0288b..ec14f637c01 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -9,7 +9,7 @@ create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index INSERT INTO t SELECT number, if(number < (8129 * 1024), arrayStringConcat(arrayMap(x -> toString(x), range(number % 128)), ' '), '') -FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8; +FROM numbers_mt((8129 * 1024) * 3) settings max_insert_threads=8, max_rows_to_read=0; -- optimize table t final; diff --git a/tests/queries/0_stateless/02021_exponential_sum_shard.reference b/tests/queries/0_stateless/02021_exponential_sum_shard.reference index 8453706a05a..c28e5d7a132 100644 --- a/tests/queries/0_stateless/02021_exponential_sum_shard.reference +++ b/tests/queries/0_stateless/02021_exponential_sum_shard.reference @@ -2,4 +2,3 @@ 0.009775171065493644 0.009775171065493644 0.009775171065493644 -0.009775171065493644 diff --git a/tests/queries/0_stateless/02021_exponential_sum_shard.sql b/tests/queries/0_stateless/02021_exponential_sum_shard.sql index 49fde0fe217..8e91637e41d 100644 --- a/tests/queries/0_stateless/02021_exponential_sum_shard.sql +++ b/tests/queries/0_stateless/02021_exponential_sum_shard.sql @@ -3,4 +3,3 @@ WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1) WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(10000)); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(100000)); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(1000000)); -WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM remote('127.0.0.{1..10}', numbers_mt(10000000)); diff --git a/tests/queries/0_stateless/02136_kill_scalar_queries.sh b/tests/queries/0_stateless/02136_kill_scalar_queries.sh index c8691b62360..f8bd5a42756 100755 --- a/tests/queries/0_stateless/02136_kill_scalar_queries.sh +++ b/tests/queries/0_stateless/02136_kill_scalar_queries.sh @@ -10,7 +10,7 @@ function wait_for_query_to_start() } QUERY_1_ID="${CLICKHOUSE_DATABASE}_TEST02132KILL_QUERY1" -(${CLICKHOUSE_CLIENT} --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." || echo 'FAIL') & +(${CLICKHOUSE_CLIENT} --max_rows_to_read 0 --query_id="${QUERY_1_ID}" --query='select (SELECT max(number) from system.numbers) + 1;' 2>&1 | grep -q "Code: 394." 
|| echo 'FAIL') & wait_for_query_to_start "${QUERY_1_ID}" ${CLICKHOUSE_CLIENT} --query="KILL QUERY WHERE query_id='${QUERY_1_ID}' SYNC" diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 14baa3fddfe..826bd483fe9 100644 --- a/tests/queries/0_stateless/02293_ttest_large_samples.sql +++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql @@ -15,6 +15,8 @@ SELECT FROM system.numbers limit 500000)); +SET max_rows_to_read = 0; + SELECT roundBankers(result.1, 5), roundBankers(result.2, 5 ) FROM ( SELECT studentTTest(sample, variant) as result diff --git a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh index 7a18b8fea29..27dbd3e3de6 100755 --- a/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh +++ b/tests/queries/0_stateless/02294_floating_point_second_in_settings.sh @@ -23,16 +23,16 @@ function check_output() { # TCP CLIENT echo "TCP CLIENT" -OUTPUT=$($CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 --max_execution_time $MAX_TIMEOUT -q "SELECT count() FROM system.numbers" 2>&1 || true) check_output "${OUTPUT}" echo "TCP CLIENT WITH SETTINGS IN QUERY" -OUTPUT=$($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) +OUTPUT=$($CLICKHOUSE_CLIENT --max_rows_to_read 0 -q "SELECT count() FROM system.numbers SETTINGS max_execution_time=$MAX_TIMEOUT" 2>&1 || true) check_output "${OUTPUT}" # HTTP CLIENT echo "HTTP CLIENT" -OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=$MAX_TIMEOUT" -d \ +OUTPUT=$(${CLICKHOUSE_CURL_COMMAND} -q -sS "$CLICKHOUSE_URL&max_execution_time=${MAX_TIMEOUT}&max_rows_to_read=0" -d \ "SELECT count() FROM system.numbers" || true) check_output "${OUTPUT}" diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.sql b/tests/queries/0_stateless/02343_aggregation_pipeline.sql index 0f9dbd0247d..24d54293313 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.sql +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.sql @@ -13,11 +13,9 @@ set allow_prefetched_read_pool_for_local_filesystem = 0; -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; - -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; explain pipeline select number from remote('127.0.0.{1,2,3}', system, numbers_mt) group by number settings distributed_aggregation_memory_efficient = 1; diff --git a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh index 2be13588453..5a2cec08eca 100755 --- a/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh +++ b/tests/queries/0_stateless/02697_stop_reading_on_first_cancel.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_read_with_cancel" -$CLICKHOUSE_CLIENT -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & +$CLICKHOUSE_CLIENT --max_rows_to_read 0 -n --query_id="$QUERY_ID" --query="SELECT sum(number * 0) FROM numbers(10000000000) SETTINGS partial_result_on_first_cancel=true;" & pid=$! for _ in {0..60} diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index a34a480a078..c431686b594 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -10,7 +10,9 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: .sh test is used over .sql because it needs $CLICKHOUSE_DATABASE to # avoid truncation, since seems that the version of MinIO that is used on CI # too slow with this. -$CLICKHOUSE_CLIENT -nm -q " +# +# Unfortunately, the test has to buffer it in memory. +$CLICKHOUSE_CLIENT --max_memory_usage 10G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql index ec86a66c7dd..3e131cad0f0 100644 --- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql +++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; + -- Query stops after timeout without an error SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null; diff --git a/tests/queries/0_stateless/02915_sleep_large_uint.sql b/tests/queries/0_stateless/02915_sleep_large_uint.sql index f7c04ab6d1f..08b6c580a28 100644 --- a/tests/queries/0_stateless/02915_sleep_large_uint.sql +++ b/tests/queries/0_stateless/02915_sleep_large_uint.sql @@ -1,6 +1,7 @@ SELECT sleep(3.40282e+44); -- { serverError BAD_ARGUMENTS } SELECT sleep((pow(2, 64) / 1000000) - 1); -- { serverError BAD_ARGUMENTS } SELECT sleepEachRow(184467440737095516) from numbers(10000); -- { serverError BAD_ARGUMENTS } +SET max_rows_to_read = 0; SELECT sleepEachRow(pow(2, 31)) from numbers(9007199254740992) settings function_sleep_max_microseconds_per_block = 8589934592000000000; -- { serverError TOO_SLOW } -- Another corner case, but it requires lots of memory to run (huge block size) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 8ccc3cf61da..6ef8c5a8656 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); select ignore(number) from test where RAND() > 4292390314 limit 10; From 016888e29a828870d5cdb50a0eb5e1514bafc97c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:32:20 +0200 Subject: [PATCH 
023/180] Adjust some tests --- tests/queries/1_stateful/00067_union_all.sql | 3 ++- .../00088_global_in_one_shard_and_rows_before_limit.sql | 2 +- .../queries/1_stateful/00147_global_in_aggregate_function.sql | 1 + .../queries/1_stateful/00149_quantiles_timing_distributed.sql | 1 + tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 1 + .../1_stateful/00171_grouping_aggregated_transform_bug.sql | 1 + .../1_stateful/00182_simple_squashing_transform_bug.sql | 1 + 7 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/queries/1_stateful/00067_union_all.sql b/tests/queries/1_stateful/00067_union_all.sql index 2a1d00e975d..9ee14b36b03 100644 --- a/tests/queries/1_stateful/00067_union_all.sql +++ b/tests/queries/1_stateful/00067_union_all.sql @@ -10,4 +10,5 @@ UNION ALL ORDER BY id DESC LIMIT 10 ) -ORDER BY id, event; +ORDER BY id, event +SETTINGS max_rows_to_read = 40_000_000; diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql index 52f9c46997f..443808e7bed 100644 --- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql +++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql @@ -1,4 +1,4 @@ -- Tags: shard -SET output_format_write_statistics = 0; +SET output_format_write_statistics = 0, max_rows_to_read = 20_000_000; SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact; diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql index 075c01530c6..c156f073573 100644 --- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql +++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql @@ -1,4 +1,5 @@ -- Tags: global +SET max_rows_to_read = 40_000_000; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); diff --git a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql index 6f910646fb7..16b565985ea 100644 --- a/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql +++ b/tests/queries/1_stateful/00149_quantiles_timing_distributed.sql @@ -1,4 +1,5 @@ -- Tags: distributed +SET max_rows_to_read = 100_000_000; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID); SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1; diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql index 7b3f50f8141..1a98a531067 100644 --- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql +++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql @@ -1,5 +1,6 @@ -- Tags: no-random-settings +SET max_memory_usage = '10G' SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40; -- We had a bug which lead to additional compressed data read. 
test.hits compressed size is about 1.2Gb, but we read more then 3Gb. diff --git a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql index 7068780a1b1..b3e4d749328 100644 --- a/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql +++ b/tests/queries/1_stateful/00171_grouping_aggregated_transform_bug.sql @@ -1,4 +1,5 @@ -- Tags: distributed +SET max_rows_to_read = '100M'; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS max_block_size = 63169; SELECT sum(cityHash64(*)) FROM (SELECT CounterID, quantileTiming(0.5)(SendTiming), count() FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10}', test.hits) WHERE SendTiming != -1 GROUP BY CounterID) SETTINGS optimize_aggregation_in_order = 1, max_block_size = 63169; diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index e73de4b33fb..85bad651090 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,6 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; +set max_rows_to_read = '20M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From e569a305ba07225ee641ae4af07ba9c88e4608d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 04:46:31 +0200 Subject: [PATCH 024/180] Adjust some tests --- .../00002_log_and_exception_messages_formatting.sql | 2 ++ .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- .../00376_shard_group_uniq_array_of_int_array.sql | 2 +- .../00377_shard_group_uniq_array_of_string_array.sql | 2 +- tests/queries/0_stateless/00600_replace_running_query.sh | 6 +++--- .../00834_cancel_http_readonly_queries_on_client_close.sh | 2 +- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 +- tests/queries/0_stateless/01304_direct_io_long.sh | 4 ++-- tests/queries/0_stateless/02021_exponential_sum.reference | 1 - tests/queries/0_stateless/02021_exponential_sum.sql | 1 - tests/queries/0_stateless/02234_cast_to_ip_address.sql | 2 +- .../0_stateless/02450_kill_distributed_query_deadlock.sh | 2 +- tests/queries/0_stateless/02585_query_status_deadlock.sh | 3 +-- tests/queries/0_stateless/02786_max_execution_time_leaf.sql | 1 + .../0_stateless/02844_subquery_timeout_with_break.sql | 2 +- tests/queries/0_stateless/02916_glogal_in_cancel.sql | 2 +- 16 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql index 07c42d6d039..c158406c0da 100644 --- a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -7,6 +7,8 @@ system flush logs; drop table if exists logs; create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time; +SET max_rows_to_read = 0; + -- 
Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. -- 0.001 threshold should be always enough, the value was about 0.00025 WITH 0.001 AS threshold diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8a310cb8fc9..f32a64cd30f 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index abd0e6e6a45..43066880102 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -6,7 +6,7 @@ CREATE TABLE group_uniq_arr_int ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index e9cfff211f8..1c4376ad577 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -6,6 +6,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index 7a71d17f19b..e7022875086 100755 --- 
a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -17,7 +17,7 @@ function wait_for_query_to_start() } -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1&max_rows_to_read=0" -d 'SELECT 1, count() FROM system.numbers' > /dev/null 2>&1 & wait_for_query_to_start 'hello' # Replace it @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --max_rows_to_read=0 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id @@ -38,7 +38,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=42&replace_running_query=1" -d 'S $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = '42' SYNC" > /dev/null wait -${CLICKHOUSE_CLIENT} --query_id=42 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT} --query_id=42 --max_rows_to_read=0 --query='SELECT 3, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' ${CLICKHOUSE_CLIENT} --query_id=42 --replace_running_query=1 --replace_running_query_max_wait_ms=500 --query='SELECT 43' 2>&1 | grep -F "can't be stopped" > /dev/null wait diff --git a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh index 5c21c70e06a..dd3735f27b1 100755 --- a/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh +++ b/tests/queries/0_stateless/00834_cancel_http_readonly_queries_on_client_close.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' +${CLICKHOUSE_CURL} --max-time 1 -sS "${CLICKHOUSE_URL}&query_id=cancel_http_readonly_queries_on_client_close&cancel_http_readonly_queries_on_client_close=1&max_rows_to_read=0&query=SELECT+count()+FROM+system.numbers" 2>&1 | grep -cF 'curl: (28)' i=0 retries=300 while [[ $i -lt $retries ]]; do diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 55eacd0db44..15a53841761 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,5 @@ drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into lc_00906 select '0123456789' from numbers(100000000); +insert into lc_00906 select '0123456789' from numbers(100000000) SETTINGS max_rows_to_read = '100M'; select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 2e27c2f7728..a66239058ab 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --max_rows_to_read 50M --multiquery " DROP TABLE IF EXISTS bug; CREATE TABLE bug (UserID UInt64, Date Date) ENGINE = MergeTree ORDER BY Date SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi', merge_max_block_size = 8192; @@ -18,5 +18,5 @@ cat "$LOG" | grep Loaded rm "$LOG" -$CLICKHOUSE_CLIENT --multiquery --query " +$CLICKHOUSE_CLIENT --multiquery " DROP TABLE bug;" diff --git a/tests/queries/0_stateless/02021_exponential_sum.reference b/tests/queries/0_stateless/02021_exponential_sum.reference index 5bd77479cf7..c9dcee51173 100644 --- a/tests/queries/0_stateless/02021_exponential_sum.reference +++ b/tests/queries/0_stateless/02021_exponential_sum.reference @@ -5,4 +5,3 @@ 0.0009775171065493646 0.0009775171065493646 0.0009775171065493646 -0.0009775171065493646 diff --git a/tests/queries/0_stateless/02021_exponential_sum.sql b/tests/queries/0_stateless/02021_exponential_sum.sql index 8ab7638029c..62ec7dcf9f1 100644 --- a/tests/queries/0_stateless/02021_exponential_sum.sql +++ b/tests/queries/0_stateless/02021_exponential_sum.sql @@ -6,4 +6,3 @@ WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1) WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(100000); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(1000000); WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(10000000); -WITH number % 10 = 0 AS value, number AS time SELECT exponentialMovingAverage(1)(value, time) AS exp_smooth FROM numbers_mt(100000000); diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql 
b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 28f1afff57f..51e953da905 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -67,7 +67,7 @@ SELECT toIPv6('::.1.2.3'); --{serverError CANNOT_PARSE_IPV6} SELECT toIPv6OrDefault('::.1.2.3'); SELECT toIPv6OrNull('::.1.2.3'); -SELECT count() FROM numbers_mt(100000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); +SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); SELECT '--'; diff --git a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 0cd520d8d5d..445f907bcc5 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -9,7 +9,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # this can trigger a hung/deadlock in ProcessorList. for i in {1..50}; do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & while :; do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" if [[ "$killed_queries" -ge 1 ]]; then diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index e3e34109cdb..6321ac0064a 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" -$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" -n -q " +$CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " create temporary table tmp as select * from numbers(500000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & @@ -23,4 +23,3 @@ do done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null - diff --git a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql index f678c913b46..2e4623f4ac6 100644 --- a/tests/queries/0_stateless/02786_max_execution_time_leaf.sql +++ b/tests/queries/0_stateless/02786_max_execution_time_leaf.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +SET max_rows_to_read = 0; SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) SETTINGS max_execution_time_leaf = 1; -- { serverError TIMEOUT_EXCEEDED } -- Can return partial result SELECT count() FROM cluster('test_cluster_two_shards', view( SELECT * FROM numbers(100000000000) )) FORMAT Null SETTINGS max_execution_time_leaf = 1, timeout_overflow_mode_leaf = 'break'; diff --git a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql index 511ed0c59de..00b527a9378 100644 --- a/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql +++ b/tests/queries/0_stateless/02844_subquery_timeout_with_break.sql @@ -4,7 +4,7 @@ CREATE TABLE t (key UInt64, value UInt64, INDEX value_idx value TYPE bloom_filte INSERT INTO t SELECT 
number, rand()%1000 FROM numbers(10000);

 SET timeout_overflow_mode='break';
-SET max_execution_time=0.1;
+SET max_execution_time=0.1, max_rows_to_read=0;
 SELECT * FROM t WHERE value IN (SELECT number FROM numbers(1000000000));

 DROP TABLE t;
diff --git a/tests/queries/0_stateless/02916_glogal_in_cancel.sql b/tests/queries/0_stateless/02916_glogal_in_cancel.sql
index ad54f1ecdec..dd61795947a 100644
--- a/tests/queries/0_stateless/02916_glogal_in_cancel.sql
+++ b/tests/queries/0_stateless/02916_glogal_in_cancel.sql
@@ -1,2 +1,2 @@
-set max_execution_time = 0.5, timeout_overflow_mode = 'break';
+set max_execution_time = 0.5, timeout_overflow_mode = 'break', max_rows_to_read = 0;
 SELECT number FROM remote('127.0.0.{3|2}', numbers(1)) WHERE number GLOBAL IN (SELECT number FROM numbers(10000000000.)) format Null;

From 26650dcb2e39b0d28cae4029b6adde2b6c01fda2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 24 Jul 2024 04:59:13 +0200
Subject: [PATCH 025/180] More limits

---
 tests/config/install.sh | 2 +-
 tests/config/users.d/limits.xml | 8 -----
 tests/config/users.d/limits.yaml | 57 ++++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+), 9 deletions(-)
 delete mode 100644 tests/config/users.d/limits.xml
 create mode 100644 tests/config/users.d/limits.yaml

diff --git a/tests/config/install.sh b/tests/config/install.sh
index 265b9248f4a..c5fb3cc92c7 100755
--- a/tests/config/install.sh
+++ b/tests/config/install.sh
@@ -93,7 +93,7 @@ ln -sf $SRC_PATH/users.d/prefetch_settings.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/nonconst_timezone.xml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/allow_introspection_functions.yaml $DEST_SERVER_PATH/users.d/
 ln -sf $SRC_PATH/users.d/replicated_ddl_entry.xml $DEST_SERVER_PATH/users.d/
-ln -sf $SRC_PATH/users.d/limits.xml $DEST_SERVER_PATH/users.d/
+ln -sf $SRC_PATH/users.d/limits.yaml $DEST_SERVER_PATH/users.d/

 if [[ -n "$USE_OLD_ANALYZER" ]] && [[ "$USE_OLD_ANALYZER" -eq 1 ]]; then
     ln -sf $SRC_PATH/users.d/analyzer.xml $DEST_SERVER_PATH/users.d/
diff --git a/tests/config/users.d/limits.xml b/tests/config/users.d/limits.xml
deleted file mode 100644
index f44c73241ab..00000000000
--- a/tests/config/users.d/limits.xml
+++ /dev/null
@@ -1,8 +0,0 @@
-<clickhouse>
- <profiles>
- <default>
- <max_memory_usage>5G</max_memory_usage>
- <max_rows_to_read>20000000</max_rows_to_read>
- </default>
- </profiles>
-</clickhouse>
diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml
new file mode 100644
index 00000000000..4f3f439a997
--- /dev/null
+++ b/tests/config/users.d/limits.yaml
@@ -0,0 +1,57 @@
+profiles:
+ default:
+ max_memory_usage: 5G
+ max_rows_to_read: 20000000
+
+ # Also set every other limit to a high value, so it will not limit anything, but we will still test the code around it.
+ s3_max_get_rps: 1000000 + s3_max_get_burst: 2000000 + s3_max_put_rps: 1000000 + s3_max_put_burst: 2000000 + max_remote_read_network_bandwidth: 1T + max_remote_write_network_bandwidth: 1T + max_local_read_bandwidth: 1T + max_local_write_bandwidth: 1T + use_index_for_in_with_subqueries_max_values: 1G + max_bytes_to_read: 1T + max_bytes_to_read_leaf: 1T + max_rows_to_group_by: 10G + max_bytes_before_external_group_by: 10G + max_rows_to_sort: 10G + max_bytes_to_sort: 10G + max_bytes_before_external_sort: 10G + max_result_rows: 1G + max_result_bytes: 1G + max_execution_time: 600 + max_execution_time_leaf: 600 + max_execution_speed: 100G + max_execution_speed_bytes: 10T + max_estimated_execution_time: 600 + max_columns_to_read: 10K + max_temporary_columns: 10K + max_temporary_non_const_columns: 10K + max_sessions_for_user: 1K + max_rows_in_set: 10G + max_bytes_in_set: 10G + max_rows_in_join: 10G + max_bytes_in_join: 10G + max_rows_in_set_to_optimize_join: 1G + max_rows_to_transfer: 1G + max_bytes_to_transfer: 1G + max_rows_in_distinct: 10G + max_bytes_in_distinct: 10G + max_memory_usage_for_user: 10G + max_network_bandwidth: 100G + max_network_bytes: 1T + max_network_bandwidth_for_user: 100G + max_network_bandwidth_for_all_users: 100G + max_temporary_data_on_disk_size_for_user: 100G + max_temporary_data_on_disk_size_for_query: 100G + max_backup_bandwidth: 100G + max_hyperscan_regexp_length: 1M + max_hyperscan_regexp_total_length: 10M + query_cache_max_size_in_bytes: 10M + query_cache_max_entries: 100K + external_storage_max_read_rows: 10G + external_storage_max_read_bytes: 10G + max_streams_for_merge_tree_reading: 1000 From 63e586d17af1e8a92fc4d2e8af71f1dba4996fea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 09:57:56 +0200 Subject: [PATCH 026/180] Adjust tests --- tests/config/users.d/limits.yaml | 6 +-- ..._shard_external_sort_distributed.reference | 40 +++++++++---------- .../00111_shard_external_sort_distributed.sql | 2 +- ...00375_shard_group_uniq_array_of_string.sql | 2 +- ...76_shard_group_uniq_array_of_int_array.sql | 2 +- ...shard_group_uniq_array_of_string_array.sql | 2 +- .../00600_replace_running_query.sh | 4 +- .../00601_kill_running_query.reference | 2 +- .../00906_low_cardinality_cache.sql | 3 +- .../01091_query_profiler_does_not_hang.sql | 2 +- .../0_stateless/01293_show_settings.reference | 1 + .../0_stateless/01485_256_bit_multiply.sql | 2 + .../01603_read_with_backoff_bug.sql | 1 + .../01961_roaring_memory_tracking.sql | 2 +- .../02003_memory_limit_in_client.sh | 6 +-- .../02161_addressToLineWithInlines.sql | 2 +- .../02226_analyzer_or_like_combine.sql | 2 + .../02234_cast_to_ip_address.reference | 8 ++-- .../02343_aggregation_pipeline.reference | 6 +-- .../02353_simdjson_buffer_overflow.sql | 1 + .../0_stateless/02372_now_in_block.sql | 1 + .../02536_delta_gorilla_corruption.sql | 2 +- 22 files changed, 54 insertions(+), 45 deletions(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 4f3f439a997..1c9fff9f4c8 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -27,9 +27,9 @@ profiles: max_execution_speed: 100G max_execution_speed_bytes: 10T max_estimated_execution_time: 600 - max_columns_to_read: 10K - max_temporary_columns: 10K - max_temporary_non_const_columns: 10K + max_columns_to_read: 20K + max_temporary_columns: 20K + max_temporary_non_const_columns: 20K max_sessions_for_user: 1K max_rows_in_set: 10G max_bytes_in_set: 10G diff --git 
a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference index df5aa77af60..7534c12a0d8 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.reference @@ -1,20 +1,20 @@ -7040546 -7040546 -4327029 -4327029 -1613512 -1613512 -8947307 -8947307 -6233790 -6233790 -3520273 -3520273 -806756 -806756 -8140551 -8140551 -5427034 -5427034 -2713517 -2713517 +4437158 +4437158 +1723641 +1723641 +3630402 +3630402 +916885 +916885 +2823646 +2823646 +110129 +110129 +4730407 +4730407 +2016890 +2016890 +3923651 +3923651 +1210134 +1210134 diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 88a05f59111..ef9c0f9f9d0 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -6,6 +6,6 @@ SET max_bytes_before_external_sort = 10000000; DROP TABLE IF EXISTS numbers10m; CREATE VIEW numbers10m AS SELECT number FROM system.numbers LIMIT 5000000; -SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 19999980, 20; +SELECT number FROM remote('127.0.0.{2,3}', currentDatabase(), numbers10m) ORDER BY number * 1234567890123456789 LIMIT 4999980, 20; DROP TABLE numbers10m; diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index f32a64cd30f..445ffe66f64 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '60M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 43066880102..7593e1e1580 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -6,7 +6,7 @@ CREATE TABLE group_uniq_arr_int ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY 
id ORDER BY id SETTINGS max_rows_to_read = '55M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 1c4376ad577..8b48ee673f3 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -6,6 +6,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '50M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00600_replace_running_query.sh b/tests/queries/0_stateless/00600_replace_running_query.sh index e7022875086..8f21443d589 100755 --- a/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/tests/queries/0_stateless/00600_replace_running_query.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) TEST_PREFIX=$RANDOM ${CLICKHOUSE_CLIENT} -q "drop user if exists u_00600${TEST_PREFIX}" -${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1" +${CLICKHOUSE_CLIENT} -q "create user u_00600${TEST_PREFIX} settings max_execution_time=60, readonly=1, max_rows_to_read=0" ${CLICKHOUSE_CLIENT} -q "grant select on system.numbers to u_00600${TEST_PREFIX}" function wait_for_query_to_start() @@ -26,7 +26,7 @@ $CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL&query_id=hello&replace_running_query=1" -d # Wait for it to be replaced wait -${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --max_rows_to_read=0 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & +${CLICKHOUSE_CLIENT_BINARY} --user=u_00600${TEST_PREFIX} --query_id=42 --query='SELECT 2, count() FROM system.numbers' 2>&1 | grep -cF 'was cancelled' & wait_for_query_to_start '42' # Trying to run another query with the same query_id diff --git a/tests/queries/0_stateless/00601_kill_running_query.reference b/tests/queries/0_stateless/00601_kill_running_query.reference index 3917ff89482..7824d5804bc 100644 --- a/tests/queries/0_stateless/00601_kill_running_query.reference +++ b/tests/queries/0_stateless/00601_kill_running_query.reference @@ -1 +1 @@ -waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) +waiting test_00601_default default SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(50000000) GROUP BY k) SETTINGS max_rows_to_read = 0 diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index 15a53841761..efd96746dc4 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,5 +1,6 @@ +SET 
max_rows_to_read = '100M' drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; -insert into lc_00906 select '0123456789' from numbers(100000000) SETTINGS max_rows_to_read = '100M'; +insert into lc_00906 select '0123456789' from numbers(100000000); select count(), b from lc_00906 group by b; drop table if exists lc_00906; diff --git a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql index 21a84bdd691..45f1a00ae23 100644 --- a/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql +++ b/tests/queries/0_stateless/01091_query_profiler_does_not_hang.sql @@ -1,4 +1,4 @@ -- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug -SET query_profiler_cpu_time_period_ns = 1; +SET query_profiler_cpu_time_period_ns = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(1000000000); diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference index 9d326f16a3b..8b383813c9f 100644 --- a/tests/queries/0_stateless/01293_show_settings.reference +++ b/tests/queries/0_stateless/01293_show_settings.reference @@ -6,5 +6,6 @@ external_storage_connect_timeout_sec UInt64 10 s3_connect_timeout_ms UInt64 1000 filesystem_prefetch_max_memory_usage UInt64 1073741824 max_memory_usage UInt64 5000000000 +max_memory_usage_for_user UInt64 10000000000 max_untracked_memory UInt64 1048576 memory_profiler_step UInt64 1048576 diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 5c8c47c9127..18be2b11599 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,5 +1,7 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug +SET max_rows_to_read = '100M' + select count() from ( select toInt128(number) * number x, toInt256(number) * number y from numbers_mt(100000000) where x != y diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index ec14f637c01..b68d15a2200 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -3,6 +3,7 @@ set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; +set max_rows_to_read = '30M'; drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 485c8192f69..79c722bd629 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M'; +SET max_memory_usage = '100M', max_rows_to_read = '1B'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.sh b/tests/queries/0_stateless/02003_memory_limit_in_client.sh index 96028f4847a..94eba8f25be 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.sh +++ 
b/tests/queries/0_stateless/02003_memory_limit_in_client.sh @@ -4,11 +4,11 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" $CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_result_bytes 0 --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" $CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index cf400ed34c5..d7ce133f38c 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -6,7 +6,7 @@ SELECT addressToLineWithInlines(1); -- { serverError FUNCTION_NOT_ALLOWED } SET allow_introspection_functions = 1; SET query_profiler_real_time_period_ns = 0; SET query_profiler_cpu_time_period_ns = 1000000; -SET log_queries = 1; +SET log_queries = 1, max_rows_to_read = 0; SELECT count() FROM numbers_mt(10000000000) SETTINGS log_comment='02161_test_case'; SET log_queries = 0; SET query_profiler_cpu_time_period_ns = 0; diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql index fbebfc6d281..1cd6b8a4e4d 100644 --- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql +++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.sql @@ -1,3 +1,5 @@ +SET allow_hyperscan = 1, max_hyperscan_regexp_length = 0, max_hyperscan_regexp_total_length = 0; + EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0; EXPLAIN QUERY TREE run_passes=1 SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0, allow_experimental_analyzer = 1; EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS 
optimize_or_like_chain = 1; diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index fa9c6bd0f94..3dd306477b9 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -26,9 +26,9 @@ IPv4 functions IPv6 functions \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \N -\0\0\0\0\0\0\0\0\0\0\0\0 -\0\0\0\0\0\0\0\0\0\0\0\0 -\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0��\0\0 -- :: \N @@ -37,7 +37,7 @@ IPv6 functions ::ffff:127.0.0.1 :: \N -100000000 +20000000 -- ::ffff:127.0.0.1 -- diff --git a/tests/queries/0_stateless/02343_aggregation_pipeline.reference b/tests/queries/0_stateless/02343_aggregation_pipeline.reference index bf61eb6da0a..eb013200a17 100644 --- a/tests/queries/0_stateless/02343_aggregation_pipeline.reference +++ b/tests/queries/0_stateless/02343_aggregation_pipeline.reference @@ -1,6 +1,6 @@ -- { echoOn } -explain pipeline select * from (select * from numbers(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -16,7 +16,7 @@ ExpressionTransform × 16 ExpressionTransform (ReadFromSystemNumbers) NumbersRange 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) group by number settings max_rows_to_read = 0; (Expression) ExpressionTransform × 16 (Aggregating) @@ -32,7 +32,7 @@ ExpressionTransform × 16 ExpressionTransform × 16 (ReadFromSystemNumbers) NumbersRange × 16 0 → 1 -explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number; +explain pipeline select * from (select * from numbers_mt(1e8) group by number) order by number settings max_rows_to_read = 0; (Expression) ExpressionTransform (Sorting) diff --git a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql index b324f834053..e7c6c272102 100644 --- a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql +++ b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql @@ -2,5 +2,6 @@ SET max_execution_time = 3; SET timeout_overflow_mode = 'break'; +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM system.numbers_mt WHERE NOT ignore(JSONExtract('{' || repeat('"a":"b",', rand() % 10) || '"c":"d"}', 'a', 'String')) FORMAT Null; diff --git a/tests/queries/0_stateless/02372_now_in_block.sql b/tests/queries/0_stateless/02372_now_in_block.sql index aee4572ce8d..d0aec471801 100644 --- a/tests/queries/0_stateless/02372_now_in_block.sql +++ b/tests/queries/0_stateless/02372_now_in_block.sql @@ -1,3 +1,4 @@ +SET max_rows_to_read = 0, max_bytes_to_read = 0; SELECT count() FROM (SELECT DISTINCT nowInBlock(), nowInBlock('Pacific/Pitcairn') FROM system.numbers LIMIT 2); SELECT nowInBlock(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } SELECT nowInBlock(NULL) IS NULL; diff --git a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql index a4e0965e329..3accc726d08 100644 --- a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql @@ -12,7 +12,7 @@ create table 
bug_delta_gorilla (value_bug UInt64 codec (Delta, Gorilla)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi' -as (select 0 from numbers(30000000)); +as (select 0 from numbers(20000000)); select count(*) from bug_delta_gorilla From c2238c57231fe86883eb9b9a7042a72d28d31eae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:45:33 +0200 Subject: [PATCH 027/180] Fix tests --- .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- .../00376_shard_group_uniq_array_of_int_array.sql | 4 +++- .../00377_shard_group_uniq_array_of_string_array.sql | 3 ++- tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 +- tests/queries/0_stateless/01485_256_bit_multiply.sql | 2 +- tests/queries/0_stateless/01961_roaring_memory_tracking.sql | 2 +- .../queries/0_stateless/02234_cast_to_ip_address.reference | 6 +++--- tests/queries/0_stateless/02234_cast_to_ip_address.sql | 2 +- 8 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 445ffe66f64..8db91904a6a 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -7,7 +7,7 @@ INSERT INTO group_uniq_str SELECT 2 as id, toString(number % 100) as v FROM syst INSERT INTO group_uniq_str SELECT 5 as id, toString(number % 100) as v FROM system.numbers LIMIT 10000000; SELECT length(groupUniqArray(v)) FROM group_uniq_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '60M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '100M'; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10000)(v)) FROM group_uniq_str GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 7593e1e1580..24b7f1c30a6 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,12 +1,14 @@ -- Tags: shard +SET max_rows_to_read = '55M'; + DROP TABLE IF EXISTS group_uniq_arr_int; CREATE TABLE group_uniq_arr_int ENGINE = Memory AS SELECT g as id, if(c == 0, [v], if(c == 1, emptyArrayInt64(), [v, v])) as v FROM (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_int') GROUP BY id ORDER BY id; SELECT length(groupUniqArray(10)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; SELECT length(groupUniqArray(100000)(v)) FROM group_uniq_arr_int GROUP BY id ORDER BY id; diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql 
b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql index 8b48ee673f3..180a6a04861 100644 --- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql +++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql @@ -1,4 +1,5 @@ -- Tags: shard +SET max_rows_to_read = '55M'; DROP TABLE IF EXISTS group_uniq_arr_str; CREATE TABLE group_uniq_arr_str ENGINE = Memory AS @@ -6,6 +7,6 @@ CREATE TABLE group_uniq_arr_str ENGINE = Memory AS (SELECT intDiv(number%1000000, 100) as v, intDiv(number%100, 10) as g, number%10 as c FROM system.numbers WHERE c < 3 LIMIT 10000000); SELECT length(groupUniqArray(v)) FROM group_uniq_arr_str GROUP BY id ORDER BY id; -SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id SETTINGS max_rows_to_read = '55M'; +SELECT length(groupUniqArray(v)) FROM remote('127.0.0.{2,3,4,5}', currentDatabase(), 'group_uniq_arr_str') GROUP BY id ORDER BY id; DROP TABLE IF EXISTS group_uniq_arr_str; diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql index efd96746dc4..9c1abe1b6df 100644 --- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql +++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql @@ -1,4 +1,4 @@ -SET max_rows_to_read = '100M' +SET max_rows_to_read = '100M'; drop table if exists lc_00906; create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into lc_00906 select '0123456789' from numbers(100000000); diff --git a/tests/queries/0_stateless/01485_256_bit_multiply.sql b/tests/queries/0_stateless/01485_256_bit_multiply.sql index 18be2b11599..a4e99d51970 100644 --- a/tests/queries/0_stateless/01485_256_bit_multiply.sql +++ b/tests/queries/0_stateless/01485_256_bit_multiply.sql @@ -1,6 +1,6 @@ -- Tags: no-random-settings, no-asan, no-msan, no-tsan, no-ubsan, no-debug -SET max_rows_to_read = '100M' +SET max_rows_to_read = '100M'; select count() from ( diff --git a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql index 79c722bd629..22eb8e887f2 100644 --- a/tests/queries/0_stateless/01961_roaring_memory_tracking.sql +++ b/tests/queries/0_stateless/01961_roaring_memory_tracking.sql @@ -2,5 +2,5 @@ SET max_bytes_before_external_group_by = 0; -SET max_memory_usage = '100M', max_rows_to_read = '1B'; +SET max_memory_usage = '100M', max_rows_to_read = '1G'; SELECT cityHash64(rand() % 1000) as n, groupBitmapState(number) FROM numbers_mt(200000000) GROUP BY n FORMAT Null; -- { serverError MEMORY_LIMIT_EXCEEDED } diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index 3dd306477b9..b9f0a49ec4d 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -26,9 +26,9 @@ IPv4 functions IPv6 functions \0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0 \N -\0\0\0\0\0\0\0\0\0\0��\0\0 -\0\0\0\0\0\0\0\0\0\0��\0\0 -\0\0\0\0\0\0\0\0\0\0��\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 +\0\0\0\0\0\0\0\0\0\0\0\0 -- :: \N diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 51e953da905..c851cfde927 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ 
b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -71,7 +71,7 @@ SELECT count() FROM numbers_mt(20000000) WHERE NOT ignore(toIPv6OrZero(randomStr SELECT '--'; -SELECT cast('test' , 'IPv6'); --{serverError CANNOT_PARSE_IPV6} +SELECT cast('test' , 'IPv6'); -- { serverError CANNOT_PARSE_IPV6 } SELECT cast('::ffff:127.0.0.1', 'IPv6'); SELECT '--'; From 2106d4769ac4fca6604dec3b66831aef4b12e943 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 10:53:28 +0200 Subject: [PATCH 028/180] Fix tests --- tests/config/users.d/limits.yaml | 1 - tests/queries/0_stateless/02346_additional_filters.sql | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 1c9fff9f4c8..23aaccf9298 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -30,7 +30,6 @@ profiles: max_columns_to_read: 20K max_temporary_columns: 20K max_temporary_non_const_columns: 20K - max_sessions_for_user: 1K max_rows_in_set: 10G max_bytes_in_set: 10G max_rows_in_join: 10G diff --git a/tests/queries/0_stateless/02346_additional_filters.sql b/tests/queries/0_stateless/02346_additional_filters.sql index f6b665713ec..5a799e1c8c1 100644 --- a/tests/queries/0_stateless/02346_additional_filters.sql +++ b/tests/queries/0_stateless/02346_additional_filters.sql @@ -4,6 +4,8 @@ drop table if exists table_2; drop table if exists v_numbers; drop table if exists mv_table; +SET max_rows_to_read = 0; + create table table_1 (x UInt32, y String) engine = MergeTree order by x; insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); From 878a340317863e94aec61476e105342aef997c7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 11:47:07 +0200 Subject: [PATCH 029/180] Fix tests --- docker/test/stateful/run.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 304bfd7b533..5532df40fdf 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -200,7 +200,8 @@ else clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits 
UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" - clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + # AWS S3 is very inefficient, so increase memory even further: + clickhouse-client --max_memory_usage 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" From 036485a657a35d764ad33f912d0d10b37d05e59b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 12:15:48 +0200 Subject: [PATCH 030/180] Fix error --- docker/test/stateful/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 5532df40fdf..bd2e38ff00f 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -201,7 +201,7 @@ else fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, 
WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" # AWS S3 is very inefficient, so increase memory even further: - clickhouse-client --max_memory_usage 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 20G --max_memory_usage_for_user 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" From e2c78844a005aa5c04e454df6ff7e0508c967d18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 24 Jul 2024 14:53:09 +0200 Subject: [PATCH 031/180] Fix tests --- docker/test/stateful/run.sh | 2 +- tests/queries/0_stateless/02177_issue_31009.sql | 2 ++ .../00088_global_in_one_shard_and_rows_before_limit.sql | 2 +- tests/queries/1_stateful/00147_global_in_aggregate_function.sql | 2 +- tests/queries/1_stateful/00167_read_bytes_from_fs.sql | 2 +- .../queries/1_stateful/00182_simple_squashing_transform_bug.sql | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index bd2e38ff00f..fde8b8ae529 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -201,7 +201,7 @@ else fi clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), 
RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" # AWS S3 is very inefficient, so increase memory even further: - clickhouse-client --max_memory_usage 20G --max_memory_usage_for_user 20G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" + clickhouse-client --max_memory_usage 30G --max_memory_usage_for_user 30G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/tests/queries/0_stateless/02177_issue_31009.sql b/tests/queries/0_stateless/02177_issue_31009.sql index f25df59f4b4..5c62b5a9c2f 100644 --- a/tests/queries/0_stateless/02177_issue_31009.sql +++ 
b/tests/queries/0_stateless/02177_issue_31009.sql
@@ -8,6 +8,8 @@ DROP TABLE IF EXISTS right;
 CREATE TABLE left ( key UInt32, value String ) ENGINE = MergeTree ORDER BY key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 CREATE TABLE right ( key UInt32, value String ) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';

+SET max_rows_to_read = '50M';
+
 INSERT INTO left SELECT number, toString(number) FROM numbers(25367182);
 INSERT INTO right SELECT number, toString(number) FROM numbers(23124707);

diff --git a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
index 443808e7bed..8f18f3740e4 100644
--- a/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
+++ b/tests/queries/1_stateful/00088_global_in_one_shard_and_rows_before_limit.sql
@@ -1,4 +1,4 @@
 -- Tags: shard

-SET output_format_write_statistics = 0, max_rows_to_read = 20_000_000;
+SET output_format_write_statistics = 0, max_rows_to_read = 50_000_000;
 SELECT EventDate, count() FROM remote('127.0.0.1', test.hits) WHERE UserID GLOBAL IN (SELECT UserID FROM test.hits) GROUP BY EventDate ORDER BY EventDate LIMIT 5 FORMAT JSONCompact;
diff --git a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
index c156f073573..f0b249e9af4 100644
--- a/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
+++ b/tests/queries/1_stateful/00147_global_in_aggregate_function.sql
@@ -1,5 +1,5 @@
 -- Tags: global

-SET max_rows_to_read = 40_000_000;
+SET max_rows_to_read = 100_000_000;
 SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits);
 SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits);
diff --git a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
index 1a98a531067..184a8edcbcb 100644
--- a/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
+++ b/tests/queries/1_stateful/00167_read_bytes_from_fs.sql
@@ -1,6 +1,6 @@
 -- Tags: no-random-settings

-SET max_memory_usage = '10G'
+SET max_memory_usage = '10G';

 SELECT sum(cityHash64(*)) FROM test.hits SETTINGS max_threads=40;
 -- We had a bug which led to additional compressed data read. test.hits compressed size is about 1.2Gb, but we read more than 3Gb.
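
A note on the missing-semicolon fixes in this series (00906_low_cardinality_cache.sql and 01485_256_bit_multiply.sql in an earlier patch, 00167_read_bytes_from_fs.sql just above): clickhouse-client runs these scripts in multi-statement mode and splits them on ';', so a SET without the terminating semicolon is glued to the statement that follows it and the combined text fails to parse, meaning the limit is never applied at all. A minimal sketch of the difference, with an illustrative table name (some_table is not from these tests):

SET max_rows_to_read = '100M'
SELECT count() FROM some_table; -- parsed together with the SET above: a syntax error, and the limit never takes effect

SET max_rows_to_read = '100M';
SELECT count() FROM some_table; -- the override now applies for the rest of the session; the value 0, used throughout these patches, disables the check entirely
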
diff --git a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql index 85bad651090..26e112cff04 100644 --- a/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql +++ b/tests/queries/1_stateful/00182_simple_squashing_transform_bug.sql @@ -1,7 +1,7 @@ -- Tags: global set allow_prefetched_read_pool_for_remote_filesystem=0, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0, max_threads=2, max_block_size=65387; -set max_rows_to_read = '20M'; +set max_rows_to_read = '100M'; SELECT sum(UserID GLOBAL IN (SELECT UserID FROM remote('127.0.0.{1,2}', test.hits))) FROM remote('127.0.0.{1,2}', test.hits); SELECT sum(UserID GLOBAL IN (SELECT UserID FROM test.hits)) FROM remote('127.0.0.{1,2}', test.hits); From 6725546b312ef52675f8fbd5f41d0ef5327a3e8a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 19:35:21 +0200 Subject: [PATCH 032/180] Update some tests --- .../02884_parallel_window_functions.sql | 18 ++++++++++-------- .../03143_asof_join_ddb_long.reference | 4 ++-- .../0_stateless/03143_asof_join_ddb_long.sql | 4 ++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index c5ab013a198..ea1cd458c65 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -1,6 +1,6 @@ -- Tags: long, no-tsan, no-asan, no-ubsan, no-msan, no-debug -CREATE TABLE window_funtion_threading +CREATE TABLE window_function_threading Engine = MergeTree ORDER BY (ac, nw) AS SELECT @@ -20,7 +20,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -40,7 +40,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -58,7 +58,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading GROUP BY ac, nw ) GROUP BY nw @@ -66,6 +66,8 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; +SET max_rows_to_read = 30000000; + SELECT nw, sum(WR) AS R, @@ -77,7 +79,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 0 GROUP BY ac, @@ -88,7 +90,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 1 GROUP BY ac, @@ -99,7 +101,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 2 GROUP BY ac, @@ -110,7 +112,7 @@ FROM AVG(wg) AS WR, ac, nw - FROM window_funtion_threading + FROM window_function_threading WHERE (ac % 4) = 3 GROUP BY ac, diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference index 2850a8aba98..ae7f7c805f2 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.reference +++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.reference @@ -1,2 +1,2 @@ -49999983751397 10000032 -49999983751397 10000032 +7999995751397 4000032 +7999995751397 4000032 diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql index 17a67511030..a635fd2e86a 100644 --- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql +++ 
b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql @@ -11,7 +11,7 @@ AS toDateTime('1990-03-21 13:00:00') + INTERVAL number MINUTE AS begin, number % 4 AS key, number AS value - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); CREATE TABLE skewed_probe ENGINE = MergeTree ORDER BY (key, begin) AS @@ -33,7 +33,7 @@ AS SELECT toDateTime('1990-03-21 13:00:01') + INTERVAL number MINUTE AS begin, 3 AS key - FROM numbers(0, 10000000); + FROM numbers(0, 4000000); SELECT SUM(value), COUNT(*) From a7441669aa87b1551d2211ec4c3e550aaaa86b41 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 25 Jul 2024 21:43:03 +0200 Subject: [PATCH 033/180] Update some tests --- ...675_profile_events_from_query_log_and_client.sh | 2 +- .../02884_parallel_window_functions.sql | 2 +- ...967_parallel_replicas_join_algo_and_analyzer.sh | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index e346d9893a7..1cf65ed8120 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -9,7 +9,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "INSERT TO S3" $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -nq " INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/profile_events.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10, s3_truncate_on_insert = 1; -" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | sort +" 2>&1 | grep -o -e '\ \[\ .*\ \]\ S3.*:\ .*\ ' | grep -v 'Microseconds' | grep -v 'S3DiskConnections' | grep -v 'S3DiskAddresses' | grep -v 'RequestThrottlerCount' | sort echo "CHECK WITH query_log" $CLICKHOUSE_CLIENT -nq " diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.sql b/tests/queries/0_stateless/02884_parallel_window_functions.sql index ea1cd458c65..2207c90a4ee 100644 --- a/tests/queries/0_stateless/02884_parallel_window_functions.sql +++ b/tests/queries/0_stateless/02884_parallel_window_functions.sql @@ -66,7 +66,7 @@ ORDER BY nw ASC, R DESC LIMIT 10 SETTINGS max_threads = 1; -SET max_rows_to_read = 30000000; +SET max_rows_to_read = 40000000; SELECT nw, diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh index 2840482da6d..8cefa873940 100755 --- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh +++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh @@ -90,7 +90,7 @@ $CLICKHOUSE_CLIENT -q " select * from (select key, value from num_1) l inner join (select key, value from num_2) r on l.key = r.key order by l.key limit 10 offset 700000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | 
grep "executeQuery\|.*Coordinator: Coordination done" | @@ -128,7 +128,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -154,7 +154,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -179,7 +179,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -205,7 +205,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r on l.key = r.key order by l.key limit 10 offset 10000 -SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', +SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace', allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 | grep "executeQuery\|.*Coordinator: Coordination done" | @@ -230,7 +230,7 @@ select * from (select key, value from num_1) l inner join (select key, value from num_2 inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r on l.key = r.key order by l.key limit 10 offset 
From ba5a07bcc7b78673e58d0501e85b59e929215bac Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 25 Jul 2024 21:47:41 +0200
Subject: [PATCH 034/180] Better tests

---
 .../01301_aggregate_state_exception_memory_leak.reference | 2 +-
 .../0_stateless/01301_aggregate_state_exception_memory_leak.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference
index b20e7415f52..6282bf366d0 100644
--- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference
+++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.reference
@@ -1,2 +1,2 @@
-Memory limit (for query) exceeded
+Memory limit exceeded
 Ok
diff --git a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
index 9dd800ceb09..266518d11d4 100755
--- a/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
+++ b/tests/queries/0_stateless/01301_aggregate_state_exception_memory_leak.sh
@@ -16,5 +16,5 @@ for _ in {1..1000}; do
     if [[ $elapsed -gt 30 ]]; then
         break
     fi
-done 2>&1 | grep -o -F 'Memory limit (for query) exceeded' | uniq
+done 2>&1 | grep -o -P 'Memory limit .+ exceeded' | sed -r -e 's/(Memory limit)(.+)( exceeded)/\1\3/' | uniq
 echo 'Ok'
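The grep/sed change above exists because the wording of the exception depends on which memory tracker fires ("for query", "for user", or the total server tracker), so the test now normalizes the message before comparing. A hedged sketch of a query that trips the per-query tracker; the exact wording of the thrown message may vary between versions:

    SELECT uniqExact(toString(number)) FROM numbers(100000000)
    SETTINGS max_memory_usage = '10Mi'; -- { serverError MEMORY_LIMIT_EXCEEDED }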
From f81e8aa345d64f5fbcae103f92cdc649f0d82d24 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 25 Jul 2024 22:08:32 +0200
Subject: [PATCH 035/180] Update tests

---
 .../queries/0_stateless/01010_pmj_right_table_memory_limits.sql | 2 ++
 tests/queries/0_stateless/01304_direct_io_long.sh | 2 +-
 .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +-
 tests/queries/0_stateless/02151_lc_prefetch.sql | 1 +
 .../queries/0_stateless/02344_insert_profile_events_stress.sql | 1 +
 ...02354_distributed_with_external_aggregation_memory_usage.sql | 2 ++
 .../02481_parquet_list_monotonically_increasing_offsets.sh | 2 +-
 tests/queries/0_stateless/02497_remote_disk_fat_column.sql | 2 +-
 .../02896_max_execution_time_with_break_overflow_mode.sql | 2 +-
 tests/queries/0_stateless/03143_asof_join_ddb_long.sql | 1 +
 10 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql
index a090be85221..b8f2596f3d5 100644
--- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql
+++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql
@@ -1,5 +1,7 @@
 -- Tags: no-parallel, no-fasttest, no-random-settings
 
+SET max_bytes_in_join = 0;
+SET max_rows_in_join = 0;
 SET max_memory_usage = 32000000;
 SET join_on_disk_max_files_to_merge = 4;
 
diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh
index a66239058ab..35d1440bcb5 100755
--- a/tests/queries/0_stateless/01304_direct_io_long.sh
+++ b/tests/queries/0_stateless/01304_direct_io_long.sh
@@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT --max_rows_to_read 50M --multiquery "
     INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000);
     OPTIMIZE TABLE bug FINAL;"
 LOG="$CLICKHOUSE_TMP/err-$CLICKHOUSE_DATABASE"
-$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG"
+$CLICKHOUSE_BENCHMARK --max_rows_to_read 51M --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$LOG"
 cat "$LOG" | grep Exception
 cat "$LOG" | grep Loaded
diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
index 6625ad916e8..6172afbc699 100644
--- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
@@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n
 -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block,
 -- so the initiator will first receive all blocks from remotes and only after start merging,
 -- and will hit the memory limit.
-select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED }
+select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296, max_rows_to_read=0; -- { serverError MEMORY_LIMIT_EXCEEDED }
 
 -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently,
 -- since they don't need to wait until the aggregation will be finished,
diff --git a/tests/queries/0_stateless/02151_lc_prefetch.sql b/tests/queries/0_stateless/02151_lc_prefetch.sql
index c2b97231145..f8c76038120 100644
--- a/tests/queries/0_stateless/02151_lc_prefetch.sql
+++ b/tests/queries/0_stateless/02151_lc_prefetch.sql
@@ -3,5 +3,6 @@ drop table if exists tab_lc;
 CREATE TABLE tab_lc (x UInt64, y LowCardinality(String)) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into tab_lc select number, toString(number % 10) from numbers(20000000);
 optimize table tab_lc;
+SET max_rows_to_read = '21M';
 select count() from tab_lc where y == '0' settings local_filesystem_read_prefetch=1;
 drop table if exists tab_lc;
diff --git a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql
index e9a790bea5d..902e1da543c 100644
--- a/tests/queries/0_stateless/02344_insert_profile_events_stress.sql
+++ b/tests/queries/0_stateless/02344_insert_profile_events_stress.sql
@@ -1,4 +1,5 @@
 -- Tags: no-parallel, long, no-debug, no-tsan, no-msan, no-asan
+SET max_rows_to_read = 0;
 
 create table data_02344 (key Int) engine=Null;
 -- 3e9 rows is enough to fill the socket buffer and cause INSERT hung.
diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
index 105fb500461..82eb4c93e3d 100644
--- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
+++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
@@ -1,5 +1,7 @@
 -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage
 
+SET max_rows_to_read = '51M';
+
 DROP TABLE IF EXISTS t_2354_dist_with_external_aggr;
 
 create table t_2354_dist_with_external_aggr(a UInt64, b String, c FixedString(100)) engine = MergeTree order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh
index 55e6ac2f758..3e28e76d6da 100755
--- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh
+++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh
@@ -11,7 +11,7 @@ echo "Parquet"
 DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet
 ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load"
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory"
-cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet"
+cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet"
 ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum
 ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load"
 ${CLICKHOUSE_CLIENT} --query="drop table parquet_load"
diff --git a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql
index d97109b66f3..65519296602 100644
--- a/tests/queries/0_stateless/02497_remote_disk_fat_column.sql
+++ b/tests/queries/0_stateless/02497_remote_disk_fat_column.sql
@@ -2,7 +2,7 @@ set allow_suspicious_fixed_string_types=1;
 create table fat_granularity (x UInt32, fat FixedString(160000)) engine = MergeTree order by x settings storage_policy = 's3_cache';
 
-insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 8192, max_insert_threads=8;
+insert into fat_granularity select number, toString(number) || '_' from numbers(100000) settings max_block_size = 3000, max_insert_threads = 8, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0;
 
 -- Too large sizes of FixedString to deserialize
 select x from fat_granularity prewhere fat like '256\_%' settings max_threads=2;
diff --git a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql
index 3e131cad0f0..ecaad62b35a 100644
--- a/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql
+++ b/tests/queries/0_stateless/02896_max_execution_time_with_break_overflow_mode.sql
@@ -1,6 +1,6 @@
 -- Tags: no-fasttest
 
-SET max_rows_to_read = 0;
+SET max_rows_to_read = 0, max_execution_time = 0, max_estimated_execution_time = 0;
 
 -- Query stops after timeout without an error
 SELECT * FROM numbers(100000000) SETTINGS max_block_size=1, max_execution_time=2, timeout_overflow_mode='break' FORMAT Null;
diff --git a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql
index a635fd2e86a..18d98dbdfe4 100644
--- a/tests/queries/0_stateless/03143_asof_join_ddb_long.sql
+++ b/tests/queries/0_stateless/03143_asof_join_ddb_long.sql
@@ -35,6 +35,7 @@ AS
         3 AS key
     FROM numbers(0, 4000000);
 
+SET max_rows_to_read = 0;
 SELECT SUM(value), COUNT(*)
 FROM skewed_probe
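Most of the SET max_rows_to_read = 0 lines added in the patch above rely on the convention that 0 means "unlimited" for the max_* limit settings, so they lift the suite-wide cap rather than forbidding reads. For example:

    SET max_rows_to_read = 0; -- 0 disables the row-read limit entirely
    SELECT sum(number) FROM numbers(100000000); -- no longer restricted by the profile cap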
From 9c7078bcf7edfc79dbea2cf6e065aa594810ccaf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 26 Jul 2024 02:54:11 +0200
Subject: [PATCH 036/180] Update tests

---
 tests/queries/0_stateless/00974_query_profiler.sql | 1 +
 ...0_distributed_group_by_no_merge_order_by_long.sql | 3 ++-
 .../0_stateless/02122_join_group_by_timeout.sh | 8 ++++----
 ...ibuted_with_external_aggregation_memory_usage.sql | 2 +-
 ..._parquet_list_monotonically_increasing_offsets.sh | 2 +-
 .../02884_parallel_window_functions.reference | 12 ++++++------
 6 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql
index 24e4241b813..71ea14c3d64 100644
--- a/tests/queries/0_stateless/00974_query_profiler.sql
+++ b/tests/queries/0_stateless/00974_query_profiler.sql
@@ -15,6 +15,7 @@ SELECT count() > 0 FROM system.trace_log t WHERE query_id = (SELECT query_id FRO
 SET query_profiler_real_time_period_ns = 0;
 SET query_profiler_cpu_time_period_ns = 1000000;
 SET log_queries = 1;
+SET max_rows_to_read = 0;
 SELECT count(), ignore('test cpu time query profiler') FROM numbers_mt(10000000000);
 SET log_queries = 0;
 SYSTEM FLUSH LOGS;
diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
index 6172afbc699..e980f367de7 100644
--- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
@@ -12,7 +12,8 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n
 -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block,
 -- so the initiator will first receive all blocks from remotes and only after start merging,
 -- and will hit the memory limit.
-select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296, max_rows_to_read=0; -- { serverError MEMORY_LIMIT_EXCEEDED }
+SET max_rows_to_read = 0;
+select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED }
 
 -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently,
 -- since they don't need to wait until the aggregation will be finished,
diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh
index 59719f75d7c..79c4f01c98a 100755
--- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh
+++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh
@@ -14,7 +14,7 @@ MAX_PROCESS_WAIT=5
 
 # TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor
 ### Should be cancelled after 1 second and return a 159 exception (timeout)
-timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \
+timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 --max_execution_time 1 -q \
     "SELECT * FROM
     (
         SELECT a.name as n
@@ -31,7 +31,7 @@ timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \
     FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq
 
 ### Should stop pulling data and return what has been generated already (return code 0)
-timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \
+timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_result_rows 0 --max_result_bytes 0 -q \
    "SELECT a.name as n
     FROM
    (
@@ -48,7 +48,7 @@ echo $?
 
 # HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor
 ### Should be cancelled after 1 second and return a 159 exception (timeout)
-${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \
+${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0&max_execution_time=1" -d \
     "SELECT * FROM
     (
         SELECT a.name as n
@@ -66,7 +66,7 @@ ${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_exec
 
 
 ### Should stop pulling data and return what has been generated already (return code 0)
-${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \
+${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_result_rows=0&max_result_bytes=0" -d \
    "SELECT a.name as n
     FROM
    (
diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
index 82eb4c93e3d..5eea6f149b5 100644
--- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
+++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
@@ -1,6 +1,6 @@
 -- Tags: long, no-tsan, no-msan, no-asan, no-ubsan, no-debug, no-object-storage
 
-SET max_rows_to_read = '51M';
+SET max_rows_to_read = '101M';
 
 DROP TABLE IF EXISTS t_2354_dist_with_external_aggr;
diff --git a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh
index 3e28e76d6da..2f512697868 100755
--- a/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh
+++ b/tests/queries/0_stateless/02481_parquet_list_monotonically_increasing_offsets.sh
@@ -12,6 +12,6 @@ DATA_FILE=$CUR_DIR/data_parquet/list_monotonically_increasing_offsets.parquet
 ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load"
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (list Array(Int64), json Nullable(String)) ENGINE = Memory"
 cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO parquet_load FORMAT Parquet"
-${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" | md5sum
+${CLICKHOUSE_CLIENT} --max_result_rows 0 --max_result_bytes 0 --query="SELECT * FROM parquet_load" | md5sum
 ${CLICKHOUSE_CLIENT} --query="SELECT count() FROM parquet_load"
 ${CLICKHOUSE_CLIENT} --query="drop table parquet_load"
diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference
index bac15838dc2..a2cc96dda74 100644
--- a/tests/queries/0_stateless/02884_parallel_window_functions.reference
+++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference
@@ -12,7 +12,7 @@ FROM
         AVG(wg) AS WR,
         ac,
         nw
-    FROM window_funtion_threading
+    FROM window_function_threading
     GROUP BY ac, nw
 )
 GROUP BY nw
@@ -32,7 +32,7 @@ FROM
         AVG(wg) AS WR,
         ac,
         nw
-    FROM window_funtion_threading
+    FROM window_function_threading
     GROUP BY ac, nw
 )
 GROUP BY nw
@@ -53,7 +53,7 @@ FROM
         AVG(wg) AS WR,
         ac,
         nw
-    FROM window_funtion_threading
+    FROM window_function_threading
     WHERE (ac % 4) = 0
     GROUP BY
         ac,
@@ -64,7 +64,7 @@ FROM
         AVG(wg) AS WR,
         ac,
         nw
-    FROM window_funtion_threading
+    FROM window_function_threading
     WHERE (ac % 4) = 1
     GROUP BY
         ac,
@@ -75,7 +75,7 @@ FROM
         AVG(wg) AS WR,
         ac,
         nw
-    FROM window_funtion_threading
+    FROM window_function_threading
     WHERE (ac % 4) = 2
     GROUP BY
         ac,
@@ -86,7 +86,7 @@ FROM
         AVG(wg) AS WR,
         ac,
         nw
-    FROM window_funtion_threading
+    FROM window_function_threading
     WHERE (ac % 4) = 3
     GROUP BY
         ac,
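max_result_rows and max_result_bytes, which the patch above starts passing explicitly, limit the size of the result set rather than the amount of data read; with result_overflow_mode = 'break' the server truncates the result instead of throwing. A minimal sketch:

    -- Returns a truncated result instead of an exception:
    SELECT number FROM numbers(1000000)
    SETTINGS max_result_rows = 10, result_overflow_mode = 'break';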
From a03f12fe76969bc2002bc76ce04f7097cb02295b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 26 Jul 2024 14:17:17 +0200
Subject: [PATCH 037/180] Fix some tests

---
 ...2354_distributed_with_external_aggregation_memory_usage.sql | 2 +-
 .../0_stateless/02884_parallel_window_functions.reference | 1 +
 tests/queries/0_stateless/replication.lib | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
index 5eea6f149b5..f9da5b3a73c 100644
--- a/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
+++ b/tests/queries/0_stateless/02354_distributed_with_external_aggregation_memory_usage.sql
@@ -25,6 +25,6 @@ select a, b, c, sum(a) as s
 from remote('127.0.0.{2,3}', currentDatabase(), t_2354_dist_with_external_aggr)
 group by a, b, c
 format Null
-settings max_memory_usage = '5Gi';
+settings max_memory_usage = '5Gi', max_result_rows = 0, max_result_bytes = 0;
 
 DROP TABLE t_2354_dist_with_external_aggr;
diff --git a/tests/queries/0_stateless/02884_parallel_window_functions.reference b/tests/queries/0_stateless/02884_parallel_window_functions.reference
index a2cc96dda74..1f5346a1484 100644
--- a/tests/queries/0_stateless/02884_parallel_window_functions.reference
+++ b/tests/queries/0_stateless/02884_parallel_window_functions.reference
@@ -42,6 +42,7 @@ SETTINGS max_threads = 1;
 0 2 0
 1 2 0
 2 2 0
+SET max_rows_to_read = 40000000;
 SELECT
     nw,
     sum(WR) AS R,
diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib
index 05651531fba..dcac721859e 100755
--- a/tests/queries/0_stateless/replication.lib
+++ b/tests/queries/0_stateless/replication.lib
@@ -114,7 +114,8 @@ function check_replication_consistency()
 
     # it's important to disable prefer warmed unmerged parts because
    # otherwise it can read non-syncrhonized state of replicas
-    res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q \
+    # also, disable the limit that is set for tests globally
+    res=$($CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_rows_to_read=0 -q \
    "SELECT
      if((countDistinct(data) as c) == 0, 1, c)
     FROM
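When adjusting tests like the ones above, it helps to check which limits are actually in effect for the session; system.settings exposes the current values, and the `changed` column marks those that differ from the defaults:

    SELECT name, value, changed
    FROM system.settings
    WHERE name IN ('max_rows_to_read', 'max_result_rows', 'max_memory_usage_for_user');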
From 7f4eb59c42aa4f432c99fa7e4f8240056fa26b91 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 26 Jul 2024 14:33:40 +0200
Subject: [PATCH 038/180] Fix some tests

---
 .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
index e980f367de7..805e0b4fedb 100644
--- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
@@ -12,7 +12,7 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n
 -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block,
 -- so the initiator will first receive all blocks from remotes and only after start merging,
 -- and will hit the memory limit.
-SET max_rows_to_read = 0;
+SET max_rows_to_read = 0, max_result_rows = 0;
 select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED }
 
 -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently,
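The reason this test needs both settings relaxed is that the two limits are checked at different stages: max_rows_to_read counts rows scanned from the source, while max_result_rows counts rows of the final result. A sketch of the distinction:

    SELECT number FROM numbers(1000) LIMIT 10
    SETTINGS max_rows_to_read = 1000, -- applies to rows read from the table
             max_result_rows = 10;    -- applies to rows in the result set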
From 80b925ec75983ea558be536d657bfd2d277f1fbc Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 26 Jul 2024 14:47:58 +0200
Subject: [PATCH 039/180] Update test

---
 ...arallel_replicas_join_algo_and_analyzer.sh | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh
index 8cefa873940..2c5b5a2a07b 100755
--- a/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh
+++ b/tests/queries/0_stateless/02967_parallel_replicas_join_algo_and_analyzer.sh
@@ -86,11 +86,11 @@ SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2) r on l.key = r.key
 order by l.key limit 10 offset 700000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
@@ -123,12 +123,12 @@ SETTINGS allow_experimental_analyzer=1,
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2
   inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r
 on l.key = r.key order by l.key limit 10 offset 10000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, join_algorithm='full_sorting_merge', send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=1" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
@@ -149,12 +149,12 @@ SETTINGS allow_experimental_analyzer=1,
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2
   inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=0) r
 on l.key = r.key order by l.key limit 10 offset 10000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
@@ -174,12 +174,12 @@ SETTINGS allow_experimental_analyzer=1,
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2
   inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings parallel_replicas_prefer_local_join=1) r
 on l.key = r.key order by l.key limit 10 offset 10000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', parallel_replicas_prefer_local_join=0" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
@@ -200,12 +200,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0,
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2
   inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r
 on l.key = r.key order by l.key limit 10 offset 10000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
@@ -225,12 +225,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0,
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2
   inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='hash') r
 on l.key = r.key order by l.key limit 10 offset 10000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='full_sorting_merge'" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
@@ -250,12 +250,12 @@ SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0,
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'"
 
-$CLICKHOUSE_CLIENT -q "
+$CLICKHOUSE_CLIENT --max_rows_in_set_to_optimize_join 0 -q "
 select * from (select key, value from num_1) l
 inner join (select key, value from num_2
   inner join (select number * 7 as key from numbers(1e5)) as nn on num_2.key = nn.key settings join_algorithm='full_sorting_merge') r
 on l.key = r.key order by l.key limit 10 offset 10000
-SETTINGS max_rows_to_read=0, allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace',
+SETTINGS allow_experimental_analyzer=1, parallel_replicas_prefer_local_join=0, send_logs_level='trace',
 allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1,
 cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', join_algorithm='hash'" 2>&1 |
 grep "executeQuery\|.*Coordinator: Coordination done" |
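The patch above moves max_rows_in_set_to_optimize_join out of the query text and into a clickhouse-client option. A setting passed as a client option does not change the query text itself, so the executeQuery lines this test greps from the log keep matching the reference; presumably that is why the in-query form was dropped. In SQL terms, the two forms below have the same effect but differ in the logged statement:

    -- The setting is part of the query text (and of the query log entry):
    SELECT 1 SETTINGS max_rows_in_set_to_optimize_join = 0;
    -- The same effect without touching the query text:
    SET max_rows_in_set_to_optimize_join = 0;
    SELECT 1;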
From 342bbb53c65d32698d55649252e6bc29f2a557b8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 26 Jul 2024 14:49:36 +0200
Subject: [PATCH 040/180] It was an optimization, not a limit

---
 tests/config/users.d/limits.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml
index 23aaccf9298..63c4e884a9d 100644
--- a/tests/config/users.d/limits.yaml
+++ b/tests/config/users.d/limits.yaml
@@ -34,7 +34,6 @@ profiles:
         max_bytes_in_set: 10G
         max_rows_in_join: 10G
         max_bytes_in_join: 10G
-        max_rows_in_set_to_optimize_join: 1G
         max_rows_to_transfer: 1G
         max_bytes_to_transfer: 1G
         max_rows_in_distinct: 10G
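As the commit message says, max_rows_in_set_to_optimize_join is not a safety limit: it bounds an optimization of the full sorting merge join that builds row sets from each join side to pre-filter the other, and 0 disables that optimization without failing the query. A minimal sketch, with hypothetical tables t1 and t2:

    SELECT * FROM t1 JOIN t2 ON t1.key = t2.key
    SETTINGS join_algorithm = 'full_sorting_merge',
             max_rows_in_set_to_optimize_join = 0; -- skip set-based pre-filtering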
b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
index 805e0b4fedb..6eb55839f5e 100644
--- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
@@ -1,6 +1,7 @@
 -- Tags: long, distributed, no-random-settings
 
 drop table if exists data_01730;
+SET max_rows_to_read = 0, max_result_rows = 0;
 
 -- does not use 127.1 due to prefer_localhost_replica
@@ -12,7 +13,6 @@ select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by n
 -- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block,
 -- so the initiator will first receive all blocks from remotes and only after start merging,
 -- and will hit the memory limit.
-SET max_rows_to_read = 0, max_result_rows = 0;
 select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='20Mi', max_block_size=4294967296; -- { serverError MEMORY_LIMIT_EXCEEDED }
 
 -- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently,

From 16e84d1e3678b56d0b32dff1377b6daadf870dd2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 27 Jul 2024 02:37:54 +0200
Subject: [PATCH 042/180] Update test

---
 tests/queries/0_stateless/01651_lc_insert_tiny_log.sql | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql
index d11c9120c61..bc5553ad227 100644
--- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql
+++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql
@@ -1,7 +1,7 @@
-set allow_suspicious_low_cardinality_types=1;
+set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '21M';
 
 drop table if exists perf_lc_num;
-CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = TinyLog;
+CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog;
 
 INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
 
@@ -16,7 +16,7 @@ select sum(length(arr)), sum(num) from perf_lc_num;
 
 drop table if exists perf_lc_num;
 
-CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = Log;
+CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = Log;
 
 INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
 
@@ -31,7 +31,7 @@ select sum(length(arr)), sum(num) from perf_lc_num;
 
 drop table if exists perf_lc_num;
 
-CREATE TABLE perf_lc_num(  num UInt8,  arr Array(LowCardinality(Int64)) default [num]  ) ENGINE = StripeLog;
+CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = StripeLog;
 
 INSERT INTO perf_lc_num (num) SELECT toUInt8(number) FROM numbers(10000000);
 
@@ -44,5 +44,3 @@ select sum(length(arr)) from perf_lc_num;
 select sum(length(arr)), sum(num) from perf_lc_num;
 
 drop table if exists perf_lc_num;
-
-
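The CREATE TABLE lines rewritten above contained stray non-breaking spaces (the "Â " sequences, mis-encoded U+00A0 characters), which the patch replaces with plain spaces. The same hunk shows the string form of limit values: suffixed strings such as '21M' are accepted alongside plain integers. For illustration (assuming the decimal meaning of the M suffix, with 'Mi' as the binary form):

    SET max_rows_to_read = '31M';    -- string with a size suffix
    SET max_rows_to_read = 31000000; -- the equivalent plain integer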
b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql
index bc5553ad227..b1d6a39d5c9 100644
--- a/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql
+++ b/tests/queries/0_stateless/01651_lc_insert_tiny_log.sql
@@ -1,4 +1,4 @@
-set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '21M';
+set allow_suspicious_low_cardinality_types = 1, max_rows_to_read = '31M';
 
 drop table if exists perf_lc_num;
 CREATE TABLE perf_lc_num( num UInt8, arr Array(LowCardinality(Int64)) default [num] ) ENGINE = TinyLog;

From 9a6de84559cdf927e8747e4cd536676fd3b1c513 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 27 Jul 2024 10:39:55 +0200
Subject: [PATCH 044/180] Update tests

---
 .../00086_concat_nary_const_with_nonconst_segfault.sql | 2 +-
 .../01730_distributed_group_by_no_merge_order_by_long.sql | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql
index 2f0ef648983..4b87b2af28d 100644
--- a/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql
+++ b/tests/queries/0_stateless/00086_concat_nary_const_with_nonconst_segfault.sql
@@ -1 +1 @@
-SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1
+SELECT extract(toString(number), '10000000') FROM system.numbers_mt WHERE concat(materialize('1'), '...', toString(number)) LIKE '%10000000%' LIMIT 1 SETTINGS max_rows_to_read = 0;
diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
index 6eb55839f5e..83a26c83005 100644
--- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
+++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql
@@ -1,7 +1,7 @@
 -- Tags: long, distributed, no-random-settings
 
 drop table if exists data_01730;
-SET max_rows_to_read = 0, max_result_rows = 0;
+SET max_rows_to_read = 0, max_result_rows = 0, max_bytes_before_external_group_by = 0;
 
 -- does not use 127.1 due to prefer_localhost_replica

From 0dfcab25ed2f98c8a6133dad4061e37e4541c16e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 27 Jul 2024 16:10:15 +0200
Subject: [PATCH 045/180] Better limits

---
 tests/config/users.d/limits.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml
index 63c4e884a9d..46cff73142c 100644
--- a/tests/config/users.d/limits.yaml
+++ b/tests/config/users.d/limits.yaml
@@ -38,7 +38,7 @@ profiles:
         max_bytes_to_transfer: 1G
         max_rows_in_distinct: 10G
         max_bytes_in_distinct: 10G
-        max_memory_usage_for_user: 10G
+        max_memory_usage_for_user: 32G
         max_network_bandwidth: 100G
         max_network_bytes: 1T
         max_network_bandwidth_for_user: 100G
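The reference file updated in the next patch belongs to a test built on SHOW SETTINGS, which prints the name, type, and current value of matching settings, so raising max_memory_usage_for_user in the profile above necessarily changes the expected output. A sketch of the statement (pattern chosen for illustration):

    SHOW SETTINGS LIKE 'max_memory_usage%';
    -- e.g.: max_memory_usage          UInt64 5000000000
    --       max_memory_usage_for_user UInt64 32000000000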
From b1eaec0d49b326df09c46ea3107e98fa083c220d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 27 Jul 2024 23:29:52 +0200
Subject: [PATCH 046/180] Update 01293_show_settings.reference

---
 tests/queries/0_stateless/01293_show_settings.reference | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01293_show_settings.reference b/tests/queries/0_stateless/01293_show_settings.reference
index 8b383813c9f..c4c3473ee18 100644
--- a/tests/queries/0_stateless/01293_show_settings.reference
+++ b/tests/queries/0_stateless/01293_show_settings.reference
@@ -6,6 +6,6 @@ external_storage_connect_timeout_sec UInt64 10
 s3_connect_timeout_ms UInt64 1000
 filesystem_prefetch_max_memory_usage UInt64 1073741824
 max_memory_usage UInt64 5000000000
-max_memory_usage_for_user UInt64 10000000000
+max_memory_usage_for_user UInt64 32000000000
 max_untracked_memory UInt64 1048576
 memory_profiler_step UInt64 1048576

From cd1350c0f380d2c2754231c12aefb891c299b3fb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 28 Jul 2024 14:07:36 +0200
Subject: [PATCH 047/180] Mark some tests as long

---
 .../0_stateless/00111_shard_external_sort_distributed.sql | 2 +-
 .../00377_shard_group_uniq_array_of_string_array.sql | 2 +-
 tests/queries/0_stateless/00906_low_cardinality_cache.sql | 2 ++
 tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql
index ef9c0f9f9d0..93efc317bfa 100644
--- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql
+++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql
@@ -1,4 +1,4 @@
--- Tags: distributed
+-- Tags: distributed, long
 
 SET max_memory_usage = 150000000;
 SET max_bytes_before_external_sort = 10000000;
diff --git a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql
index 180a6a04861..1ec91ac2396 100644
--- a/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql
+++ b/tests/queries/0_stateless/00377_shard_group_uniq_array_of_string_array.sql
@@ -1,4 +1,4 @@
--- Tags: shard
+-- Tags: shard, long
 
 SET max_rows_to_read = '55M';
 DROP TABLE IF EXISTS group_uniq_arr_str;
diff --git a/tests/queries/0_stateless/00906_low_cardinality_cache.sql b/tests/queries/0_stateless/00906_low_cardinality_cache.sql
index 9c1abe1b6df..337fba865fd 100644
--- a/tests/queries/0_stateless/00906_low_cardinality_cache.sql
+++ b/tests/queries/0_stateless/00906_low_cardinality_cache.sql
@@ -1,3 +1,5 @@
+-- Tags: long
+
 SET max_rows_to_read = '100M';
 drop table if exists lc_00906;
 create table lc_00906 (b LowCardinality(String)) engine=MergeTree order by b SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql
index b68d15a2200..85be5082d92 100644
--- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql
+++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql
@@ -1,4 +1,4 @@
--- Tags: no-tsan
+-- Tags: no-tsan, long
 -- Tag no-tsan: Too long for TSan
 
 set enable_filesystem_cache=0;

From 1042fc68c2969ee4963268a97daf522d0e163ac5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 28 Jul 2024 16:41:43 +0200
Subject: [PATCH 048/180] Update test

---
 .../queries/0_stateless/00632_get_sample_block_cache.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql
index ae9b6bb7b2c..6a226c4912a 100644
--- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql
+++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql
@@ -2,6 +2,9 @@
 
 SET joined_subquery_requires_alias = 0;
 
+-- We are no longer interested in the old analyzer.
+SET allow_experimental_analyzer = 1; + -- This test (SELECT) without cache can take tens minutes DROP TABLE IF EXISTS dict_string; DROP TABLE IF EXISTS dict_ui64; @@ -41,8 +44,6 @@ SETTINGS index_granularity = 8192; CREATE TABLE dict_string (entityIri String) ENGINE = Memory; CREATE TABLE dict_ui64 (learnerId UInt64) ENGINE = Memory; ---SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, 
`views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM `CloM8CwMR2`) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN `CloM8CwMR2` WHERE `views`.`repeatingView` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` 
AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN `CloM8CwMR2` AND `courseId` = 1 WHERE `learnerId` IN `tkRpHxGqM1` GROUP BY `entityIri`) USING `entityIri`) FORMAT JSON; - SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average`, if (isNaN((`overall-full-watched-learners-count`/`overall-watchers-count`) * 100), 0, (`overall-full-watched-learners-count`/`overall-watchers-count`) * 100) as `overall-watched-part`, if (isNaN((`full-watched-learners-count`/`watchers-count` * 100)), 0, (`full-watched-learners-count`/`watchers-count` * 100)) as `full-watched-part`, if (isNaN((`rejects-count`/`views-count` * 100)), 0, (`rejects-count`/`views-count` * 100)) as `rejects-part` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, `views-count`, `time-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count`, 
`views-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count`, `overall-full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count`, `overall-watchers-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration`, `full-watched-learners-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average`, `duration` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average`, `views-count-before-full-watched-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count`, `progress-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average`, `rejects-count` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average`, `watched-part-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average`, `views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average`, `repeating-views-count-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average`, `reject-views-duration-average` FROM (SELECT `entityIri`, `watchers-count`, `time-repeating-average` FROM (SELECT `entityIri`, `watchers-count` FROM (SELECT `entityIri` FROM dict_string) ANY LEFT JOIN (SELECT uniq(learnerId) as `watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewDurationSum) as `time-repeating-average`, `entityIri` FROM (SELECT sum(views.viewDuration) as viewDurationSum, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `reject-views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string 
AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(viewsCount) as `repeating-views-count-average`, `entityIri` FROM (SELECT count() as viewsCount, `learnerId`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `courseId` = 1 AND `entityIri` IN dict_string WHERE `views`.`repeatingView` = 1 AND `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.viewDuration) as `views-duration-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(views.watchedPart) as `watched-part-average`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `rejects-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `views`.`reject` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(progressMax) as `progress-average`, `entityIri` FROM (SELECT max(views.progress) as progressMax, `entityIri`, `learnerId` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `learnerId`, `entityIri`) GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedViews) as `views-count-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT any(duration) as `duration`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-watchers-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT uniq(learnerId) as `overall-full-watched-learners-count`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `fullWatched` = 1 AND `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT count() as `views-count`, `entityIri` FROM `video_views` FINAL ARRAY JOIN `views` PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`) ANY LEFT JOIN (SELECT avg(fullWatchedTime) as `time-before-full-watched-average`, `entityIri` FROM `video_views` FINAL PREWHERE `entityIri` IN dict_string AND `courseId` = 1 WHERE `learnerId` IN dict_ui64 GROUP BY `entityIri`) USING `entityIri`); @@ -55,7 +56,7 @@ DROP TABLE video_views; --- Test for tsan: Ensure cache used from one thread +-- Test for tsan: Ensure cache is used from one thread SET max_threads = 32; DROP TABLE IF EXISTS sample_00632; 
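Both variants of the giant query above filter through ClickHouse's `x IN table_name` form (`IN \`CloM8CwMR2\``/`IN \`tkRpHxGqM1\`` before, `IN dict_string`/`IN dict_ui64` after), where a single-column table is read in full and used as the membership set. A minimal sketch of that pattern — the table definitions and sample value here are assumptions for illustration, not part of the test:

    CREATE TABLE dict_string (entityIri String) ENGINE = Memory;
    INSERT INTO dict_string VALUES ('course/1/video/2');
    -- the whole single-column table acts as the IN set:
    SELECT 'course/1/video/2' IN dict_string;  -- returns 1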
@@ -173,7 +174,6 @@ FROM UNION ALL SELECT * FROM ( SELECT * FROM sample_00632 WHERE x > 0 ) ) GROUP BY x - --HAVING c = 1 ORDER BY x ASC ); DROP TABLE sample_00632; From 81714ce561c99710c3350a40d662966d5bb1a86a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 16:44:53 +0200 Subject: [PATCH 049/180] Make test simpler --- tests/queries/0_stateless/02585_query_status_deadlock.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02585_query_status_deadlock.sh b/tests/queries/0_stateless/02585_query_status_deadlock.sh index 6321ac0064a..932cf593393 100755 --- a/tests/queries/0_stateless/02585_query_status_deadlock.sh +++ b/tests/queries/0_stateless/02585_query_status_deadlock.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) QUERY_ID="${CLICKHOUSE_DATABASE}_test_02585_query_to_kill_id_1" $CLICKHOUSE_CLIENT --query_id="$QUERY_ID" --max_rows_to_read 0 -n -q " -create temporary table tmp as select * from numbers(500000000); +create temporary table tmp as select * from numbers(100000000); select * from remote('127.0.0.2', 'system.numbers_mt') where number in (select * from tmp);" &> /dev/null & $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" @@ -19,7 +19,7 @@ do if [ -n "$res" ]; then break fi - sleep 1 + sleep 1 done $CLICKHOUSE_CLIENT -q "kill query where query_id = '$QUERY_ID' sync" &> /dev/null From aec346676127abba85886a828e663ebf05cfa81e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 28 Jul 2024 23:07:28 +0200 Subject: [PATCH 050/180] Update test --- tests/queries/0_stateless/01161_all_system_tables.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01161_all_system_tables.sh b/tests/queries/0_stateless/01161_all_system_tables.sh index 739df782a39..d4a80d074dc 100755 --- a/tests/queries/0_stateless/01161_all_system_tables.sh +++ b/tests/queries/0_stateless/01161_all_system_tables.sh @@ -19,7 +19,7 @@ function run_selects() thread_num=$1 readarray -t tables_arr < <(${CLICKHOUSE_CLIENT} -q "SELECT database || '.' || name FROM system.tables WHERE database in ('system', 'information_schema', 'INFORMATION_SCHEMA') and name != 'zookeeper' and name != 'models' - AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher'") + AND sipHash64(name || toString($RAND)) % $THREADS = $thread_num AND name NOT LIKE '%\\_sender' AND name NOT LIKE '%\\_watcher' AND name != 'coverage_log'") for t in "${tables_arr[@]}" do From e517338182b79aaa70c6b3e2d15c499acccc4d88 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 29 Jul 2024 21:16:19 +0200 Subject: [PATCH 051/180] Update tests --- tests/queries/0_stateless/replication.lib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/replication.lib b/tests/queries/0_stateless/replication.lib index dcac721859e..36309cf0331 100755 --- a/tests/queries/0_stateless/replication.lib +++ b/tests/queries/0_stateless/replication.lib @@ -89,7 +89,7 @@ function check_replication_consistency() # Touch all data to check that it's readable (and trigger PartCheckThread if needed) # it's important to disable prefer warmed unmerged parts because # otherwise it can read non-synchronized state of replicas - while ! $CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do + while ! 
$CLICKHOUSE_CLIENT --prefer_warmed_unmerged_parts_seconds=0 --max_result_rows 0 --max_result_bytes 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT * FROM merge(currentDatabase(), '$table_name_prefix') FORMAT Null" 2>/dev/null; do sleep 1; num_tries=$((num_tries+1)) if [ $num_tries -eq 250 ]; then From d89019293e955452ede3e0abbe4b11ab2a3471bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 06:15:13 +0200 Subject: [PATCH 052/180] Update tests --- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 4 ++-- tests/queries/0_stateless/02700_s3_part_INT_MAX.sh | 2 +- tests/queries/1_stateful/00157_cache_dictionary.sql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 85be5082d92..f41b336be46 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -1,5 +1,5 @@ --- Tags: no-tsan, long --- Tag no-tsan: Too long for TSan +-- Tags: no-tsan, no-msan, long +-- too long. set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; diff --git a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh index c431686b594..cfb38c60615 100755 --- a/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh +++ b/tests/queries/0_stateless/02700_s3_part_INT_MAX.sh @@ -12,7 +12,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # too slow with this. # # Unfortunately, the test has to buffer it in memory. -$CLICKHOUSE_CLIENT --max_memory_usage 10G -nm -q " +$CLICKHOUSE_CLIENT --max_memory_usage 16G -nm -q " INSERT INTO FUNCTION s3('http://localhost:11111/test/$CLICKHOUSE_DATABASE/test_INT_MAX.tsv', '', '', 'TSV') SELECT repeat('a', 1024) FROM numbers((pow(2, 30) * 2) / 1024) SETTINGS s3_max_single_part_upload_size = '5Gi'; diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index 3621ff82126..a7c6c099de6 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,7 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 56ba7c5d48cfa648f1d496d55cdfd50450da0299 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 2024 09:33:17 +0200 Subject: [PATCH 053/180] Update a test --- tests/queries/0_stateless/00632_get_sample_block_cache.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index 6a226c4912a..a631cbb8b86 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -57,7 +57,7 @@ DROP TABLE video_views; -- Test for tsan: Ensure cache is used from one thread -SET max_threads = 32; +SET max_threads = 32, max_memory_usage = '10G'; DROP TABLE IF EXISTS sample_00632; From 458509909da0e6702dbb6023775112e4fa1c9a83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 30 Jul 
2024 23:21:14 +0200 Subject: [PATCH 054/180] Update test --- tests/queries/1_stateful/00157_cache_dictionary.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index a7c6c099de6..bb5a21d0779 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,7 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 33ed33d2af0821785165f0ffe80e3c8086c637e2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 055/180] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From af324f69e955310f54e451d1120e8526ac3150fb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:56:20 +0200 Subject: [PATCH 056/180] Update test --- tests/queries/1_stateful/00158_cache_dictionary_has.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 32c109417de..631a7751550 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -10,6 +10,8 @@ SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PA LIFETIME(MIN 300 MAX 600) LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +SET timeout_before_checking_execution_speed = 300; + SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; @@ -20,4 +22,4 @@ SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) a SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; -DROP DATABASE IF EXISTS db_dict; +DROP DATABASE IF EXISTS db_dict; From 461251b519120dd6b0cb64471bfba70160137e50 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 19:21:47 +0200 Subject: [PATCH 057/180] Update a test --- .../02450_kill_distributed_query_deadlock.sh | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git 
a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh index 445f907bcc5..96692ba325a 100755 --- a/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh +++ b/tests/queries/0_stateless/02450_kill_distributed_query_deadlock.sh @@ -5,20 +5,24 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -# Test that running distributed query and cancel it ASAP, -# this can trigger a hung/deadlock in ProcessorList. -for i in {1..50}; do +# Test that runs a distributed query and cancels it ASAP, +# this has a chance to trigger a hang/deadlock in ProcessorList. +for i in {1..50} +do query_id="$CLICKHOUSE_TEST_UNIQUE_NAME-$i" - $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & - while :; do + $CLICKHOUSE_CLIENT --format Null --query_id "$query_id" --max_rows_to_read 0 --max_bytes_to_read 0 --max_result_rows 0 --max_result_bytes 0 -q "select * from remote('127.{1|2|3|4|5|6}', numbers(1e12))" 2>/dev/null & + while true + do killed_queries="$($CLICKHOUSE_CLIENT -q "kill query where query_id = '$query_id' sync" | wc -l)" - if [[ "$killed_queries" -ge 1 ]]; then + if [[ "$killed_queries" -ge 1 ]] + then break fi done wait -n query_return_status=$? - if [[ $query_return_status -eq 0 ]]; then + if [[ $query_return_status -eq 0 ]] + then echo "Query $query_id should be cancelled, however it returns successfully" fi done From af30e72e187713f4e435ff4f1f2382ea6671ba21 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 1 Aug 2024 23:58:57 +0200 Subject: [PATCH 058/180] Better limits --- tests/config/users.d/limits.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/users.d/limits.yaml b/tests/config/users.d/limits.yaml index 46cff73142c..53cbbfa744a 100644 --- a/tests/config/users.d/limits.yaml +++ b/tests/config/users.d/limits.yaml @@ -26,6 +26,7 @@ profiles: max_execution_time_leaf: 600 max_execution_speed: 100G max_execution_speed_bytes: 10T + timeout_before_checking_execution_speed: 300 max_estimated_execution_time: 600 max_columns_to_read: 20K max_temporary_columns: 20K From fb23cbdef6b38d1ba90aa1bb304487a3bb947e5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 059/180] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 56ba7c5d48cfa648f1d496d55cdfd50450da0299 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 2 Aug 2024 22:52:01 +0200 Subject: [PATCH 060/180] Fix `02481_async_insert_race_long` --- tests/queries/0_stateless/02481_async_insert_race_long.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/tests/queries/0_stateless/02481_async_insert_race_long.sh b/tests/queries/0_stateless/02481_async_insert_race_long.sh index b0088017d32..def97409bc4 100755 --- a/tests/queries/0_stateless/02481_async_insert_race_long.sh +++ b/tests/queries/0_stateless/02481_async_insert_race_long.sh @@ -29,11 +29,8 @@ function insert3() { local TIMELIMIT=$((SECONDS+$1)) while [ $SECONDS -lt "$TIMELIMIT" ]; do - ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" & - sleep 0.05 + ${MY_CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --wait_for_async_insert 1 -q "INSERT INTO async_inserts_race VALUES (7, 'g') (8, 'h')" done - - wait } function select1() From 13b435d281ce39a4f8eee889da482dbf272dd1cf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 31 Jul 2024 17:38:20 +0200 Subject: [PATCH 061/180] Minor change --- src/Databases/DatabaseOnDisk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index f419f5811a1..734f354d9a5 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -313,7 +313,7 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri std::lock_guard lock(mutex); if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table `{}`", table_name); } else { From 113f7e0c8c9cc61ab72f5ccd8bb6e5fac58cbaea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 3 Aug 2024 19:37:38 +0200 Subject: [PATCH 062/180] Maybe better --- tests/queries/1_stateful/00157_cache_dictionary.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/1_stateful/00157_cache_dictionary.sql b/tests/queries/1_stateful/00157_cache_dictionary.sql index bb5a21d0779..f1bee538828 100644 --- a/tests/queries/1_stateful/00157_cache_dictionary.sql +++ b/tests/queries/1_stateful/00157_cache_dictionary.sql @@ -9,7 +9,8 @@ ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS storage_policy = 'default'; -INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192; +INSERT INTO test.hits_1m SELECT * FROM test.hits LIMIT 1000000 + SETTINGS min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_block_size = 8192, max_insert_threads = 1, max_threads = 1; CREATE DATABASE IF NOT EXISTS db_dict; DROP DICTIONARY IF EXISTS db_dict.cache_hits; From 3ebc3852f404a1ce392e9b66b8356fa9da701097 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 22 Jul 2024 15:28:29 +0000 Subject: [PATCH 063/180] Allow filtering ip addresses by ip family in DNS resolver --- programs/server/Server.cpp | 3 + src/Common/DNSResolver.cpp | 93 ++++++-- src/Common/DNSResolver.h | 9 + src/Core/ServerSettings.h | 2 + src/Core/SettingsChangesHistory.cpp | 261 +++++++++++++++++++++++ tests/integration/test_dns_cache/test.py | 67 +++++- 6 files changed, 418 insertions(+), 17 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7800ee9ff00..3126f65ef09 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1768,6 +1768,9 @@ 
try new_server_settings.http_connections_store_limit, }); + DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6); + + if (global_context->isServerCompletelyStarted()) CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 4b577a251af..051e6e63091 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include "DNSPTRResolverProvider.h" @@ -139,12 +141,6 @@ DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host) return addresses; } -DNSResolver::IPAddresses resolveIPAddressWithCache(CacheBase & cache, const std::string & host) -{ - auto [result, _ ] = cache.getOrSet(host, [&host]() {return std::make_shared(resolveIPAddressImpl(host), std::chrono::system_clock::now());}); - return result->addresses; -} - std::unordered_set reverseResolveImpl(const Poco::Net::IPAddress & address) { auto ptr_resolver = DB::DNSPTRResolverProvider::get(); @@ -198,21 +194,90 @@ struct DNSResolver::Impl std::atomic<bool> disable_cache{false}; }; +struct DNSResolver::AddressFilter +{ + struct DNSFilterSettings + { + std::atomic<bool> dns_allow_resolve_names_to_ipv4{true}; + std::atomic<bool> dns_allow_resolve_names_to_ipv6{true}; + }; -DNSResolver::DNSResolver() : impl(std::make_unique<Impl>()), log(getLogger("DNSResolver")) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) + { + bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + + if (dns_resolve_ipv4 && dns_resolve_ipv6) + { + return; + } + if (!dns_resolve_ipv4 && !dns_resolve_ipv6) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_allow_resolve_names_to_ipv6 and dns_allow_resolve_names_to_ipv4 are both disabled"); + } + addresses.erase( + std::remove_if(addresses.begin(), addresses.end(), + [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }), + addresses.end() + ); + } + + void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + { + settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; + settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + } + + DNSFilterSettings settings; +}; + + +DNSResolver::DNSResolver() + : impl(std::make_unique<Impl>()) + , addressFilter(std::make_unique<AddressFilter>()) + , log(getLogger("DNSResolver")) {} + + +DNSResolver::IPAddresses DNSResolver::getResolvedIPAdressessWithFiltering(const std::string & host) +{ + auto addresses = resolveIPAddressImpl(host); + addressFilter->performAddressFiltering(addresses); + + if (addresses.empty()) + { + ProfileEvents::increment(ProfileEvents::DNSError); + throw DB::NetException(ErrorCodes::DNS_ERROR, "After filtering, there are no resolved addresses for host ({}).", host); + } + return addresses; +} + +DNSResolver::IPAddresses DNSResolver::resolveIPAddressWithCache(const std::string & host) +{ + auto [result, _ ] = impl->cache_host.getOrSet(host, [&host, this]() {return std::make_shared(getResolvedIPAdressessWithFiltering(host), std::chrono::system_clock::now());}); + return 
result->addresses; +} Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { return pickAddress(resolveHostAll(host)); // random order -> random pick } +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) +{ + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); +} + DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) { if (impl->disable_cache) - return resolveIPAddressImpl(host); + return getResolvedIPAdressessWithFiltering(host); addToNewHosts(host); - return resolveIPAddressWithCache(impl->cache_host, host); + return resolveIPAddressWithCache(host); } DNSResolver::IPAddresses DNSResolver::resolveHostAll(const std::string & host) @@ -232,7 +297,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host_an splitHostAndPort(host_and_port, host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, UInt16 port) @@ -241,7 +306,7 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(host, port); addToNewHosts(host); - return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(impl->cache_host, host)), port); + return Poco::Net::SocketAddress(pickAddress(resolveIPAddressWithCache(host)), port); } std::vector<Poco::Net::SocketAddress> DNSResolver::resolveAddressList(const std::string & host, UInt16 port) @@ -254,7 +319,7 @@ std::vector<Poco::Net::SocketAddress> DNSResolver::resolveAddressList(const std: if (!impl->disable_cache) addToNewHosts(host); - std::vector<Poco::Net::IPAddress> ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(impl->cache_host, host); + std::vector<Poco::Net::IPAddress> ips = impl->disable_cache ? hostByName(host) : resolveIPAddressWithCache(host); auto ips_end = std::unique(ips.begin(), ips.end()); addresses.reserve(ips_end - ips.begin()); @@ -419,8 +484,8 @@ bool DNSResolver::updateCache(UInt32 max_consecutive_failures) bool DNSResolver::updateHost(const String & host) { - const auto old_value = resolveIPAddressWithCache(impl->cache_host, host); - auto new_value = resolveIPAddressImpl(host); + const auto old_value = resolveIPAddressWithCache(host); + auto new_value = getResolvedIPAdressessWithFiltering(host); const bool result = old_value != new_value; impl->cache_host.set(host, std::make_shared(std::move(new_value), std::chrono::system_clock::now())); return result; diff --git a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 1ddd9d3b991..b35f55dfcd2 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -68,6 +68,8 @@ public: /// Returns true if IP of any host has been changed or an element was dropped (too many failures) bool updateCache(UInt32 max_consecutive_failures); + void setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6); + /// Returns a copy of cache entries std::vector<std::pair<std::string, Poco::Net::IPAddress>> cacheEntries() const; @@ -86,6 +88,10 @@ private: struct Impl; std::unique_ptr<Impl> impl; + + struct AddressFilter; + std::unique_ptr<AddressFilter> addressFilter; + LoggerPtr log; /// Updates cached value and returns true if it has been changed. 
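The two filter flags above are wired from server configuration through setFilterSettings() (see the `dns_allow_resolve_names_to_ipv4`/`dns_allow_resolve_names_to_ipv6` server settings added just below). A hedged sketch of how an operator might pin name resolution to IPv6 only via a config.d override — the file name is an assumption, while the tag names mirror the new settings:

    <clickhouse>
        <dns_allow_resolve_names_to_ipv4>0</dns_allow_resolve_names_to_ipv4>
        <dns_allow_resolve_names_to_ipv6>1</dns_allow_resolve_names_to_ipv6>
    </clickhouse>

With such a config, a hostname whose DNS answer contains only IPv4 records ends up with an empty address list after filtering, and getResolvedIPAdressessWithFiltering() above reports DNS_ERROR.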
@@ -94,6 +100,9 @@ private: void addToNewHosts(const String & host); void addToNewAddresses(const Poco::Net::IPAddress & address); + + IPAddresses resolveIPAddressWithCache(const std::string & host); + IPAddresses getResolvedIPAdressessWithFiltering(const std::string & host); }; } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index d13e6251ca9..6c23e3b95f6 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -106,6 +106,8 @@ namespace DB M(UInt64, dns_cache_max_entries, 10000, "Internal DNS cache max entries.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ M(UInt32, dns_max_consecutive_failures, 10, "Max DNS resolve failures of a hostname before dropping the hostname from ClickHouse DNS cache.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv4, true, "Allows resolving names to IPv4 addresses.", 0) \ + M(Bool, dns_allow_resolve_names_to_ipv6, true, "Allows resolving names to IPv6 addresses.", 0) \ \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 0ccbd874a3d..a76e214b1fc 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,6 +500,267 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, + {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory."}, + {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, + {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, + {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, + {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, + }}, + {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, + {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, + {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, + {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, + {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, + {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, + {"output_format_compression_level", 3, 3, "Allow to change compression 
level in the query output"}, + {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, + {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, + {"enable_vertical_final", false, true, "Use vertical final by default"}, + {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, + {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, + {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, + {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, + {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, + {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, + {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, + {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, + {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL optimization"}, + {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, + {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, + {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, + {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, + {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, + {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, + {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, + {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, + {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, + {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, + {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, + {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, + {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with 
count_distinct_implementation configuration"}}}, + {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, + {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, + {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, + {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, + {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"}, + {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, + {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, + {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, + {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, + {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, + {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, + {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, + {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, + {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, + {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, + {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, + {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, + {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, + {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, + {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, + {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, + {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, + {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, + {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, + 
{"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, + {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, + {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, + {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, + {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, + {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, + {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries"}}}, + {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, + {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, + {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, + {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, + {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, + {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, + {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, + {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, + {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, + {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, + {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, + {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, + {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, + {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, + {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, + {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, + {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, + {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. 
It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, + {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, + {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, + {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, + {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, + {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, + {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. It is significantly more convenient to use than sequential quorum inserts"}, + {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, + {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, + {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, + {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to make the behaviour more like to what users expect"}}}, + {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, + {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, + {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, + {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, + {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, + {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, + {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, + {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, + {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index a6db26c8575..5e120dc42aa 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,6 +32,7 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", + ipv4_address="10.5.95.11", ) @@ -39,9 +40,6 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() - - _fill_nodes([node1, node2], "test_table_drop") - yield cluster except Exception 
as ex: @@ -59,6 +57,8 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resolving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. + _fill_nodes([node1, node2], "test_table_drop") + node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,6 +98,67 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + <clickhouse> + <dns_allow_resolve_names_to_ipv4>{int(allow_ipv4)}</dns_allow_resolve_names_to_ipv4> + <dns_allow_resolve_names_to_ipv6>{int(allow_ipv6)}</dns_allow_resolve_names_to_ipv6> + </clickhouse> + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + host_ipv6 = node2.ipv6_address + host_ipv4 = node2.ipv4_address + + node2.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node2.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node2.query("SYSTEM DROP DNS CACHE") + node2.query("SYSTEM DROP CONNECTIONS CACHE") + node2.query("SYSTEM RELOAD CONFIG") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node2.query("SELECT * FROM remote('test_host', 'system', 'one')") + else: + node2.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node2.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node2.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node2.query("SYSTEM RELOAD CONFIG") + + node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], From 0d5cb9f75a527e90aed18860efbd5ed1f9dcd775 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Tue, 23 Jul 2024 09:25:02 +0000 Subject: [PATCH 064/180] Review fixes --- src/Common/DNSResolver.cpp | 34 ++++++++++++++--------------- src/Core/SettingsChangesHistory.cpp | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 051e6e63091..08111d7f2af 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -12,8 +12,7 @@ #include #include #include -#include -#include +#include "Common/MultiVersion.h" #include #include "DNSPTRResolverProvider.h" @@ -198,14 +197,17 @@ struct DNSResolver::AddressFilter { struct DNSFilterSettings { - std::atomic<bool> dns_allow_resolve_names_to_ipv4{true}; - std::atomic<bool> dns_allow_resolve_names_to_ipv6{true}; + bool dns_allow_resolve_names_to_ipv4{true}; + bool dns_allow_resolve_names_to_ipv6{true}; }; + AddressFilter() : settings(std::make_unique<DNSFilterSettings>()) {} + void performAddressFiltering(DNSResolver::IPAddresses & addresses) { - bool dns_resolve_ipv4 = settings.dns_allow_resolve_names_to_ipv4; - bool dns_resolve_ipv6 = settings.dns_allow_resolve_names_to_ipv6; + const auto current_settings = settings.get(); + bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; + bool dns_resolve_ipv6 = current_settings->dns_allow_resolve_names_to_ipv6; if (dns_resolve_ipv4 && dns_resolve_ipv6) { @@ -215,24 +217,20 @@ struct 
DNSResolver::AddressFilter { throw Exception(ErrorCodes::BAD_ARGUMENTS, "DNS can't resolve any address, because dns_allow_resolve_names_to_ipv6 and dns_allow_resolve_names_to_ipv4 are both disabled"); } - addresses.erase( - std::remove_if(addresses.begin(), addresses.end(), - [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) - { - return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) - || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); - }), - addresses.end() - ); + + std::erase_if(addresses, [dns_resolve_ipv6, dns_resolve_ipv4](const Poco::Net::IPAddress& address) + { + return (address.family() == Poco::Net::IPAddress::IPv6 && !dns_resolve_ipv6) + || (address.family() == Poco::Net::IPAddress::IPv4 && !dns_resolve_ipv4); + }); } - void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) { - settings.dns_allow_resolve_names_to_ipv4 = dns_allow_resolve_names_to_ipv4_; - settings.dns_allow_resolve_names_to_ipv6 = dns_allow_resolve_names_to_ipv6_; + settings.set(std::make_unique<DNSFilterSettings>(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); } - DNSFilterSettings settings; + MultiVersion<DNSFilterSettings> settings; }; diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index a76e214b1fc..01b9bca795f 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,7 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 12:58:50 +0000 Subject: [PATCH 065/180] Fix test --- src/Core/SettingsChangesHistory.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 01b9bca795f..ac427e2e03e 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -523,9 +523,7 @@ static std::initializer_list Date: Tue, 23 Jul 2024 16:56:29 +0000 Subject: [PATCH 066/180] Fix tidy build --- src/Common/DNSResolver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 08111d7f2af..bbee7d259f0 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -203,7 +203,7 @@ struct DNSResolver::AddressFilter AddressFilter() : settings(std::make_unique<DNSFilterSettings>()) {} - void performAddressFiltering(DNSResolver::IPAddresses & addresses) + void performAddressFiltering(DNSResolver::IPAddresses & addresses) const { const auto current_settings = settings.get(); bool dns_resolve_ipv4 = current_settings->dns_allow_resolve_names_to_ipv4; From 304d01e2c36ae11cd1c703135c493ee885bc3062 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Mon, 29 Jul 2024 19:18:14 +0000 Subject: [PATCH 067/180] Review fix --- programs/server/Server.cpp | 1 - src/Common/DNSResolver.cpp | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3126f65ef09..aa7c3d75163 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1770,7 +1770,6 @@ try DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6); - if (global_context->isServerCompletelyStarted()) CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability); diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index bbee7d259f0..68a8fa7d74c 100644 --- 
a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -225,9 +225,9 @@ struct DNSResolver::AddressFilter }); } - void setSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) + void setSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - settings.set(std::make_unique<DNSFilterSettings>(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_)); + settings.set(std::make_unique<DNSFilterSettings>(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6)); } MultiVersion<DNSFilterSettings> settings; @@ -264,9 +264,9 @@ Poco::Net::IPAddress DNSResolver::resolveHost(const std::string & host) { return pickAddress(resolveHostAll(host)); // random order -> random pick } -void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4_, bool dns_allow_resolve_names_to_ipv6_) +void DNSResolver::setFilterSettings(bool dns_allow_resolve_names_to_ipv4, bool dns_allow_resolve_names_to_ipv6) { - addressFilter->setSettings(dns_allow_resolve_names_to_ipv4_, dns_allow_resolve_names_to_ipv6_); + addressFilter->setSettings(dns_allow_resolve_names_to_ipv4, dns_allow_resolve_names_to_ipv6); } DNSResolver::IPAddresses DNSResolver::resolveHostAllInOriginOrder(const std::string & host) From 4143eea587b808086cceb98025906646fa78c96a Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 08:35:38 +0000 Subject: [PATCH 068/180] Add test to skip parallel --- tests/integration/parallel_skip.json | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/parallel_skip.json b/tests/integration/parallel_skip.json index 99fa626bd1e..6689572aeb7 100644 --- a/tests/integration/parallel_skip.json +++ b/tests/integration/parallel_skip.json @@ -1,6 +1,7 @@ [ "test_dns_cache/test.py::test_dns_cache_update", "test_dns_cache/test.py::test_ip_change_drop_dns_cache", + "test_dns_cache/test.py::test_dns_resolver_filter", "test_dns_cache/test.py::test_ip_change_update_dns_cache", "test_dns_cache/test.py::test_user_access_ip_change[node0]", "test_dns_cache/test.py::test_user_access_ip_change[node1]", From 012ea3cc6d09c50f29fe4d7964aa18ee038c35b2 Mon Sep 17 00:00:00 2001 From: MikhailBurdukov Date: Wed, 31 Jul 2024 13:29:36 +0000 Subject: [PATCH 069/180] Rebase --- src/Core/SettingsChangesHistory.cpp | 259 ----------------------- tests/integration/test_dns_cache/test.py | 138 ++++++------ 2 files changed, 74 insertions(+), 323 deletions(-) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index ac427e2e03e..0ccbd874a3d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -500,265 +500,6 @@ static std::initializer_list col >= '2023-01-01' AND col <= '2023-12-31')"}, - {"extract_key_value_pairs_max_pairs_per_row", 0, 0, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. 
Used as a safeguard against consuming too much memory."}, - {"default_view_definer", "CURRENT_USER", "CURRENT_USER", "Allows to set default `DEFINER` option while creating a view"}, - {"default_materialized_view_sql_security", "DEFINER", "DEFINER", "Allows to set a default value for SQL SECURITY option when creating a materialized view"}, - {"default_normal_view_sql_security", "INVOKER", "INVOKER", "Allows to set default `SQL SECURITY` option while creating a normal view"}, - {"mysql_map_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - {"mysql_map_fixed_string_to_text_in_show_columns", false, true, "Reduce the configuration effort to connect ClickHouse with BI tools."}, - }}, - {"24.1", {{"print_pretty_type_names", false, true, "Better user experience."}, - {"input_format_json_read_bools_as_strings", false, true, "Allow to read bools as strings in JSON formats by default"}, - {"output_format_arrow_use_signed_indexes_for_dictionary", false, true, "Use signed indexes type for Arrow dictionaries by default as it's recommended"}, - {"allow_experimental_variant_type", false, false, "Add new experimental Variant type"}, - {"use_variant_as_common_type", false, false, "Allow to use Variant in if/multiIf if there is no common type"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"parallel_replicas_mark_segment_size", 128, 128, "Add new setting to control segment size in new parallel replicas coordinator implementation"}, - {"ignore_materialized_views_with_dropped_target_table", false, false, "Add new setting to allow to ignore materialized views with dropped target table"}, - {"output_format_compression_level", 3, 3, "Allow to change compression level in the query output"}, - {"output_format_compression_zstd_window_log", 0, 0, "Allow to change zstd window log in the query output when zstd compression is used"}, - {"enable_zstd_qat_codec", false, false, "Add new ZSTD_QAT codec"}, - {"enable_vertical_final", false, true, "Use vertical final by default"}, - {"output_format_arrow_use_64_bit_indexes_for_dictionary", false, false, "Allow to use 64 bit indexes type in Arrow dictionaries"}, - {"max_rows_in_set_to_optimize_join", 100000, 0, "Disable join optimization as it prevents from read in order optimization"}, - {"output_format_pretty_color", true, "auto", "Setting is changed to allow also for auto value, disabling ANSI escapes if output is not a tty"}, - {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, - {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, - {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, - {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, - {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, - {"split_parts_ranges_into_intersecting_and_non_intersecting_final", false, true, "Allow to split parts ranges into intersecting and non intersecting during FINAL 
optimization"}, - {"split_intersecting_parts_ranges_into_layers_final", true, true, "Allow to split intersecting parts ranges into layers during FINAL optimization"}}}, - {"23.12", {{"allow_suspicious_ttl_expressions", true, false, "It is a new setting, and in previous versions the behavior was equivalent to allowing."}, - {"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"}, - {"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"}, - {"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}}, - {"23.11", {{"parsedatetime_parse_without_leading_zeros", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"}, - {"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}, - {"input_format_json_read_numbers_as_strings", false, true, "Allow to read numbers as strings in JSON formats by default"}, - {"input_format_json_read_arrays_as_strings", false, true, "Allow to read arrays as strings in JSON formats by default"}, - {"input_format_json_infer_incomplete_types_as_strings", false, true, "Allow to infer incomplete types as Strings in JSON formats by default"}, - {"input_format_json_try_infer_numbers_from_strings", true, false, "Don't infer numbers from strings in JSON formats by default to prevent possible parsing errors"}, - {"http_write_exception_in_output_format", false, true, "Output valid JSON/XML on exception in HTTP streaming."}}}, - {"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}}, - {"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}}, - {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."}, - {"http_receive_timeout", 180, 30, "See http_send_timeout."}}}, - {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, - {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, - {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, - {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, - {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, - {"allow_nonconst_timezone_arguments", true, false, "Allow non-const timezone arguments in certain time-related functions like toTimeZone(), fromUnixTimestamp*(), snowflakeToDateTime*()."}, - {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, - {"connect_timeout_with_failover_secure_ms", 100, 1000, "Increase default secure connect timeout because of async connect"}, - {"hedged_connection_timeout_ms", 100, 50, "Start new connection in hedged requests after 50 ms instead of 100 to correspond with previous connect timeout"}, - {"formatdatetime_f_prints_single_zero", true, false, "Improved compatibility with MySQL DATE_FORMAT()/STR_TO_DATE()"}, - {"formatdatetime_parsedatetime_m_is_month_name", false, true, "Improved compatibility with MySQL DATE_FORMAT/STR_TO_DATE"}}}, - {"23.3", {{"output_format_parquet_version", "1.0", "2.latest", "Use latest Parquet format version for output format"}, - {"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}, - {"input_format_native_allow_types_conversion", false, true, "Allow types conversion in Native input forma"}, - {"output_format_arrow_compression_method", "none", "lz4_frame", "Use lz4 compression in Arrow output format by default"}, - {"output_format_parquet_compression_method", "snappy", "lz4", "Use lz4 compression in Parquet output format by default"}, - {"output_format_orc_compression_method", "none", "lz4_frame", "Use lz4 compression in ORC output format by default"}, - {"async_query_sending_for_remote", false, true, "Create connections and send query async across shards"}}}, - {"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"}, - {"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"}, - {"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"}, - {"optimize_duplicate_order_by_and_distinct", true, false, "Remove duplicate ORDER BY and DISTINCT if it's possible"}, - {"insert_keeper_max_retries", 0, 20, "Enable reconnections to Keeper on INSERT, improve reliability"}}}, - {"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}, - {"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}, - {"input_format_csv_detect_header", false, true, "Detect header in CSV format by default"}, - {"input_format_tsv_detect_header", false, true, "Detect header in TSV format by default"}, - {"input_format_custom_detect_header", false, true, "Detect header in CustomSeparated format by default"}, - {"query_plan_remove_redundant_sorting", false, true, "Remove redundant sorting in query plan. 
For example, sorting steps related to ORDER BY clauses in subqueries"}}}, - {"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"}, - {"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"}, - {"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}}, - {"22.11", {{"use_structure_from_insertion_table_in_table_functions", 0, 2, "Improve using structure from insertion table in table functions"}}}, - {"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}}, - {"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"}, - {"enable_positional_arguments", false, true, "Enable positional arguments feature by default"}, - {"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}}, - {"22.6", {{"output_format_json_named_tuples_as_objects", false, true, "Allow to serialize named tuples as JSON objects in JSON formats by default"}, - {"input_format_skip_unknown_fields", false, true, "Optimize reading subset of columns for some input formats"}}}, - {"22.5", {{"memory_overcommit_ratio_denominator", 0, 1073741824, "Enable memory overcommit feature by default"}, - {"memory_overcommit_ratio_denominator_for_user", 0, 1073741824, "Enable memory overcommit feature by default"}}}, - {"22.4", {{"allow_settings_after_format_in_insert", true, false, "Do not allow SETTINGS after FORMAT for INSERT queries because ClickHouse interpret SETTINGS as some values, which is misleading"}}}, - {"22.3", {{"cast_ipv4_ipv6_default_on_conversion_error", true, false, "Make functions cast(value, 'IPv4') and cast(value, 'IPv6') behave same as toIPv4 and toIPv6 functions"}}}, - {"21.12", {{"stream_like_engine_allow_direct_select", true, false, "Do not allow direct select for Kafka/RabbitMQ/FileLog by default"}}}, - {"21.9", {{"output_format_decimal_trailing_zeros", true, false, "Do not output trailing zeros in text representation of Decimal types by default for better looking output"}, - {"use_hedged_requests", false, true, "Enable Hedged Requests feature by default"}}}, - {"21.7", {{"legacy_column_name_of_tuple_literal", true, false, "Add this setting only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher"}}}, - {"21.5", {{"async_socket_for_remote", false, true, "Fix all problems and turn on asynchronous reads from socket for remote queries by default again"}}}, - {"21.3", {{"async_socket_for_remote", true, false, "Turn off asynchronous reads from socket for remote queries because of some problems"}, - {"optimize_normalize_count_variants", false, true, "Rewrite aggregate functions that semantically equals to count() as count() by default"}, - {"normalize_function_names", false, true, "Normalize function names to their canonical names, this was needed for projection query routing"}}}, - {"21.2", {{"enable_global_with_statement", false, true, "Propagate WITH statements to UNION queries and all subqueries by default"}}}, - {"21.1", {{"insert_quorum_parallel", false, true, "Use parallel quorum inserts by default. 
It is significantly more convenient to use than sequential quorum inserts"}, - {"input_format_null_as_default", false, true, "Allow to insert NULL as default for input formats by default"}, - {"optimize_on_insert", false, true, "Enable data optimization on INSERT by default for better user experience"}, - {"use_compact_format_in_distributed_parts_names", false, true, "Use compact format for async INSERT into Distributed tables by default"}}}, - {"20.10", {{"format_regexp_escaping_rule", "Escaped", "Raw", "Use Raw as default escaping rule for Regexp format to male the behaviour more like to what users expect"}}}, - {"20.7", {{"show_table_uuid_in_table_create_query_if_not_nil", true, false, "Stop showing UID of the table in its CREATE query for Engine=Atomic"}}}, - {"20.5", {{"input_format_with_names_use_header", false, true, "Enable using header with names for formats with WithNames/WithNamesAndTypes suffixes"}, - {"allow_suspicious_codecs", true, false, "Don't allow to specify meaningless compression codecs"}}}, - {"20.4", {{"validate_polygons", false, true, "Throw exception if polygon is invalid in function pointInPolygon by default instead of returning possibly wrong results"}}}, - {"19.18", {{"enable_scalar_subquery_optimization", false, true, "Prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once"}}}, - {"19.14", {{"any_join_distinct_right_table_keys", true, false, "Disable ANY RIGHT and ANY FULL JOINs by default to avoid inconsistency"}}}, - {"19.12", {{"input_format_defaults_for_omitted_fields", false, true, "Enable calculation of complex default expressions for omitted fields for some input formats, because it should be the expected behaviour"}}}, - {"19.5", {{"max_partitions_per_insert_block", 0, 100, "Add a limit for the number of partitions in one block"}}}, - {"18.12.17", {{"enable_optimize_predicate_expression", 0, 1, "Optimize predicates to subqueries by default"}}}, }; diff --git a/tests/integration/test_dns_cache/test.py b/tests/integration/test_dns_cache/test.py index 5e120dc42aa..36401517429 100644 --- a/tests/integration/test_dns_cache/test.py +++ b/tests/integration/test_dns_cache/test.py @@ -32,7 +32,6 @@ node2 = cluster.add_instance( main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], with_zookeeper=True, ipv6_address="2001:3984:3989::1:1112", - ipv4_address="10.5.95.11", ) @@ -40,6 +39,9 @@ node2 = cluster.add_instance( def cluster_without_dns_cache_update(): try: cluster.start() + + _fill_nodes([node1, node2], "test_table_drop") + yield cluster except Exception as ex: @@ -57,8 +59,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): # In this case we should manually set up the static DNS entries on the source host # to exclude resplving addresses automatically added by docker. # We use ipv6 for hosts, but resolved DNS entries may contain an unexpected ipv4 address. 
- _fill_nodes([node1, node2], "test_table_drop") - node2.set_hosts([("2001:3984:3989::1:1111", "node1")]) # drop DNS cache node2.query("SYSTEM DROP DNS CACHE") @@ -98,67 +98,6 @@ def test_ip_change_drop_dns_cache(cluster_without_dns_cache_update): assert_eq_with_retry(node2, "SELECT count(*) from test_table_drop", "7") -def _render_filter_config(allow_ipv4, allow_ipv6): - config = f""" - - {int(allow_ipv4)} - {int(allow_ipv6)} - - """ - return config - - -@pytest.mark.parametrize( - "allow_ipv4, allow_ipv6", - [ - (True, False), - (False, True), - (False, False), - ], -) -def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): - host_ipv6 = node2.ipv6_address - host_ipv4 = node2.ipv4_address - - node2.set_hosts( - [ - (host_ipv6, "test_host"), - (host_ipv4, "test_host"), - ] - ) - node2.replace_config( - "/etc/clickhouse-server/config.d/dns_filter.xml", - _render_filter_config(allow_ipv4, allow_ipv6), - ) - - node2.query("SYSTEM DROP DNS CACHE") - node2.query("SYSTEM DROP CONNECTIONS CACHE") - node2.query("SYSTEM RELOAD CONFIG") - - if not allow_ipv4 and not allow_ipv6: - with pytest.raises(QueryRuntimeException): - node4.query("SELECT * FROM remote('lost_host', 'system', 'one')") - else: - node2.query("SELECT * FROM remote('test_host', system, one)") - assert ( - node2.query( - "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" - ) - == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" - ) - - node2.exec_in_container( - [ - "bash", - "-c", - "rm /etc/clickhouse-server/config.d/dns_filter.xml", - ], - privileged=True, - user="root", - ) - node2.query("SYSTEM RELOAD CONFIG") - - node3 = cluster.add_instance( "node3", main_configs=["configs/listen_host.xml"], @@ -378,3 +317,74 @@ def test_host_is_drop_from_cache_after_consecutive_failures( assert node4.wait_for_log_line( "Cached hosts dropped:.*InvalidHostThatDoesNotExist.*" ) + + +node7 = cluster.add_instance( + "node7", + main_configs=["configs/listen_host.xml", "configs/dns_update_long.xml"], + with_zookeeper=True, + ipv6_address="2001:3984:3989::1:1117", + ipv4_address="10.5.95.17", +) + + +def _render_filter_config(allow_ipv4, allow_ipv6): + config = f""" + + {int(allow_ipv4)} + {int(allow_ipv6)} + + """ + return config + + +@pytest.mark.parametrize( + "allow_ipv4, allow_ipv6", + [ + (True, False), + (False, True), + (False, False), + ], +) +def test_dns_resolver_filter(cluster_without_dns_cache_update, allow_ipv4, allow_ipv6): + node = node7 + host_ipv6 = node.ipv6_address + host_ipv4 = node.ipv4_address + + node.set_hosts( + [ + (host_ipv6, "test_host"), + (host_ipv4, "test_host"), + ] + ) + node.replace_config( + "/etc/clickhouse-server/config.d/dns_filter.xml", + _render_filter_config(allow_ipv4, allow_ipv6), + ) + + node.query("SYSTEM RELOAD CONFIG") + node.query("SYSTEM DROP DNS CACHE") + node.query("SYSTEM DROP CONNECTIONS CACHE") + + if not allow_ipv4 and not allow_ipv6: + with pytest.raises(QueryRuntimeException): + node.query("SELECT * FROM remote('lost_host', 'system', 'one')") + else: + node.query("SELECT * FROM remote('test_host', system, one)") + assert ( + node.query( + "SELECT ip_address FROM system.dns_cache WHERE hostname='test_host'" + ) + == f"{host_ipv4 if allow_ipv4 else host_ipv6}\n" + ) + + node.exec_in_container( + [ + "bash", + "-c", + "rm /etc/clickhouse-server/config.d/dns_filter.xml", + ], + privileged=True, + user="root", + ) + node.query("SYSTEM RELOAD CONFIG") From 725640613b0d1cf47515697b5856a85953b73483 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: 
Wed, 7 Aug 2024 22:35:57 +0200 Subject: [PATCH 070/180] Add annotations --- .../0_stateless/00111_shard_external_sort_distributed.sql | 3 ++- .../0_stateless/00376_shard_group_uniq_array_of_int_array.sql | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql index 93efc317bfa..9e06654195d 100644 --- a/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql +++ b/tests/queries/0_stateless/00111_shard_external_sort_distributed.sql @@ -1,4 +1,5 @@ --- Tags: distributed, long +-- Tags: distributed, long, no-flaky-check +-- ^ no-flaky-check - sometimes longer than 600s with ThreadFuzzer. SET max_memory_usage = 150000000; SET max_bytes_before_external_sort = 10000000; diff --git a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql index 24b7f1c30a6..4453c26283c 100644 --- a/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql +++ b/tests/queries/0_stateless/00376_shard_group_uniq_array_of_int_array.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: long SET max_rows_to_read = '55M'; From da6378752fc562f7b8df487f86fe2257215eb96a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:07:43 +0200 Subject: [PATCH 071/180] Add annotations --- tests/queries/0_stateless/01304_direct_io_long.sh | 3 ++- tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01304_direct_io_long.sh b/tests/queries/0_stateless/01304_direct_io_long.sh index 6ab25eebaf7..867c37667fe 100755 --- a/tests/queries/0_stateless/01304_direct_io_long.sh +++ b/tests/queries/0_stateless/01304_direct_io_long.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: long, no-object-storage-with-slow-build +# Tags: long, no-object-storage-with-slow-build, no-flaky-check +# It can be too long with ThreadFuzzer CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 index 47940356302..7df77595347 100644 --- a/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 +++ b/tests/queries/0_stateless/02177_issue_31009_pt2.sql.j2 @@ -1,4 +1,5 @@ --- Tags: long +-- Tags: long, no-flaky-check +-- It can be too long with ThreadFuzzer DROP TABLE IF EXISTS left; DROP TABLE IF EXISTS right; From 4fac40a3cb4823f0014a2c5324593b6ef8a6b6ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:41:22 +0200 Subject: [PATCH 072/180] Step back --- programs/client/Client.cpp | 10 ++++++++++ programs/local/LocalServer.cpp | 3 +++ src/Client/ClientBase.h | 3 +++ src/Client/LocalConnection.cpp | 1 + src/Client/LocalConnection.h | 2 ++ src/Client/Suggest.cpp | 4 ---- 6 files changed, 19 insertions(+), 4 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 0f136664de8..1d99d223ee9 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -327,6 +328,7 @@ int Client::main(const std::vector & /*args*/) try { UseSSL use_ssl; + auto & thread_status = MainThreadStatus::getInstance(); setupSignalHandler(); std::cout << std::fixed << std::setprecision(3); @@ -341,6 +343,14 @@ try 
initTTYBuffer(toProgressOption(config().getString("progress", "default"))); ASTAlterCommand::setFormatAlterCommandsWithParentheses(true); + { + // All that just to set DB::CurrentThread::get().getGlobalContext() + // which is required for client timezone (pushed from server) to work. + auto thread_group = std::make_shared(); + const_cast(thread_group->global_context) = global_context; + thread_status.attachToGroup(thread_group, false); + } + /// Includes delayed_interactive. if (is_interactive) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index cb89c6c5510..0d731ed0e14 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -29,8 +29,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -461,6 +463,7 @@ int LocalServer::main(const std::vector & /*args*/) try { UseSSL use_ssl; + thread_status.emplace(); StackTrace::setShowAddresses(server_settings.show_addresses_in_stack_traces); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 26deb1eda26..1a23b6b1363 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -296,6 +296,9 @@ protected: Settings cmd_settings; MergeTreeSettings cmd_merge_tree_settings; + /// thread status should be destructed before shared context because it relies on process list. + std::optional thread_status; + ServerConnectionPtr connection; ConnectionParameters connection_parameters; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index b0a5ef99253..072184e0a66 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -128,6 +128,7 @@ void LocalConnection::sendQuery( state->query_id = query_id; state->query = query; + state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); state->profile_queue = std::make_shared(std::numeric_limits::max()); CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 5cc3d0b30ec..b424c5b5aa3 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -61,6 +61,8 @@ struct LocalQueryState /// Time after the last check to stop the request and send the progress. 
Stopwatch after_send_progress; Stopwatch after_send_profile_events; + + std::unique_ptr query_scope_holder; }; diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index c1f163939e8..0188ebc8173 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -96,10 +96,6 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p loading_thread = std::thread([my_context = Context::createCopy(context), connection_parameters, suggestion_limit, this] { ThreadStatus thread_status; - my_context->makeQueryContext(); - auto group = ThreadGroup::createForQuery(my_context); - CurrentThread::attachToGroup(group); - for (size_t retry = 0; retry < 10; ++retry) { try From ca9bd647fbd97fd36de5e30778c824ae522e03c3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 04:52:18 +0200 Subject: [PATCH 073/180] Simplification --- src/Client/ClientBase.cpp | 43 +++++++-------------------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 62a008bc88c..a305278fb4d 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1877,48 +1877,19 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin profile_events.watch.restart(); { - /// Temporarily apply query settings to context. - std::optional old_settings; - SCOPE_EXIT_SAFE({ - if (old_settings) - client_context->setSettings(*old_settings); + /// Temporarily apply query settings to the context. + Settings old_settings = client_context->getSettingsCopy(); + SCOPE_EXIT_SAFE( + { + client_context->setSettings(old_settings); }); - - auto apply_query_settings = [&](const IAST & settings_ast) - { - if (!old_settings) - old_settings.emplace(client_context->getSettingsRef()); - client_context->applySettingsChanges(settings_ast.as()->changes); - client_context->resetSettingsToDefaultValue(settings_ast.as()->default_settings); - }; - - const auto * insert = parsed_query->as(); - if (const auto * select = parsed_query->as(); select && select->settings()) - apply_query_settings(*select->settings()); - else if (const auto * select_with_union = parsed_query->as()) - { - const ASTs & children = select_with_union->list_of_selects->children; - if (!children.empty()) - { - // On the client it is enough to apply settings only for the - // last SELECT, since the only thing that is important to apply - // on the client is format settings. 
- const auto * last_select = children.back()->as(); - if (last_select && last_select->settings()) - { - apply_query_settings(*last_select->settings()); - } - } - } - else if (const auto * query_with_output = parsed_query->as(); query_with_output && query_with_output->settings_ast) - apply_query_settings(*query_with_output->settings_ast); - else if (insert && insert->settings_ast) - apply_query_settings(*insert->settings_ast); + InterpreterSetQuery::applySettingsFromQuery(parsed_query, client_context); if (!connection->checkConnected(connection_parameters.timeouts)) connect(); ASTPtr input_function; + const auto * insert = parsed_query->as(); if (insert && insert->select) insert->tryFindInputFunction(input_function); From d52d599af4db1c779551b6788a5505e521fe3c31 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 18:34:02 +0200 Subject: [PATCH 074/180] Annotations --- ...ter_skip_virtual_columns_with_non_deterministic_functions.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql index 6ef8c5a8656..6714a069246 100644 --- a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -1,3 +1,4 @@ +-- Tags: long SET max_rows_to_read = 0; create table test (number UInt64) engine=MergeTree order by number; insert into test select * from numbers(50000000); From 91ff9f40a2cea46d2ddf14628b16a0ddf923a3cc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 8 Aug 2024 21:04:10 +0200 Subject: [PATCH 075/180] Misc --- tests/queries/0_stateless/01603_read_with_backoff_bug.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql index 278817b1d48..8a6fa9b7845 100644 --- a/tests/queries/0_stateless/01603_read_with_backoff_bug.sql +++ b/tests/queries/0_stateless/01603_read_with_backoff_bug.sql @@ -4,6 +4,7 @@ set enable_filesystem_cache=0; set enable_filesystem_cache_on_write_operations=0; set max_rows_to_read = '30M'; + drop table if exists t; create table t (x UInt64, s String) engine = MergeTree order by x SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; From a25accdae3cf420beaeb8ca25a9ac32070c397b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 20:20:09 +0200 Subject: [PATCH 076/180] Fix a test --- programs/client/Client.cpp | 3 +++ src/Client/ClientBase.cpp | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1d99d223ee9..631914dee5c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1164,6 +1164,9 @@ void Client::processOptions(const OptionsDescription & options_description, /// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.) client_context = global_context; initClientContext(); + + /// Allow to pass-through unknown settings to the server. 
+ client_context->getAccessControl().allowAllSettings(); } diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index a00a9499237..473db8e9678 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -689,9 +689,6 @@ void ClientBase::initClientContext() client_context->setQueryKindInitial(); client_context->setQueryKind(query_kind); client_context->setQueryParameters(query_parameters); - - /// Allow to pass-through unknown settings to the server. - client_context->getAccessControl().allowAllSettings(); } bool ClientBase::isRegularFile(int fd) From c716315c3fdfd57719daf0f7f42b786afe6e68af Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Aug 2024 00:20:46 +0200 Subject: [PATCH 077/180] Annotations --- .../0_stateless/00375_shard_group_uniq_array_of_string.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql index 8db91904a6a..c8a243d9b27 100644 --- a/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql +++ b/tests/queries/0_stateless/00375_shard_group_uniq_array_of_string.sql @@ -1,4 +1,4 @@ --- Tags: shard +-- Tags: shard, long DROP TABLE IF EXISTS group_uniq_str; CREATE TABLE group_uniq_str ENGINE = Memory AS SELECT number % 10 as id, toString(intDiv((number%10000), 10)) as v FROM system.numbers LIMIT 10000000; From 375de7ff6523a23fb7b898725a53004f24d047cd Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 11 Aug 2024 07:15:10 +0200 Subject: [PATCH 078/180] ci: add more logs in the functional tests reports Due to settings randomization 4096 is not enough even to show all settings, like here [1]. [1]: https://s3.amazonaws.com/clickhouse-test-reports/68139/c852bd9dbaa317423234d4f15f21d64e59be42b5/stateless_tests_flaky_check__asan_.html Signed-off-by: Azat Khuzhin --- docker/test/util/process_functional_tests_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index aa2ea686c46..ec9e14b1430 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -116,7 +116,7 @@ def process_test_log(log_path, broken_tests): test[0], test[1], test[2], - "".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"), + "".join(test[3])[:8192].replace("\t", "\\t").replace("\n", "\\n"), ] for test in test_results ] From ece707c4436ab65fcb142f0eaae72f7eb2c3d8db Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 10 Aug 2024 19:43:29 +0200 Subject: [PATCH 079/180] Better test for Not-ready Set is passed in system.* tables - system.distribution_queue - system.replication_queue - system.rocksdb - system.databases - system.mutations - test for system.part_moves_between_shards will not be provided since it is a likely deprecated feature and the test requires some code (I've fixed it differently from #66018, but it does not make sense anymore, so I'm submitting only the test) --- ...3223_system_tables_set_not_ready.reference | 5 ++++ .../03223_system_tables_set_not_ready.sql | 30 +++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 tests/queries/0_stateless/03223_system_tables_set_not_ready.reference create mode 100644 tests/queries/0_stateless/03223_system_tables_set_not_ready.sql diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference 
b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference new file mode 100644 index 00000000000..e39523ed4f5 --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.reference @@ -0,0 +1,5 @@ +system.distribution_queue 1 +system.rocksdb 1 +system.databases 1 +system.mutations 1 +system.replication_queue 1 diff --git a/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql new file mode 100644 index 00000000000..907fa47143c --- /dev/null +++ b/tests/queries/0_stateless/03223_system_tables_set_not_ready.sql @@ -0,0 +1,30 @@ +-- Tags: no-fasttest +-- Tag no-fasttest -- due to EmbeddedRocksDB + +drop table if exists null; +drop table if exists dist; +create table null as system.one engine=Null; +create table dist as null engine=Distributed(test_cluster_two_shards, currentDatabase(), 'null', rand()); +insert into dist settings prefer_localhost_replica=0 values (1); +select 'system.distribution_queue', count() from system.distribution_queue where exists(select 1) and database = currentDatabase(); + +drop table if exists rocksdb; +create table rocksdb (key Int) engine=EmbeddedRocksDB() primary key key; +insert into rocksdb values (1); +select 'system.rocksdb', count()>0 from system.rocksdb where exists(select 1) and database = currentDatabase(); + +select 'system.databases', count() from system.databases where exists(select 1) and database = currentDatabase(); + +drop table if exists mt; +create table mt (key Int) engine=MergeTree() order by key; +alter table mt delete where 1; +select 'system.mutations', count() from system.mutations where exists(select 1) and database = currentDatabase(); + +drop table if exists rep1; +drop table if exists rep2; +create table rep1 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +create table rep2 (key Int) engine=ReplicatedMergeTree('/{database}/rep', '{table}') order by key; +system stop fetches rep2; +insert into rep1 values (1); +system sync replica rep2 pull; +select 'system.replication_queue', count()>0 from system.replication_queue where exists(select 1) and database = currentDatabase(); From a41c1305887d08f43c02e354bb307f69a16b3fb0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 06:05:44 +0200 Subject: [PATCH 080/180] Update 02675_profile_events_from_query_log_and_client.sh --- .../02675_profile_events_from_query_log_and_client.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh index 894b2b61563..ff534a6a2e6 100755 --- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh +++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-merge-tree-settings # Tag no-fasttest: needs s3 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 9c7d9a6a8d96b88c56aaa95b691f2b9bf79cf8d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 23:57:03 +0200 Subject: [PATCH 081/180] Annotations --- tests/queries/0_stateless/02293_ttest_large_samples.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02293_ttest_large_samples.sql b/tests/queries/0_stateless/02293_ttest_large_samples.sql index 826bd483fe9..b4687541360 100644 --- 
a/tests/queries/0_stateless/02293_ttest_large_samples.sql
+++ b/tests/queries/0_stateless/02293_ttest_large_samples.sql
@@ -1,3 +1,5 @@
+-- Tags: long
+
 SELECT roundBankers(result.1, 5), roundBankers(result.2, 5) FROM (
 SELECT studentTTest(sample, variant) as result

From 1767ec6b4ca0fc0e8546e705e0d0dff3ffa797cb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 12 Aug 2024 23:55:01 +0200
Subject: [PATCH 082/180] Debug test

---
 .../0_stateless/02490_benchmark_max_consecutive_errors.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh
index f747b3156a5..df7e9386662 100755
--- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh
+++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh
@@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ]
 then
     echo "$RES"
 else
+    echo "$RES"
     cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log"
 fi

From 4c043301e6dde6b0c83394d6721e112c9c7bf4ce Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Tue, 13 Aug 2024 10:30:31 +0200
Subject: [PATCH 083/180] Avoid ignoring errors of execute_process() (set COMMAND_ERROR_IS_FATAL=ANY)

This will fix issues like this [1]:

Aug 12 09:58:44 '/usr/bin/cmake' '--build' '/build/build_docker/native' '--target' 'pre_compressor'
Aug 12 09:58:44 sccache: error: Server startup failed: cache storage failed to read: Unexpected (temporary) at stat
Aug 12 09:58:45 ninja: build stopped: subcommand failed.
Aug 12 09:58:45 -- Configuring done (77.7s)
Aug 12 09:58:47 -- Generating done (1.8s)
Aug 12 09:58:47 -- Build files have been written to: /build/build_docker

So as you can see, even though ninja failed, the build files were still written, while the whole configure step should have failed.
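To make the failure mode concrete, a minimal sketch of the change (the
`hypothetical_target` name is illustrative, not from this patch):

    # Before: execute_process() reports failure only via RESULT_VARIABLE,
    # and when the result is not requested (or not checked), a non-zero
    # exit code is silently ignored and configuration continues.
    execute_process(COMMAND ninja hypothetical_target)

    # After: with COMMAND_ERROR_IS_FATAL ANY (available since CMake 3.19),
    # any command that exits with a non-zero code aborts the CMake run
    # immediately instead of letting it write build files.
    execute_process(
        COMMAND ninja hypothetical_target
        COMMAND_ERROR_IS_FATAL ANY
    )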
[1]: https://s3.amazonaws.com/clickhouse-test-reports/64955/0af41e32a5822d25ac3760f1ebb2313557474701/builds/report.html [2]: https://s3.amazonaws.com/clickhouse-builds/PRs/64955/0af41e32a5822d25ac3760f1ebb2313557474701/binary_darwin_aarch64/build_log.log Note, COMMAND_ERROR_IS_FATAL is 3.19+, and the requirement for now is 3.20 Signed-off-by: Azat Khuzhin --- CMakeLists.txt | 12 ++++++-- PreLoad.cmake | 10 +++++-- cmake/freebsd/default_libs.cmake | 12 ++++++-- cmake/linux/default_libs.cmake | 6 +++- cmake/tools.cmake | 6 +++- cmake/utils.cmake | 5 +++- contrib/cctz-cmake/CMakeLists.txt | 4 ++- contrib/google-protobuf-cmake/CMakeLists.txt | 12 ++++++-- contrib/grpc-cmake/CMakeLists.txt | 30 +++++++++++++------ .../completions/CMakeLists.txt | 1 + 10 files changed, 75 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b4e0484ab1..8e2302e6c52 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -609,7 +609,9 @@ if (NATIVE_BUILD_TARGETS execute_process( COMMAND ${CMAKE_COMMAND} -E make_directory "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -621,9 +623,13 @@ if (NATIVE_BUILD_TARGETS "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" ${PROJECT_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${NATIVE_BUILD_DIR}" --target ${NATIVE_BUILD_TARGETS} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () diff --git a/PreLoad.cmake b/PreLoad.cmake index e0fd37b2fd6..92b221c9f63 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -51,8 +51,14 @@ if (NOT "$ENV{CFLAGS}" STREQUAL "" endif() # Default toolchain - this is needed to avoid dependency on OS files. -execute_process(COMMAND uname -s OUTPUT_VARIABLE OS) -execute_process(COMMAND uname -m OUTPUT_VARIABLE ARCH) +execute_process(COMMAND uname -s + OUTPUT_VARIABLE OS + COMMAND_ERROR_IS_FATAL ANY +) +execute_process(COMMAND uname -m + OUTPUT_VARIABLE ARCH + COMMAND_ERROR_IS_FATAL ANY +) # By default, prefer clang on Linux # But note, that you still may change the compiler with -DCMAKE_C_COMPILER/-DCMAKE_CXX_COMPILER. 
diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 6bde75f8c9a..3f5b3829877 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -9,10 +9,18 @@ endif () file(GLOB bprefix "/usr/local/llvm${COMPILER_VERSION_MAJOR}/lib/clang/${COMPILER_VERSION_MAJOR}/lib/${system_processor}-portbld-freebsd*/") message(STATUS "-Bprefix: ${bprefix}") -execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins-${system_processor}.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # --print-file-name simply prints what you passed in case of nothing was resolved, so let's try one other possible option if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins-${system_processor}.a") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process(COMMAND + ${CMAKE_CXX_COMPILER} -Bprefix=${bprefix} --print-file-name=libclang_rt.builtins.a + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if (BUILTINS_LIBRARY STREQUAL "libclang_rt.builtins.a") message(FATAL_ERROR "libclang_rt.builtins had not been found") diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 4a06243243e..51620bc9f33 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -5,7 +5,11 @@ set (DEFAULT_LIBS "-nodefaultlibs") # We need builtins from Clang's RT even without libcxx - for ubsan+int128. # See https://bugs.llvm.org/show_bug.cgi?id=16404 -execute_process (COMMAND ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process (COMMAND + ${CMAKE_CXX_COMPILER} --target=${CMAKE_CXX_COMPILER_TARGET} --print-libgcc-file-name --rtlib=compiler-rt + OUTPUT_VARIABLE BUILTINS_LIBRARY + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE) # Apparently, in clang-19, the UBSan support library for C++ was moved out into ubsan_standalone_cxx.a, so we have to include both. 
if (SANITIZE STREQUAL undefined) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 7aa5d4c51ce..5c7da54b779 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -5,7 +5,11 @@ if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") endif () # Print details to output -execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION OUTPUT_STRIP_TRAILING_WHITESPACE) +execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version + OUTPUT_VARIABLE COMPILER_SELF_IDENTIFICATION + COMMAND_ERROR_IS_FATAL ANY + OUTPUT_STRIP_TRAILING_WHITESPACE +) message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") # Require minimum compiler versions diff --git a/cmake/utils.cmake b/cmake/utils.cmake index a318408098a..a99d8e050a8 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -90,7 +90,10 @@ endfunction() # Function get_cmake_properties returns list of all propreties that cmake supports function(get_cmake_properties outvar) - execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE cmake_properties) + execute_process(COMMAND cmake --help-property-list + OUTPUT_VARIABLE cmake_properties + COMMAND_ERROR_IS_FATAL ANY + ) # Convert command output into a CMake list string(REGEX REPLACE ";" "\\\\;" cmake_properties "${cmake_properties}") string(REGEX REPLACE "\n" ";" cmake_properties "${cmake_properties}") diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 7161f743de1..fadf948b053 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -37,7 +37,9 @@ message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}") execute_process(COMMAND bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -" OUTPUT_STRIP_TRAILING_WHITESPACE - OUTPUT_VARIABLE TIMEZONES) + OUTPUT_VARIABLE TIMEZONES + COMMAND_ERROR_IS_FATAL ANY +) file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n") file(APPEND ${TIMEZONES_FILE} "#include \n") diff --git a/contrib/google-protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt index e44f737cfc3..f1a744f851f 100644 --- a/contrib/google-protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -359,7 +359,9 @@ else () execute_process( COMMAND mkdir -p ${PROTOC_BUILD_DIR} - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} @@ -375,11 +377,15 @@ else () "-DABSL_ENABLE_INSTALL=0" "${protobuf_source_dir}" WORKING_DIRECTORY "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${PROTOC_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) endif () add_executable(protoc IMPORTED GLOBAL) diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 1c0bf41ff78..975774d1990 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -51,8 +51,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set(OPENSSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/openssl-cmake") execute_process( - COMMAND mkdir -p ${OPENSSL_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${OPENSSL_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") @@ -89,15 +90,21 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL 
CMAKE_SYSTEM_NAME "-DClickHouse_SOURCE_DIR=${ClickHouse_SOURCE_DIR}" "${OPENSSL_SOURCE_DIR}" WORKING_DIRECTORY "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --install "${OPENSSL_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) # It's not important on which file we depend, we just want to specify right order add_library(openssl_for_grpc STATIC IMPORTED GLOBAL) @@ -108,8 +115,9 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME set (GRPC_CPP_PLUGIN_BUILD_DIR "${_gRPC_BINARY_DIR}/build") execute_process( - COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} - COMMAND_ECHO STDOUT + COMMAND mkdir -p ${GRPC_CPP_PLUGIN_BUILD_DIR} + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY ) set(abseil_source_dir "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") @@ -140,11 +148,15 @@ if (NOT CMAKE_HOST_SYSTEM_NAME STREQUAL CMAKE_SYSTEM_NAME "-DgRPC_SSL_PROVIDER=package" "${_gRPC_SOURCE_DIR}" WORKING_DIRECTORY "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) execute_process( COMMAND ${CMAKE_COMMAND} --build "${GRPC_CPP_PLUGIN_BUILD_DIR}" - COMMAND_ECHO STDOUT) + COMMAND_ECHO STDOUT + COMMAND_ERROR_IS_FATAL ANY + ) add_executable(grpc_cpp_plugin IMPORTED GLOBAL) set_target_properties (grpc_cpp_plugin PROPERTIES IMPORTED_LOCATION "${GRPC_CPP_PLUGIN_BUILD_DIR}/grpc_cpp_plugin") diff --git a/programs/bash-completion/completions/CMakeLists.txt b/programs/bash-completion/completions/CMakeLists.txt index d364e07ef6e..2e911e81981 100644 --- a/programs/bash-completion/completions/CMakeLists.txt +++ b/programs/bash-completion/completions/CMakeLists.txt @@ -6,6 +6,7 @@ macro(configure_bash_completion) COMMAND ${PKG_CONFIG_BIN} --variable=completionsdir bash-completion OUTPUT_VARIABLE ${out} OUTPUT_STRIP_TRAILING_WHITESPACE + COMMAND_ERROR_IS_FATAL ANY ) endif() string(REPLACE /usr "${CMAKE_INSTALL_PREFIX}" out "${out}") From 69893aaa25e3c459f3480955534e05456e7aaa64 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 13 Aug 2024 16:19:25 +0200 Subject: [PATCH 084/180] Lower memory usage --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index c70cbe1fe45..874095e39dc 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -391,8 +391,8 @@ done # wait for minio to flush its batch if it has any sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -clickhouse-client -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" -clickhouse-client -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" # Stop server so we can safely read data with clickhouse-local. 
# Why do we read data with clickhouse-local? From da1c98a771c5b6cd74b3e1ba00fd4e01574489e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 14 Aug 2024 04:54:33 +0200 Subject: [PATCH 085/180] Update the limits --- docker/test/stateless/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 874095e39dc..2e6e7bbebe5 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -391,8 +391,8 @@ done # wait for minio to flush its batch if it has any sleep 1 clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE" -clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" -clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow" +clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow" # Stop server so we can safely read data with clickhouse-local. # Why do we read data with clickhouse-local? From 844cdd8937cce17060ea8b54fdfc2428d3015f44 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 14 Aug 2024 20:38:09 +0200 Subject: [PATCH 086/180] update toInterval functions --- .../functions/type-conversion-functions.md | 400 +++++++++++++++++- 1 file changed, 380 insertions(+), 20 deletions(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 1e618b8cdab..cd6fd9ab839 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -4866,30 +4866,23 @@ Result: └───────┴───────────────┴──────┴──────────────┴──────────────┴──────────────────────┘ ``` -## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) +## toIntervalYear -Converts a Number type argument to an [Interval](../data-types/special-data-types/interval.md) data type. +Returns an interval of `n` years of data type [IntervalYear](../data-types/special-data-types/interval.md). **Syntax** ``` sql -toIntervalSecond(number) -toIntervalMinute(number) -toIntervalHour(number) -toIntervalDay(number) -toIntervalWeek(number) -toIntervalMonth(number) -toIntervalQuarter(number) -toIntervalYear(number) +toIntervalYear(n) ``` **Arguments** -- `number` — Duration of interval. Positive integer number. +- `n` — Number of years. Positive integer number. [Int*](../data-types/int-uint.md). **Returned values** -- The value in `Interval` data type. +- Interval of `n` years. [IntervalYear](../data-types/special-data-types/interval.md). 
 **Example**
 
 Query:
 
 ``` sql
 WITH
     toDate('2019-01-01') AS date,
-    INTERVAL 1 WEEK AS interval_week,
-    toIntervalWeek(1) AS interval_to_week
-SELECT
-    date + interval_week,
-    date + interval_to_week;
+    toIntervalYear(1) AS interval_to_year
+SELECT date + interval_to_year
 ```
 
 Result:
 
 ```response
-┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
-│                2019-01-08 │                   2019-01-08 │
-└───────────────────────────┴──────────────────────────────┘
+┌─plus(date, interval_to_year)─┐
+│                   2020-01-01 │
+└──────────────────────────────┘
+```
+
+## toIntervalQuarter
+
+Returns an interval of `n` quarters of data type [IntervalQuarter](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalQuarter(n)
+```
+
+**Arguments**
+
+- `n` — Number of quarters. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` quarters. [IntervalQuarter](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalQuarter(1) AS interval_to_quarter
+SELECT date + interval_to_quarter
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_quarter)─┐
+│                      2019-04-01 │
+└─────────────────────────────────┘
+```
+
+## toIntervalMonth
+
+Returns an interval of `n` months of data type [IntervalMonth](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalMonth(n)
+```
+
+**Arguments**
+
+- `n` — Number of months. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` months. [IntervalMonth](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalMonth(1) AS interval_to_month
+SELECT date + interval_to_month
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_month)─┐
+│                    2019-02-01 │
+└───────────────────────────────┘
+```
+
+## toIntervalWeek
+
+Returns an interval of `n` weeks of data type [IntervalWeek](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalWeek(n)
+```
+
+**Arguments**
+
+- `n` — Number of weeks. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` weeks. [IntervalWeek](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalWeek(1) AS interval_to_week
+SELECT date + interval_to_week
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_week)─┐
+│                   2019-01-08 │
+└──────────────────────────────┘
+```
+
+## toIntervalDay
+
+Returns an interval of `n` days of data type [IntervalDay](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalDay(n)
+```
+
+**Arguments**
+
+- `n` — Number of days. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` days. [IntervalDay](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalDay(5) AS interval_to_days
+SELECT date + interval_to_days
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_days)─┐
+│                   2019-01-06 │
+└──────────────────────────────┘
+```
+
+## toIntervalHour
+
+Returns an interval of `n` hours of data type [IntervalHour](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalHour(n)
+```
+
+**Arguments**
+
+- `n` — Number of hours. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` hours. [IntervalHour](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalHour(12) AS interval_to_hours
+SELECT date + interval_to_hours
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_hours)─┐
+│           2019-01-01 12:00:00 │
+└───────────────────────────────┘
+```
+
+## toIntervalMinute
+
+Returns an interval of `n` minutes of data type [IntervalMinute](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalMinute(n)
+```
+
+**Arguments**
+
+- `n` — Number of minutes. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` minutes. [IntervalMinute](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalMinute(12) AS interval_to_minutes
+SELECT date + interval_to_minutes
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_minutes)─┐
+│             2019-01-01 00:12:00 │
+└─────────────────────────────────┘
+```
+
+## toIntervalSecond
+
+Returns an interval of `n` seconds of data type [IntervalSecond](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalSecond(n)
+```
+
+**Arguments**
+
+- `n` — Number of seconds. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` seconds. [IntervalSecond](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDate('2019-01-01') AS date,
+    toIntervalSecond(30) AS interval_to_seconds
+SELECT date + interval_to_seconds
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_seconds)─┐
+│             2019-01-01 00:00:30 │
+└─────────────────────────────────┘
+```
+
+## toIntervalMillisecond
+
+Returns an interval of `n` milliseconds of data type [IntervalMillisecond](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalMillisecond(n)
+```
+
+**Arguments**
+
+- `n` — Number of milliseconds. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` milliseconds. [IntervalMillisecond](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDateTime('2019-01-01') AS date,
+    toIntervalMillisecond(30) AS interval_to_milliseconds
+SELECT date + interval_to_milliseconds
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_milliseconds)─┐
+│              2019-01-01 00:00:00.030 │
+└──────────────────────────────────────┘
+```
+
+## toIntervalMicrosecond
+
+Returns an interval of `n` microseconds of data type [IntervalMicrosecond](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalMicrosecond(n)
+```
+
+**Arguments**
+
+- `n` — Number of microseconds. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` microseconds. [IntervalMicrosecond](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDateTime('2019-01-01') AS date,
+    toIntervalMicrosecond(30) AS interval_to_microseconds
+SELECT date + interval_to_microseconds
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_microseconds)─┐
+│           2019-01-01 00:00:00.000030 │
+└──────────────────────────────────────┘
+```
+
+## toIntervalNanosecond
+
+Returns an interval of `n` nanoseconds of data type [IntervalNanosecond](../data-types/special-data-types/interval.md).
+
+**Syntax**
+
+``` sql
+toIntervalNanosecond(n)
+```
+
+**Arguments**
+
+- `n` — Number of nanoseconds. Positive integer number. [Int*](../data-types/int-uint.md).
+
+**Returned values**
+
+- Interval of `n` nanoseconds. [IntervalNanosecond](../data-types/special-data-types/interval.md).
+
+**Example**
+
+Query:
+
+``` sql
+WITH
+    toDateTime('2019-01-01') AS date,
+    toIntervalNanosecond(30) AS interval_to_nanoseconds
+SELECT date + interval_to_nanoseconds
+```
+
+Result:
+
+```response
+┌─plus(date, interval_to_nanoseconds)─┐
+│       2019-01-01 00:00:00.000000030 │
+└─────────────────────────────────────┘
 ```
 
 ## parseDateTime

From 7bebc448f30a026e41563553788f463243aa18fd Mon Sep 17 00:00:00 2001
From: megao
Date: Thu, 15 Aug 2024 10:42:22 +0800
Subject: [PATCH 087/180] fix progress value of view_refreshes

---
 src/Storages/System/StorageSystemViewRefreshes.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp
index 30539ed6b6a..97065f2b2fb 100644
--- a/src/Storages/System/StorageSystemViewRefreshes.cpp
+++ b/src/Storages/System/StorageSystemViewRefreshes.cpp
@@ -86,7 +86,8 @@ void StorageSystemViewRefreshes::fillData(
         res_columns[i++]->insert(refresh.exception_message);
         res_columns[i++]->insert(refresh.refresh_count);
 
-        res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read);
+//        res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, static_cast(1)));
+        res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0));
         res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9);
         res_columns[i++]->insert(refresh.progress.read_rows);
         res_columns[i++]->insert(refresh.progress.read_bytes);

From c9bfff3934ba5257dbfd1d43294b27155c6aec30 Mon Sep 17 00:00:00 2001
From: megao
Date: Thu, 15 Aug 2024 10:52:17 +0800
Subject: [PATCH 088/180] fix progress value of view_refreshes

---
 src/Storages/System/StorageSystemViewRefreshes.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp
index 97065f2b2fb..3941c4c39c2 100644
--- a/src/Storages/System/StorageSystemViewRefreshes.cpp
+++ b/src/Storages/System/StorageSystemViewRefreshes.cpp
@@ -86,7 +86,6 @@ void StorageSystemViewRefreshes::fillData(
         res_columns[i++]->insert(refresh.exception_message);
         res_columns[i++]->insert(refresh.refresh_count);
 
-//        res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, static_cast(1)));
         res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0));
         res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9);
         res_columns[i++]->insert(refresh.progress.read_rows);

From 7d01c3131265d0dfae24fbf6ab91c71073573765 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 15 Aug 2024 16:01:13 +0200
Subject: [PATCH 089/180] Delete old code of named collections

---
 src/Core/PostgreSQL/PoolWithFailover.cpp | 2 +-
 src/Core/PostgreSQL/PoolWithFailover.h | 9 +-
 src/Dictionaries/HTTPDictionarySource.cpp | 26 +-
 src/Dictionaries/MongoDBDictionarySource.cpp | 61 ++--
 .../PostgreSQLDictionarySource.cpp | 131 ++++++--
 .../ExternalDataSourceConfiguration.cpp | 288 ------------------
 .../ExternalDataSourceConfiguration.h | 92 ------
 src/Storages/NamedCollectionsHelpers.h | 2 +-
src/Storages/StorageExternalDistributed.h | 2 - src/TableFunctions/TableFunctionMongoDB.cpp | 1 - src/TableFunctions/TableFunctionRedis.cpp | 1 - 11 files changed, 170 insertions(+), 445 deletions(-) delete mode 100644 src/Storages/ExternalDataSourceConfiguration.cpp delete mode 100644 src/Storages/ExternalDataSourceConfiguration.h diff --git a/src/Core/PostgreSQL/PoolWithFailover.cpp b/src/Core/PostgreSQL/PoolWithFailover.cpp index 5014564dbe0..054fc3b2226 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.cpp +++ b/src/Core/PostgreSQL/PoolWithFailover.cpp @@ -23,7 +23,7 @@ namespace postgres { PoolWithFailover::PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout_, size_t max_tries_, diff --git a/src/Core/PostgreSQL/PoolWithFailover.h b/src/Core/PostgreSQL/PoolWithFailover.h index 502a9a9b7d7..2237c752367 100644 --- a/src/Core/PostgreSQL/PoolWithFailover.h +++ b/src/Core/PostgreSQL/PoolWithFailover.h @@ -8,7 +8,6 @@ #include "ConnectionHolder.h" #include #include -#include #include @@ -20,12 +19,12 @@ namespace postgres class PoolWithFailover { - -using RemoteDescription = std::vector>; - public: + using ReplicasConfigurationByPriority = std::map>; + using RemoteDescription = std::vector>; + PoolWithFailover( - const DB::ExternalDataSourcesConfigurationByPriority & configurations_by_priority, + const ReplicasConfigurationByPriority & configurations_by_priority, size_t pool_size, size_t pool_wait_timeout, size_t max_tries_, diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index 663c63dd6c6..d6df03b39df 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -8,12 +8,12 @@ #include #include #include -#include #include #include #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" +#include #include "registerDictionaries.h" @@ -223,21 +223,23 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) String endpoint; String format; - auto named_collection = created_from_ddl - ? getURLBasedDataSourceConfiguration(config, settings_config_prefix, global_context) - : std::nullopt; + auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix, global_context) : nullptr; if (named_collection) { - url = named_collection->configuration.url; - endpoint = named_collection->configuration.endpoint; - format = named_collection->configuration.format; + validateNamedCollection( + *named_collection, + /* required_keys */{}, + /* optional_keys */ValidateKeysMultiset{ + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); - credentials.setUsername(named_collection->configuration.user); - credentials.setPassword(named_collection->configuration.password); + url = named_collection->getOrDefault("url", ""); + endpoint = named_collection->getOrDefault("endpoint", ""); + format = named_collection->getOrDefault("format", ""); - header_entries.reserve(named_collection->configuration.headers.size()); - for (const auto & [key, value] : named_collection->configuration.headers) - header_entries.emplace_back(key, value); + credentials.setUsername(named_collection->getAnyOrDefault({"user", "credentials.user"}, "")); + credentials.setPassword(named_collection->getAnyOrDefault({"password", "credentials.password"}, "")); + + header_entries = getHeadersFromNamedCollection(*named_collection); } else { diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index c30a6f90e44..7bacfdab3d2 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -1,15 +1,12 @@ #include "MongoDBDictionarySource.h" #include "DictionarySourceFactory.h" #include "DictionaryStructure.h" -#include #include +#include namespace DB { -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "db", "database", "uri", "collection", "name", "method", "options"}; - void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) { auto create_mongo_db_dictionary = []( @@ -22,35 +19,53 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) bool created_from_ddl) { const auto config_prefix = root_config_prefix + ".mongodb"; - ExternalDataSourceConfiguration configuration; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key); }; - auto named_collection = getExternalDataSourceConfiguration(config, config_prefix, context, has_config_key); + auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, config_prefix, context) : nullptr; + + String host, username, password, database, method, options, collection; + UInt16 port; if (named_collection) { - configuration = named_collection->configuration; + validateNamedCollection( + *named_collection, + /* required_keys */{"collection"}, + /* optional_keys */ValidateKeysMultiset{ + "host", "port", "user", "password", "db", "database", "uri", "name", "method", "options"}); + + host = named_collection->getOrDefault("host", ""); + port = static_cast(named_collection->getOrDefault("port", 0)); + username = named_collection->getOrDefault("user", ""); + password = named_collection->getOrDefault("password", ""); + database = named_collection->getAnyOrDefault({"db", "database"}, ""); + method = named_collection->getOrDefault("method", ""); + collection = named_collection->getOrDefault("collection", ""); + options = named_collection->getOrDefault("options", ""); } else { - configuration.host = config.getString(config_prefix + ".host", ""); - configuration.port = config.getUInt(config_prefix + ".port", 0); - configuration.username = config.getString(config_prefix + ".user", ""); - configuration.password = config.getString(config_prefix + ".password", ""); - configuration.database = config.getString(config_prefix + ".db", ""); + host = config.getString(config_prefix + ".host", ""); + port = config.getUInt(config_prefix + ".port", 0); + username = config.getString(config_prefix + ".user", ""); + password = config.getString(config_prefix + ".password", ""); + database = config.getString(config_prefix + ".db", ""); + method = config.getString(config_prefix + ".method", ""); + collection = config.getString(config_prefix + ".collection"); + options = config.getString(config_prefix + ".options", ""); } if (created_from_ddl) - context->getRemoteHostFilter().checkHostAndPort(configuration.host, toString(configuration.port)); + context->getRemoteHostFilter().checkHostAndPort(host, toString(port)); - return std::make_unique(dict_struct, + return std::make_unique( + dict_struct, config.getString(config_prefix + ".uri", ""), - configuration.host, - configuration.port, - configuration.username, - configuration.password, - config.getString(config_prefix + ".method", ""), - configuration.database, - config.getString(config_prefix + ".collection"), - config.getString(config_prefix + ".options", ""), + host, + port, + username, + password, + method, + database, + collection, + options, sample_block); }; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index f62a9a009d8..fd026a97cd4 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -13,7 +13,7 @@ #include "readInvalidateQuery.h" #include #include -#include +#include #include #endif @@ -30,7 +30,7 @@ namespace ErrorCodes static const UInt64 max_block_size = 8192; -static const std::unordered_set dictionary_allowed_keys = { +static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; @@ -179,6 +179,19 @@ std::string PostgreSQLDictionarySource::toString() const #endif +static void validateConfigKeys( + const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) +{ + Poco::Util::AbstractConfiguration::Keys config_keys; + dict_config.keys(config_prefix, config_keys); + 
for (const auto & config_key : config_keys) + { + if (dictionary_allowed_keys.contains(config_key) || startsWith(config_key, "replica")) + continue; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); + } +} + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, @@ -191,38 +204,118 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { #if USE_LIBPQXX const auto settings_config_prefix = config_prefix + ".postgresql"; - auto has_config_key = [](const String & key) { return dictionary_allowed_keys.contains(key) || key.starts_with("replica"); }; - auto configuration = getExternalDataSourceConfigurationByPriority(config, settings_config_prefix, context, has_config_key); const auto & settings = context->getSettingsRef(); + std::optional dictionary_configuration; + String database, schema, table; + postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; + + auto named_collection = created_from_ddl ? tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; + if (named_collection) + { + validateNamedCollection>(*named_collection, {}, dictionary_allowed_keys); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = named_collection->getOrDefault("host", ""); + common_configuration.port = named_collection->getOrDefault("port", 0); + common_configuration.username = named_collection->getOrDefault("user", ""); + common_configuration.password = named_collection->getOrDefault("password", ""); + common_configuration.database = named_collection->getAnyOrDefault({"database", "db"}, ""); + common_configuration.schema = named_collection->getOrDefault("schema", ""); + common_configuration.table = named_collection->getOrDefault("table", ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration{ + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = common_configuration.table, + .query = named_collection->getOrDefault("query", ""), + .where = named_collection->getOrDefault("where", ""), + .invalidate_query = named_collection->getOrDefault("invalidate_query", ""), + .update_field = named_collection->getOrDefault("update_field", ""), + .update_lag = named_collection->getOrDefault("update_lag", 1), + }); + + replicas_by_priority[0].emplace_back(common_configuration); + } + else + { + validateConfigKeys(config, settings_config_prefix); + + StoragePostgreSQL::Configuration common_configuration; + common_configuration.host = config.getString(settings_config_prefix + ".host", ""); + common_configuration.port = config.getUInt(settings_config_prefix + ".port", 0); + common_configuration.username = config.getString(settings_config_prefix + ".user", ""); + common_configuration.password = config.getString(settings_config_prefix + ".password", ""); + common_configuration.database = config.getString(fmt::format("{}.database", settings_config_prefix), config.getString(fmt::format("{}.db", settings_config_prefix), "")); + common_configuration.schema = config.getString(fmt::format("{}.schema", settings_config_prefix), ""); + common_configuration.table = config.getString(fmt::format("{}.table", settings_config_prefix), ""); + + dictionary_configuration.emplace(PostgreSQLDictionarySource::Configuration + { + .db = common_configuration.database, + .schema = common_configuration.schema, + .table = 
common_configuration.table, + .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), + .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), + .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), + .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), + .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) + }); + + + if (config.has(settings_config_prefix + ".replica")) + { + Poco::Util::AbstractConfiguration::Keys config_keys; + config.keys(settings_config_prefix, config_keys); + + for (const auto & config_key : config_keys) + { + if (config_key.starts_with("replica")) + { + String replica_name = settings_config_prefix + "." + config_key; + StoragePostgreSQL::Configuration replica_configuration{common_configuration}; + + size_t priority = config.getInt(replica_name + ".priority", 0); + replica_configuration.host = config.getString(replica_name + ".host", common_configuration.host); + replica_configuration.port = config.getUInt(replica_name + ".port", common_configuration.port); + replica_configuration.username = config.getString(replica_name + ".user", common_configuration.username); + replica_configuration.password = config.getString(replica_name + ".password", common_configuration.password); + + if (replica_configuration.host.empty() || replica_configuration.port == 0 + || replica_configuration.username.empty() || replica_configuration.password.empty()) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); + } + + replicas_by_priority[priority].emplace_back(replica_configuration); + } + } + } + else + { + replicas_by_priority[0].emplace_back(common_configuration); + } + } if (created_from_ddl) { - for (const auto & replicas : configuration.replicas_configurations) - for (const auto & replica : replicas.second) + for (const auto & [_, replicas] : replicas_by_priority) + for (const auto & replica : replicas) context->getRemoteHostFilter().checkHostAndPort(replica.host, toString(replica.port)); } + auto pool = std::make_shared( - configuration.replicas_configurations, + replicas_by_priority, settings.postgresql_connection_pool_size, settings.postgresql_connection_pool_wait_timeout, settings.postgresql_connection_pool_retries, settings.postgresql_connection_pool_auto_close_connection, settings.postgresql_connection_attempt_timeout); - PostgreSQLDictionarySource::Configuration dictionary_configuration - { - .db = configuration.database, - .schema = configuration.schema, - .table = configuration.table, - .query = config.getString(fmt::format("{}.query", settings_config_prefix), ""), - .where = config.getString(fmt::format("{}.where", settings_config_prefix), ""), - .invalidate_query = config.getString(fmt::format("{}.invalidate_query", settings_config_prefix), ""), - .update_field = config.getString(fmt::format("{}.update_field", settings_config_prefix), ""), - .update_lag = config.getUInt64(fmt::format("{}.update_lag", settings_config_prefix), 1) - }; - return std::make_unique(dict_struct, dictionary_configuration, pool, sample_block); + return std::make_unique(dict_struct, dictionary_configuration.value(), pool, sample_block); #else (void)dict_struct; (void)config; diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp deleted file mode 
100644 index 41979f8d91c..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ /dev/null @@ -1,288 +0,0 @@ -#include "ExternalDataSourceConfiguration.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -static const std::unordered_set dictionary_allowed_keys = { - "host", "port", "user", "password", "quota_key", "db", - "database", "table", "schema", "replica", - "update_field", "update_lag", "invalidate_query", "query", - "where", "name", "secure", "uri", "collection"}; - - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - SettingsChanges config_settings; - for (const auto & setting : settings.all()) - { - const auto & setting_name = setting.getName(); - auto setting_value = config.getString(config_prefix + '.' + setting_name, ""); - if (!setting_value.empty()) - config_settings.emplace_back(setting_name, setting_value); - } - return config_settings; -} - - -String ExternalDataSourceConfiguration::toString() const -{ - WriteBufferFromOwnString configuration_info; - configuration_info << "username: " << username << "\t"; - if (addresses.empty()) - { - configuration_info << "host: " << host << "\t"; - configuration_info << "port: " << port << "\t"; - } - else - { - for (const auto & [replica_host, replica_port] : addresses) - { - configuration_info << "host: " << replica_host << "\t"; - configuration_info << "port: " << replica_port << "\t"; - } - } - return configuration_info.str(); -} - - -void ExternalDataSourceConfiguration::set(const ExternalDataSourceConfiguration & conf) -{ - host = conf.host; - port = conf.port; - username = conf.username; - password = conf.password; - quota_key = conf.quota_key; - database = conf.database; - table = conf.table; - schema = conf.schema; - addresses = conf.addresses; - addresses_expr = conf.addresses_expr; -} - - -static void validateConfigKeys( - const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix, HasConfigKeyFunc has_config_key_func) -{ - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(config_prefix, config_keys); - for (const auto & config_key : config_keys) - { - if (!has_config_key_func(config_key)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected key `{}` in dictionary source configuration", config_key); - } -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration configuration; - - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - validateConfigKeys(dict_config, collection_prefix, has_config_key); - auto config_settings = getSettingsChangesFromConfig(settings, config, collection_prefix); - auto dict_settings = getSettingsChangesFromConfig(settings, dict_config, dict_config_prefix); - /// dictionary config settings override collection settings. 
- config_settings.insert(config_settings.end(), dict_settings.begin(), dict_settings.end()); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.host = dict_config.getString(dict_config_prefix + ".host", config.getString(collection_prefix + ".host", "")); - configuration.port = dict_config.getInt(dict_config_prefix + ".port", config.getUInt(collection_prefix + ".port", 0)); - configuration.username = dict_config.getString(dict_config_prefix + ".user", config.getString(collection_prefix + ".user", "")); - configuration.password = dict_config.getString(dict_config_prefix + ".password", config.getString(collection_prefix + ".password", "")); - configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", config.getString(collection_prefix + ".quota_key", "")); - configuration.database = dict_config.getString(dict_config_prefix + ".db", config.getString(dict_config_prefix + ".database", - config.getString(collection_prefix + ".db", config.getString(collection_prefix + ".database", "")))); - configuration.table = dict_config.getString(dict_config_prefix + ".table", config.getString(collection_prefix + ".table", "")); - configuration.schema = dict_config.getString(dict_config_prefix + ".schema", config.getString(collection_prefix + ".schema", "")); - - if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and dictionary parameters are not added"); - } - return ExternalDataSourceInfo{.configuration = configuration, .settings_changes = config_settings}; - } - return std::nullopt; -} - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context) -{ - URLBasedDataSourceConfiguration configuration; - auto collection_name = dict_config.getString(dict_config_prefix + ".name", ""); - if (!collection_name.empty()) - { - const auto & config = context->getConfigRef(); - const auto & collection_prefix = fmt::format("named_collections.{}", collection_name); - - if (!config.has(collection_prefix)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); - - configuration.url = - dict_config.getString(dict_config_prefix + ".url", config.getString(collection_prefix + ".url", "")); - configuration.endpoint = - dict_config.getString(dict_config_prefix + ".endpoint", config.getString(collection_prefix + ".endpoint", "")); - configuration.format = - dict_config.getString(dict_config_prefix + ".format", config.getString(collection_prefix + ".format", "")); - configuration.compression_method = - dict_config.getString(dict_config_prefix + ".compression", config.getString(collection_prefix + ".compression_method", "")); - configuration.structure = - dict_config.getString(dict_config_prefix + ".structure", config.getString(collection_prefix + ".structure", "")); - configuration.user = - dict_config.getString(dict_config_prefix + ".credentials.user", config.getString(collection_prefix + ".credentials.user", "")); - configuration.password = - dict_config.getString(dict_config_prefix + ".credentials.password", config.getString(collection_prefix + ".credentials.password", "")); - - String headers_prefix; - const 
Poco::Util::AbstractConfiguration *headers_config = nullptr; - if (dict_config.has(dict_config_prefix + ".headers")) - { - headers_prefix = dict_config_prefix + ".headers"; - headers_config = &dict_config; - } - else - { - headers_prefix = collection_prefix + ".headers"; - headers_config = &config; - } - - if (headers_config) - { - Poco::Util::AbstractConfiguration::Keys header_keys; - headers_config->keys(headers_prefix, header_keys); - headers_prefix += "."; - for (const auto & header : header_keys) - { - const auto header_prefix = headers_prefix + header; - configuration.headers.emplace_back( - headers_config->getString(header_prefix + ".name"), - headers_config->getString(header_prefix + ".value")); - } - } - - return URLBasedDataSourceConfig{ .configuration = configuration }; - } - - return std::nullopt; -} - -ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key) -{ - validateConfigKeys(dict_config, dict_config_prefix, has_config_key); - ExternalDataSourceConfiguration common_configuration; - - auto named_collection = getExternalDataSourceConfiguration(dict_config, dict_config_prefix, context, has_config_key); - if (named_collection) - { - common_configuration = named_collection->configuration; - } - else - { - common_configuration.host = dict_config.getString(dict_config_prefix + ".host", ""); - common_configuration.port = dict_config.getUInt(dict_config_prefix + ".port", 0); - common_configuration.username = dict_config.getString(dict_config_prefix + ".user", ""); - common_configuration.password = dict_config.getString(dict_config_prefix + ".password", ""); - common_configuration.quota_key = dict_config.getString(dict_config_prefix + ".quota_key", ""); - common_configuration.database = dict_config.getString(dict_config_prefix + ".db", dict_config.getString(dict_config_prefix + ".database", "")); - common_configuration.table = dict_config.getString(fmt::format("{}.table", dict_config_prefix), ""); - common_configuration.schema = dict_config.getString(fmt::format("{}.schema", dict_config_prefix), ""); - } - - ExternalDataSourcesByPriority configuration - { - .database = common_configuration.database, - .table = common_configuration.table, - .schema = common_configuration.schema, - .replicas_configurations = {} - }; - - if (dict_config.has(dict_config_prefix + ".replica")) - { - Poco::Util::AbstractConfiguration::Keys config_keys; - dict_config.keys(dict_config_prefix, config_keys); - - for (const auto & config_key : config_keys) - { - if (config_key.starts_with("replica")) - { - ExternalDataSourceConfiguration replica_configuration(common_configuration); - String replica_name = dict_config_prefix + "." 
+ config_key; - validateConfigKeys(dict_config, replica_name, has_config_key); - - size_t priority = dict_config.getInt(replica_name + ".priority", 0); - replica_configuration.host = dict_config.getString(replica_name + ".host", common_configuration.host); - replica_configuration.port = dict_config.getUInt(replica_name + ".port", common_configuration.port); - replica_configuration.username = dict_config.getString(replica_name + ".user", common_configuration.username); - replica_configuration.password = dict_config.getString(replica_name + ".password", common_configuration.password); - replica_configuration.quota_key = dict_config.getString(replica_name + ".quota_key", common_configuration.quota_key); - - if (replica_configuration.host.empty() || replica_configuration.port == 0 - || replica_configuration.username.empty() || replica_configuration.password.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some " - "of the parameters and no other dictionary parameters are added"); - } - - configuration.replicas_configurations[priority].emplace_back(replica_configuration); - } - } - } - else - { - configuration.replicas_configurations[0].emplace_back(common_configuration); - } - - return configuration; -} - - -void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration & conf) -{ - url = conf.url; - format = conf.format; - compression_method = conf.compression_method; - structure = conf.structure; - http_method = conf.http_method; - headers = conf.headers; -} - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings); - -template -SettingsChanges getSettingsChangesFromConfig( - const BaseSettings & settings, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); - -} diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h deleted file mode 100644 index c703c9ce999..00000000000 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -#define EMPTY_SETTINGS(M, ALIAS) -DECLARE_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) - -struct EmptySettings : public BaseSettings {}; - -struct ExternalDataSourceConfiguration -{ - String host; - UInt16 port = 0; - String username = "default"; - String password; - String quota_key; - String database; - String table; - String schema; - - std::vector> addresses; /// Failover replicas. - String addresses_expr; - - String toString() const; - - void set(const ExternalDataSourceConfiguration & conf); -}; - - -using StorageSpecificArgs = std::vector>; - -struct ExternalDataSourceInfo -{ - ExternalDataSourceConfiguration configuration; - SettingsChanges settings_changes; -}; - -using HasConfigKeyFunc = std::function; - -template -std::optional getExternalDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, - ContextPtr context, HasConfigKeyFunc has_config_key, const BaseSettings & settings = {}); - - -/// Highest priority is 0, the bigger the number in map, the less the priority. 
-using ExternalDataSourcesConfigurationByPriority = std::map>; - -struct ExternalDataSourcesByPriority -{ - String database; - String table; - String schema; - ExternalDataSourcesConfigurationByPriority replicas_configurations; -}; - -ExternalDataSourcesByPriority -getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context, HasConfigKeyFunc has_config_key); - -struct URLBasedDataSourceConfiguration -{ - String url; - String endpoint; - String format = "auto"; - String compression_method = "auto"; - String structure = "auto"; - - String user; - String password; - - HTTPHeaderEntries headers; - String http_method; - - void set(const URLBasedDataSourceConfiguration & conf); -}; - -struct URLBasedDataSourceConfig -{ - URLBasedDataSourceConfiguration configuration; -}; - -std::optional getURLBasedDataSourceConfiguration( - const Poco::Util::AbstractConfiguration & dict_config, const String & dict_config_prefix, ContextPtr context); - -} diff --git a/src/Storages/NamedCollectionsHelpers.h b/src/Storages/NamedCollectionsHelpers.h index f444a581eb6..bf2da7235a2 100644 --- a/src/Storages/NamedCollectionsHelpers.h +++ b/src/Storages/NamedCollectionsHelpers.h @@ -133,7 +133,7 @@ void validateNamedCollection( { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "Unexpected key {} in named collection. Required keys: {}, optional keys: {}", + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", backQuoteIfNeed(key), fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); } } diff --git a/src/Storages/StorageExternalDistributed.h b/src/Storages/StorageExternalDistributed.h index c4d37c3e5cc..56c7fe86f34 100644 --- a/src/Storages/StorageExternalDistributed.h +++ b/src/Storages/StorageExternalDistributed.h @@ -8,8 +8,6 @@ namespace DB { -struct ExternalDataSourceConfiguration; - /// Storages MySQL and PostgreSQL use ConnectionPoolWithFailover and support multiple replicas. /// This class unites multiple storages with replicas into multiple shards with replicas. /// A query to external database is passed to one replica on each shard, the result is united. 
diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b2cf1b4675e..94279d1bf6d 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -1,5 +1,4 @@ #include -#include #include diff --git a/src/TableFunctions/TableFunctionRedis.cpp b/src/TableFunctions/TableFunctionRedis.cpp index f87ba6d1c6d..aca751c2840 100644 --- a/src/TableFunctions/TableFunctionRedis.cpp +++ b/src/TableFunctions/TableFunctionRedis.cpp @@ -15,7 +15,6 @@ #include #include -#include namespace DB From f6e1eb1643888c2b8bbc179899cbc4bacaee5b78 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 16:31:48 +0200 Subject: [PATCH 090/180] Fix style check --- src/Dictionaries/HTTPDictionarySource.cpp | 2 +- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Dictionaries/HTTPDictionarySource.cpp b/src/Dictionaries/HTTPDictionarySource.cpp index d6df03b39df..bf19f912723 100644 --- a/src/Dictionaries/HTTPDictionarySource.cpp +++ b/src/Dictionaries/HTTPDictionarySource.cpp @@ -230,7 +230,7 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory) *named_collection, /* required_keys */{}, /* optional_keys */ValidateKeysMultiset{ - "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); + "url", "endpoint", "user", "credentials.user", "password", "credentials.password", "format", "compression_method", "structure", "name"}); url = named_collection->getOrDefault("url", ""); endpoint = named_collection->getOrDefault("endpoint", ""); diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd026a97cd4..fd426de126d 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -24,6 +24,7 @@ namespace DB namespace ErrorCodes { extern const int SUPPORT_IS_DISABLED; + extern const int BAD_ARGUMENTS; } #if USE_LIBPQXX From 077f10a4ada2a561111207d8e99e22d2c8e48f40 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Aug 2024 18:26:48 +0200 Subject: [PATCH 091/180] Fix build --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index fd426de126d..8e472f85a6e 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -4,6 +4,7 @@ #include #include #include "DictionarySourceFactory.h" +#include #include "registerDictionaries.h" #if USE_LIBPQXX @@ -13,7 +14,6 @@ #include "readInvalidateQuery.h" #include #include -#include #include #endif @@ -27,14 +27,14 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -#if USE_LIBPQXX - -static const UInt64 max_block_size = 8192; - static const ValidateKeysMultiset dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; +#if USE_LIBPQXX + +static const UInt64 max_block_size = 8192; + namespace { ExternalQueryBuilder makeExternalQueryBuilder(const DictionaryStructure & dict_struct, const String & schema, const String & table, const String & query, const String & where) @@ -178,8 +178,6 @@ std::string PostgreSQLDictionarySource::toString() const return 
"PostgreSQL: " + configuration.db + '.' + configuration.table + (where.empty() ? "" : ", where: " + where); } -#endif - static void validateConfigKeys( const Poco::Util::AbstractConfiguration & dict_config, const String & config_prefix) { @@ -193,6 +191,8 @@ static void validateConfigKeys( } } +#endif + void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) { auto create_table_source = [=](const DictionaryStructure & dict_struct, From a49ef43286e31525f8829e3307a9231be0ce417c Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Thu, 15 Aug 2024 21:33:26 +0000 Subject: [PATCH 092/180] Fix 01119_session_log flakiness --- ...9_session_log.sql => 01119_session_log.sh} | 37 +++++++++++++++---- 1 file changed, 30 insertions(+), 7 deletions(-) rename tests/queries/0_stateless/{01119_session_log.sql => 01119_session_log.sh} (73%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/01119_session_log.sql b/tests/queries/0_stateless/01119_session_log.sh old mode 100644 new mode 100755 similarity index 73% rename from tests/queries/0_stateless/01119_session_log.sql rename to tests/queries/0_stateless/01119_session_log.sh index 55f6228797a..809d300fada --- a/tests/queries/0_stateless/01119_session_log.sql +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -1,5 +1,20 @@ --- Tags: no-fasttest +#!/usr/bin/env bash +# Tags: no-fasttest +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +session_log_query_prefix=" +system flush logs; +select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log +where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') +and interface in ('HTTP', 'TCP', 'TCP_Interserver') +and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) +and event_time >= now() - interval 5 minute" + +$CLICKHOUSE_CLIENT -nm -q " select * from remote('127.0.0.2', system, one, 'default', ''); select * from remote('127.0.0.2', system, one, 'default', 'wrong password'); -- { serverError AUTHENTICATION_FAILED } select * from remote('127.0.0.2', system, one, 'nonexistsnt_user_1119', ''); -- { serverError AUTHENTICATION_FAILED } @@ -16,9 +31,17 @@ select * from url('http://127.0.0.1:8123/?query=select+1&user=+++', LineAsString select * from cluster('test_cluster_interserver_secret', system, one); -system flush logs; -select distinct type, user, auth_type, toString(client_address)!='::ffff:0.0.0.0' as a, client_port!=0 as b, interface from system.session_log -where user in ('default', 'nonexistsnt_user_1119', ' ', ' INTERSERVER SECRET ') -and interface in ('HTTP', 'TCP', 'TCP_Interserver') -and (user != 'default' or (a=1 and b=1)) -- FIXME: we should not write uninitialized address and port (but we do sometimes) -and event_time >= now() - interval 5 minute order by type, user, interface; +$session_log_query_prefix and type != 'Logout' order by type, user, interface; +" + +# Wait for logout events. 
+for attempt in {1..10} +do + if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] + then + break + fi + sleep 2 +done + +$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout' order by user, interface" From 253188381759d062967c9bb3b9d0907f30eab61a Mon Sep 17 00:00:00 2001 From: Michael Kolupaev Date: Fri, 16 Aug 2024 05:39:50 +0000 Subject: [PATCH 093/180] she sells seashells by seashore the shells that she sells are seashells im sure --- tests/queries/0_stateless/01119_session_log.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01119_session_log.sh b/tests/queries/0_stateless/01119_session_log.sh index 809d300fada..2d17b545276 100755 --- a/tests/queries/0_stateless/01119_session_log.sh +++ b/tests/queries/0_stateless/01119_session_log.sh @@ -35,7 +35,7 @@ $session_log_query_prefix and type != 'Logout' order by type, user, interface; " # Wait for logout events. -for attempt in {1..10} +for _ in {1..10} do if [ "`$CLICKHOUSE_CLIENT -q "$session_log_query_prefix and type = 'Logout'" | wc -l`" -eq 3 ] then From a85f544205f2e782d4f6c16c0622728475db3571 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 16 Aug 2024 08:47:28 +0000 Subject: [PATCH 094/180] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index bd92465e1aa..c8edbdc5932 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,3 @@ 01624_soft_constraints -02354_vector_search_queries # Check after ConstantNode refactoring 02944_variant_as_common_type From 1b49e2492521c54b0e6240d412af847d3fa21221 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:26:31 +0200 Subject: [PATCH 095/180] Fix clang-tidy --- src/Dictionaries/PostgreSQLDictionarySource.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 8e472f85a6e..b1bab17e2e9 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -208,7 +208,6 @@ void registerDictionarySourcePostgreSQL(DictionarySourceFactory & factory) const auto & settings = context->getSettingsRef(); std::optional dictionary_configuration; - String database, schema, table; postgres::PoolWithFailover::ReplicasConfigurationByPriority replicas_by_priority; auto named_collection = created_from_ddl ? 
tryGetNamedCollectionWithOverrides(config, settings_config_prefix, context) : nullptr; From 60a6e893a40761eb46655e76cb6a3fe5f177019c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 16 Aug 2024 17:56:12 +0800 Subject: [PATCH 096/180] first commit --- src/Common/examples/CMakeLists.txt | 5 + src/Common/examples/utf8_upper_lower.cpp | 27 ++ src/Functions/LowerUpperImpl.h | 1 - src/Functions/LowerUpperUTF8Impl.h | 283 +++--------------- src/Functions/initcapUTF8.cpp | 3 +- src/Functions/lowerUTF8.cpp | 25 +- src/Functions/upperUTF8.cpp | 24 +- .../00170_lower_upper_utf8.reference | 4 + .../0_stateless/00170_lower_upper_utf8.sql | 11 + .../00233_position_function_family.sql | 3 + .../0_stateless/00761_lower_utf8_bug.sql | 3 + .../0_stateless/01278_random_string_utf8.sql | 3 + .../0_stateless/01431_utf8_ubsan.reference | 4 +- .../queries/0_stateless/01431_utf8_ubsan.sql | 3 + .../0_stateless/01590_countSubstrings.sql | 3 + ...71_lower_upper_utf8_row_overlaps.reference | 4 +- .../02071_lower_upper_utf8_row_overlaps.sql | 3 + ...new_functions_must_be_documented.reference | 2 - .../02514_if_with_lazy_low_cardinality.sql | 3 + .../0_stateless/02807_lower_utf8_msan.sql | 3 + tests/queries/0_stateless/03015_peder1001.sql | 3 + 21 files changed, 159 insertions(+), 261 deletions(-) create mode 100644 src/Common/examples/utf8_upper_lower.cpp diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt index 69580d4ad0e..8383e80d09d 100644 --- a/src/Common/examples/CMakeLists.txt +++ b/src/Common/examples/CMakeLists.txt @@ -92,3 +92,8 @@ endif() clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp) target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config) + +if (TARGET ch_contrib::icu) + clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp) + target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu) +endif () diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp new file mode 100644 index 00000000000..826e1763105 --- /dev/null +++ b/src/Common/examples/utf8_upper_lower.cpp @@ -0,0 +1,27 @@ +#include +#include + +std::string utf8_to_lower(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toLower(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +std::string utf8_to_upper(const std::string & input) +{ + icu::UnicodeString unicodeInput(input.c_str(), "UTF-8"); + unicodeInput.toUpper(); + std::string output; + unicodeInput.toUTF8String(output); + return output; +} + +int main() +{ + std::string input = "ır"; + std::cout << "upper:" << utf8_to_upper(input) << std::endl; + return 0; +} diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index d463ef96e16..a52703d10c8 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -1,7 +1,6 @@ #pragma once #include - namespace DB { diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index eedabca5b22..5da085f48e5 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -1,15 +1,14 @@ #pragma once + +#include "config.h" + +#if USE_ICU + #include #include -#include -#include +#include +#include #include -#include - -#ifdef __SSE2__ -#include -#endif - namespace DB { @@ -19,71 +18,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -/// xor or do nothing -template -UInt8 xor_or_identity(const 
UInt8 c, const int mask) -{ - return c ^ mask; -} - -template <> -inline UInt8 xor_or_identity(const UInt8 c, const int) -{ - return c; -} - -/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array -template -inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) -{ - if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// ѐёђѓєѕіїјљњћќѝўџ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// А-П - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) - { - /// а-п - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) - { - /// Р-Я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// р-я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } -} - - -/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case. - * Note: It is assumed that after the character is converted to another case, - * the length of its multibyte sequence in UTF-8 does not change. - * Otherwise, the behavior is undefined. - */ -template +template struct LowerUpperUTF8Impl { static void vector( @@ -103,180 +38,46 @@ struct LowerUpperUTF8Impl return; } - res_data.resize_exact(data.size()); - res_offsets.assign(offsets); - array(data.data(), data.data() + data.size(), offsets, res_data.data()); + res_data.resize(data.size()); + res_offsets.resize_exact(offsets.size()); + + String output; + size_t curr_offset = 0; + for (size_t i = 0; i < offsets.size(); ++i) + { + const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]); + size_t size = offsets[i] - offsets[i - 1]; + + icu::UnicodeString input(data_start, static_cast(size), "UTF-8"); + if constexpr (upper) + input.toUpper(); + else + input.toLower(); + + output.clear(); + input.toUTF8String(output); + + /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first + /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this + /// case, the behavior is also reasonable. + const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size()); + size_t valid_size = res_end ? res_end - output.data() + 1 : 0; + + res_data.resize(curr_offset + valid_size + 1); + memcpy(&res_data[curr_offset], output.data(), valid_size); + res_data[curr_offset + valid_size] = 0; + + curr_offset += valid_size + 1; + res_offsets[i] = curr_offset; + } } static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument"); } - - /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`. - * `src` and `dst` are incremented by corresponding sequence lengths. 
*/ - static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial) - { - if (src[0] <= ascii_upper_bound) - { - if (*src >= not_case_lower_bound && *src <= not_case_upper_bound) - *dst++ = *src++ ^ flip_case_mask; - else - *dst++ = *src++; - } - else if (src + 1 < src_end - && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu)))) - { - cyrillic_to_case(src, dst); - } - else if (src + 1 < src_end && src[0] == 0xC2u) - { - /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF - *dst++ = *src++; - *dst++ = *src++; - } - else if (src + 2 < src_end && src[0] == 0xE2u) - { - /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF - *dst++ = *src++; - *dst++ = *src++; - *dst++ = *src++; - } - else - { - size_t src_sequence_length = UTF8::seqLength(*src); - /// In case partial buffer was passed (due to SSE optimization) - /// we cannot convert it with current src_end, but we may have more - /// bytes to convert and eventually got correct symbol. - if (partial && src_sequence_length > static_cast(src_end - src)) - return false; - - auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); - if (src_code_point) - { - int dst_code_point = to_case(*src_code_point); - if (dst_code_point > 0) - { - size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); - assert(dst_sequence_length <= 4); - - /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. - /// As an example, this happens for ß and ẞ. - if (dst_sequence_length == src_sequence_length) - { - src += dst_sequence_length; - dst += dst_sequence_length; - return true; - } - } - } - - *dst = *src; - ++dst; - ++src; - } - - return true; - } - -private: - static constexpr auto ascii_upper_bound = '\x7f'; - static constexpr auto flip_case_mask = 'A' ^ 'a'; - - static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) - { - const auto * offset_it = offsets.begin(); - const UInt8 * begin = src; - -#ifdef __SSE2__ - static constexpr auto bytes_sse = sizeof(__m128i); - - /// If we are before this position, we can still read at least bytes_sse. 
- const auto * src_end_sse = src_end - bytes_sse + 1; - - /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) - const auto v_zero = _mm_setzero_si128(); - const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); - const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); - const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask); - - while (src < src_end_sse) - { - const auto chars = _mm_loadu_si128(reinterpret_cast(src)); - - /// check for ASCII - const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero); - const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii); - - /// ASCII - if (mask_is_not_ascii == 0) - { - const auto is_not_case - = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound)); - const auto mask_is_not_case = _mm_movemask_epi8(is_not_case); - - /// everything in correct case ASCII - if (mask_is_not_case == 0) - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars); - else - { - /// ASCII in mixed case - /// keep `flip_case_mask` only where necessary, zero out elsewhere - const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case); - - /// flip case by applying calculated mask - const auto cased_chars = _mm_xor_si128(chars, xor_mask); - - /// store result back to destination - _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars); - } - - src += bytes_sse; - dst += bytes_sse; - } - else - { - /// UTF-8 - - /// Find the offset of the next string after src - size_t offset_from_begin = src - begin; - while (offset_from_begin >= *offset_it) - ++offset_it; - - /// Do not allow one row influence another (since row may have invalid sequence, and break the next) - const UInt8 * row_end = begin + *offset_it; - chassert(row_end >= src); - const UInt8 * expected_end = std::min(src + bytes_sse, row_end); - - while (src < expected_end) - { - if (!toCase(src, expected_end, dst, /* partial= */ true)) - { - /// Fallback to handling byte by byte. 
-                        src_end_sse = src;
-                        break;
-                    }
-                }
-            }
-        }
-
-        /// Find the offset of the next string after src
-        size_t offset_from_begin = src - begin;
-        while (offset_it != offsets.end() && offset_from_begin >= *offset_it)
-            ++offset_it;
-#endif
-
-        /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another)
-        while (src < src_end)
-        {
-            const UInt8 * row_end = begin + *offset_it;
-            chassert(row_end >= src);
-
-            while (src < row_end)
-                toCase(src, row_end, dst, /* partial= */ false);
-            ++offset_it;
-        }
-    }
 };
 
 }
+
+#endif
diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp
index 282d846094e..004586dce26 100644
--- a/src/Functions/initcapUTF8.cpp
+++ b/src/Functions/initcapUTF8.cpp
@@ -1,9 +1,8 @@
 #include
 #include
-#include
 #include
 #include
-
+#include
 
 namespace DB
 {
diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp
index 7adb0069121..e2f7cb84730 100644
--- a/src/Functions/lowerUTF8.cpp
+++ b/src/Functions/lowerUTF8.cpp
@@ -1,9 +1,10 @@
-#include
+#include "config.h"
+
+#if USE_ICU
+
+#include
 #include
 #include
-#include
-#include
-
 
 namespace DB
 {
@@ -15,13 +16,25 @@ struct NameLowerUTF8
     static constexpr auto name = "lowerUTF8";
 };
 
-using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>;
+using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>;
 
 }
 
 REGISTER_FUNCTION(LowerUTF8)
 {
-    factory.registerFunction();
+    FunctionDocumentation::Description description
+        = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)";
+    FunctionDocumentation::Syntax syntax = "lowerUTF8(input)";
+    FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}};
+    FunctionDocumentation::ReturnedValue returned_value = "A String data type value";
+    FunctionDocumentation::Examples examples = {
+        {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"},
+    };
+    FunctionDocumentation::Categories categories = {"String"};
+
+    factory.registerFunction({description, syntax, arguments, returned_value, examples, categories});
 }
 
 }
+
+#endif
diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp
index 659e67f0ef3..ef26430331f 100644
--- a/src/Functions/upperUTF8.cpp
+++ b/src/Functions/upperUTF8.cpp
@@ -1,8 +1,10 @@
+#include "config.h"
+
+#if USE_ICU
+
+#include
 #include
 #include
-#include
-#include
-
 
 namespace DB
 {
@@ -14,13 +16,25 @@ struct NameUpperUTF8
     static constexpr auto name = "upperUTF8";
 };
 
-using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>;
+using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>;
 
 }
 
 REGISTER_FUNCTION(UpperUTF8)
 {
-    factory.registerFunction();
+    FunctionDocumentation::Description description
+        = R"(Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. 
If this assumption is violated, no exception is thrown and the result is undefined.)";
+    FunctionDocumentation::Syntax syntax = "upperUTF8(input)";
+    FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}};
+    FunctionDocumentation::ReturnedValue returned_value = "A String data type value";
+    FunctionDocumentation::Examples examples = {
+        {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"},
+    };
+    FunctionDocumentation::Categories categories = {"String"};
+
+    factory.registerFunction<FunctionUpperUTF8>({description, syntax, arguments, returned_value, examples, categories});
 }
 
 }
+
+#endif
diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference
index f202cb75513..3c644f22b9b 100644
--- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference
+++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference
@@ -22,3 +22,7 @@
 1
 1
 1
+1
+1
+1
+1
diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql
index 4caba2033ff..85b6c5c6095 100644
--- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql
+++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql
@@ -1,3 +1,6 @@
+-- Tags: no-fasttest
+-- no-fasttest: upper/lowerUTF8 use ICU
+
 select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str;
 select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str;
 select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa';
@@ -27,3 +30,11 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ
 select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n;
 select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n;
 select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n;
+
+-- Turkish language
+select upperUTF8('ır') = 'IR';
+select lowerUTF8('ır') = 'ır';
+
+-- German language
+select upper('öäüß') = 'öäüß';
+select lower('ÖÄÜẞ') = 'ÖÄÜẞ';
diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql
index dd7394bc39a..d6668cb7ba4 100644
--- a/tests/queries/0_stateless/00233_position_function_family.sql
+++ b/tests/queries/0_stateless/00233_position_function_family.sql
@@ -1,3 +1,6 @@
+-- Tags: no-fasttest
+-- no-fasttest: upper/lowerUTF8 use ICU
+
 SET send_logs_level = 'fatal';
 
 select 1 = position('', '');
diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql
index de20b894331..a0ab55edc15 100644
--- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql
+++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql
@@ -1 +1,4 @@
+-- Tags: no-fasttest
+-- no-fasttest: upper/lowerUTF8 use ICU
+
 SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0');
diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql
index da2dc48c3e1..290d6a0c759 100644
--- a/tests/queries/0_stateless/01278_random_string_utf8.sql
+++ b/tests/queries/0_stateless/01278_random_string_utf8.sql
@@ -1,3 +1,6 @@
+-- Tags: no-fasttest
+-- no-fasttest: upper/lowerUTF8 use ICU
+
 SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT lengthUTF8(randomStringUTF8(100));
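
The Turkish pair added above ('ı' uppercasing to 'I') is exactly where hand-rolled case tables fall short and where ICU's full Unicode case mappings come in; the dotless 'ı' maps to 'I' in every locale, while the locale-sensitive pair is 'i'/'İ', which is why pinning a fixed (root) locale keeps results deterministic. A hedged sketch of a root-locale conversion through ICU's C++ API (standalone illustration, not the ClickHouse implementation; link with -licuuc):

#include <unicode/unistr.h>
#include <unicode/locid.h>
#include <iostream>
#include <string>

int main()
{
    std::string in = "ır München";
    icu::UnicodeString s = icu::UnicodeString::fromUTF8(in);
    s.toUpper(icu::Locale::getRoot()); /// root locale: dotless 'ı' (U+0131) maps to 'I'
    std::string out;
    s.toUTF8String(out);
    std::cout << out << '\n'; /// "IR MÜNCHEN"
}
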
SELECT toTypeName(randomStringUTF8(10)); diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference index c98c950d535..dc785e57851 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.reference +++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference @@ -1,2 +1,2 @@ -FF -FF +EFBFBD +EFBFBD diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql index d6a299225b1..3a28e023805 100644 --- a/tests/queries/0_stateless/01431_utf8_ubsan.sql +++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql @@ -1,2 +1,5 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + SELECT hex(lowerUTF8('\xFF')); SELECT hex(upperUTF8('\xFF')); diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql index b38cbb7d188..5ec4f412d7f 100644 --- a/tests/queries/0_stateless/01590_countSubstrings.sql +++ b/tests/queries/0_stateless/01590_countSubstrings.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + -- -- countSubstrings -- diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference index a3bac432482..deabef61a88 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference @@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo -- MONOGRAM FOR YANG with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_ select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV; -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -1,"0xE2","0xE2" +1,"�","�" 15,"foo⚊barbazbam","FOO⚊BARBAZBAM" -- NOTE: regression test for introduced bug -- https://github.com/ClickHouse/ClickHouse/issues/42756 diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql index 8ca0a3f5f75..d175e0659d0 100644 --- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql +++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + drop table if exists utf8_overlap; create table utf8_overlap (str String) engine=Memory(); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index c39f1fb1ce9..0980e25b70f 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -416,7 +416,6 @@ logTrace lowCardinalityIndices lowCardinalityKeys lower -lowerUTF8 makeDate makeDate32 makeDateTime @@ -897,7 +896,6 @@ tupleToNameValuePairs unbin unhex upper -upperUTF8 uptime validateNestedArraySizes version diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql index 80e3c0a9ece..b169cfd0ab9 100644 --- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql +++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql @@ -1,3 +1,6 @@ +-- Tags: no-fasttest +-- no-fasttest: upper/lowerUTF8 use ICU + create table if not exists t 
(`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String)))
 engine = Memory;
 
 insert into t (`arr.key`, `arr.value`) values (['a'], ['b']);
 
 select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr;
diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql
index e9eb18bf615..95f224577f7 100644
--- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql
+++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql
@@ -1,2 +1,5 @@
+-- Tags: no-fasttest
+-- no-fasttest: upper/lowerUTF8 use ICU
+
 SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1;
 SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2);
diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql
index 810503207f2..df8e4db1536 100644
--- a/tests/queries/0_stateless/03015_peder1001.sql
+++ b/tests/queries/0_stateless/03015_peder1001.sql
@@ -1,3 +1,6 @@
+-- Tags: no-fasttest
+-- no-fasttest: upper/lowerUTF8 use ICU
+
 DROP TABLE IF EXISTS test_data;
 
 CREATE TABLE test_data
From 4600b270dafec20b276ab83eb557270c24cb4169 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Fri, 16 Aug 2024 17:58:54 +0800
Subject: [PATCH 097/180] remove icu contrib

---
 .gitmodules | 3 ---
 contrib/icu | 1 -
 2 files changed, 4 deletions(-)
 delete mode 160000 contrib/icu

diff --git a/.gitmodules b/.gitmodules
index 7fdfb1103c5..164da311930 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -106,9 +106,6 @@
 [submodule "contrib/icudata"]
     path = contrib/icudata
     url = https://github.com/ClickHouse/icudata
-[submodule "contrib/icu"]
-    path = contrib/icu
-    url = https://github.com/unicode-org/icu
 [submodule "contrib/flatbuffers"]
     path = contrib/flatbuffers
     url = https://github.com/ClickHouse/flatbuffers
diff --git a/contrib/icu b/contrib/icu
deleted file mode 160000
index 7750081bda4..00000000000
--- a/contrib/icu
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625
From 3ee741bd5e33d16b2f5711a8f2b06fca1a64b7bc Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Fri, 16 Aug 2024 18:04:15 +0800
Subject: [PATCH 098/180] add submodule contrib/icu from clickhouse

---
 .gitmodules | 4 ++++
 contrib/icu | 1 +
 2 files changed, 5 insertions(+)
 create mode 160000 contrib/icu

diff --git a/.gitmodules b/.gitmodules
index 164da311930..a8cc6a07caf 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -372,3 +372,7 @@
 [submodule "contrib/numactl"]
     path = contrib/numactl
     url = https://github.com/ClickHouse/numactl.git
+[submodule "contrib/icu"]
+    path = contrib/icu
+    url = https://github.com/ClickHouse/icu
+    branch = ClickHouse/release-75-1
diff --git a/contrib/icu b/contrib/icu
new file mode 160000
index 00000000000..4216173eeeb
--- /dev/null
+++ b/contrib/icu
@@ -0,0 +1 @@
+Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273
From f17655f13fcadb9babbc859e45e9f38cb32ad9e3 Mon Sep 17 00:00:00 2001
From: Dani Pozo
Date: Thu, 1 Aug 2024 11:01:27 +0200
Subject: [PATCH 099/180] Load filesystem cache metadata asynchronously

---
 src/Common/StatusFile.cpp                     |   2 +-
 src/Disks/IO/ReadBufferFromRemoteFSGather.cpp |  35 +++---
 .../Cached/CachedObjectStorage.cpp            |   5 +-
 src/Interpreters/Cache/FileCache.cpp          |  71 +++++++---
 src/Interpreters/Cache/FileCache.h            |  10 ++
 src/Interpreters/Cache/FileCacheSettings.cpp  |   3 +
 src/Interpreters/Cache/FileCacheSettings.h    |   1 +
 .../InterpreterDescribeCacheQuery.cpp         | 
2 +
 src/Interpreters/TemporaryDataOnDisk.cpp      |   4 +-
 src/Interpreters/tests/gtest_filecache.cpp    |  11 ++
 .../System/StorageSystemFilesystemCache.cpp   |   3 +
 .../StorageSystemFilesystemCacheSettings.cpp  |   2 +
 tests/config/config.d/storage_conf.xml        |   2 +
 tests/config/config.d/storage_conf_02944.xml  |   1 +
 .../integration/test_filesystem_cache/test.py | 106 +++++++++++++++---
 .../02344_describe_cache.reference            |   2 +-
 .../0_stateless/02344_describe_cache.sh       |   2 +-
 ...8_filesystem_cache_as_collection.reference |   4 +-
 .../02908_filesystem_cache_as_collection.sql  |   4 +-
 ...ge_cache_setting_without_restart.reference |  14 +--
 ...lly_change_filesystem_cache_size.reference |  10 +-
 21 files changed, 227 insertions(+), 67 deletions(-)

diff --git a/src/Common/StatusFile.cpp b/src/Common/StatusFile.cpp
index 80464f38082..0bbb7ff411d 100644
--- a/src/Common/StatusFile.cpp
+++ b/src/Common/StatusFile.cpp
@@ -51,7 +51,7 @@ StatusFile::StatusFile(std::string path_, FillFunction fill_)
         std::string contents;
         {
             ReadBufferFromFile in(path, 1024);
-            LimitReadBuffer limit_in(in, 1024, /* trow_exception */ false, /* exact_limit */ {});
+            LimitReadBuffer limit_in(in, 1024, /* throw_exception */ false, /* exact_limit */ {});
             readStringUntilEOF(contents, limit_in);
         }
 
diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
index bb9761a3905..c96f5f0c931 100644
--- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
+++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp
@@ -80,20 +80,27 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c
 
     if (with_file_cache)
     {
-        auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path);
-        buf = std::make_unique<CachedOnDiskReadBufferFromFile>(
-            object_path,
-            cache_key,
-            settings.remote_fs_cache,
-            FileCache::getCommonUser(),
-            [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); },
-            settings,
-            query_id,
-            object.bytes_size,
-            /* allow_seeks */false,
-            /* use_external_buffer */true,
-            /* read_until_position */std::nullopt,
-            cache_log);
+        if (settings.remote_fs_cache->isInitialized())
+        {
+            auto cache_key = settings.remote_fs_cache->createKeyForPath(object_path);
+            buf = std::make_unique<CachedOnDiskReadBufferFromFile>(
+                object_path,
+                cache_key,
+                settings.remote_fs_cache,
+                FileCache::getCommonUser(),
+                [=, this]() { return read_buffer_creator(/* restricted_seek */true, object); },
+                settings,
+                query_id,
+                object.bytes_size,
+                /* allow_seeks */false,
+                /* use_external_buffer */true,
+                /* read_until_position */std::nullopt,
+                cache_log);
+        }
+        else
+        {
+            settings.remote_fs_cache->throwInitExceptionIfNeeded();
+        }
     }
 
     /// Can't wrap CachedOnDiskReadBufferFromFile in CachedInMemoryReadBufferFromFile because the
diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
index fb817005399..ab0d357119c 100644
--- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.cpp
@@ -99,7 +99,7 @@ std::unique_ptr<WriteBufferFromFileBase> CachedObjectStorage::writeObject( /// NOLINT
 
     /// Need to remove even if cache_on_write == false.
     removeCacheIfExists(object.remote_path);
 
-    if (cache_on_write)
+    if (cache_on_write && cache->isInitialized())
     {
         auto key = getCacheKey(object.remote_path);
         return std::make_unique<CachedOnDiskWriteBufferFromFile>(
@@ -122,7 +122,8 @@ void CachedObjectStorage::removeCacheIfExists(const std::string & path_key_for_c
         return;
 
     /// Add try catch?
-    cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id);
+    if (cache->isInitialized())
+        cache->removeKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id);
 }
 
 void CachedObjectStorage::removeObject(const StoredObject & object)
diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index e3925163362..4c35c0f7f4c 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -11,11 +11,15 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
+#include
 
 #include
+#include
 
 namespace fs = std::filesystem;
 
@@ -88,6 +92,7 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s
     , bypass_cache_threshold(settings.enable_bypass_cache_with_threshold ? settings.bypass_cache_threshold : 0)
     , boundary_alignment(settings.boundary_alignment)
     , load_metadata_threads(settings.load_metadata_threads)
+    , load_metadata_asynchronously(settings.load_metadata_asynchronously)
     , write_cache_per_user_directory(settings.write_cache_per_user_id_directory)
     , keep_current_size_to_max_ratio(1 - settings.keep_free_space_size_ratio)
     , keep_current_elements_to_max_ratio(1 - settings.keep_free_space_elements_ratio)
@@ -136,7 +141,17 @@ const FileCache::UserInfo & FileCache::getInternalUser()
 
 bool FileCache::isInitialized() const
 {
-    return is_initialized.load(std::memory_order_seq_cst);
+    return is_initialized;
+}
+
+void FileCache::throwInitExceptionIfNeeded()
+{
+    if (load_metadata_asynchronously)
+        return;
+
+    std::lock_guard lock(init_mutex);
+    if (init_exception)
+        std::rethrow_exception(init_exception);
 }
 
 const String & FileCache::getBasePath() const
@@ -170,6 +185,35 @@ void FileCache::assertInitialized() const
 }
 
 void FileCache::initialize()
+{
+    // Prevent initialize() from running twice. This may be caused by two cache disks being created with the same path (see integration/test_filesystem_cache).
+    callOnce(initialize_called, [&] {
+        bool need_to_load_metadata = fs::exists(getBasePath());
+        try
+        {
+            if (!need_to_load_metadata)
+                fs::create_directories(getBasePath());
+            status_file = make_unique<StatusFile>(fs::path(getBasePath()) / "status", StatusFile::write_full_info);
+        }
+        catch (...)
+        {
+            init_exception = std::current_exception();
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+            throw;
+        }
+
+        if (load_metadata_asynchronously)
+        {
+            load_metadata_main_thread = ThreadFromGlobalPool([this, need_to_load_metadata] { initializeImpl(need_to_load_metadata); });
+        }
+        else
+        {
+            initializeImpl(need_to_load_metadata);
+        }
+    });
+}
+
+void FileCache::initializeImpl(bool load_metadata)
 {
     std::lock_guard lock(init_mutex);
 
@@ -178,16 +222,10 @@ void FileCache::initialize()
 
     try
     {
-        if (fs::exists(getBasePath()))
-        {
+        if (load_metadata)
             loadMetadata();
-        }
-        else
-        {
-            fs::create_directories(getBasePath());
-        }
 
-        status_file = make_unique<StatusFile>(fs::path(getBasePath()) / "status", StatusFile::write_full_info);
+        metadata.startup();
     }
     catch (...)
{
@@ -196,8 +234,6 @@ void FileCache::initialize()
     {
         init_exception = std::current_exception();
         tryLogCurrentException(__PRETTY_FUNCTION__);
        throw;
    }
 
-    metadata.startup();
-
     if (keep_current_size_to_max_ratio != 1 || keep_current_elements_to_max_ratio != 1)
     {
         keep_up_free_space_ratio_task = Context::getGlobalContextInstance()->getSchedulePool().createTask(log->name(), [this] { freeSpaceRatioKeepingThreadFunc(); });
@@ -205,6 +241,7 @@
     }
 
     is_initialized = true;
+    LOG_TEST(log, "Initialized cache from {}", metadata.getBaseDirectory());
 }
 
 CachePriorityGuard::Lock FileCache::lockCache() const
@@ -1185,7 +1222,6 @@ void FileCache::loadMetadataImpl()
     std::vector<ThreadFromGlobalPool> loading_threads;
     std::exception_ptr first_exception;
     std::mutex set_exception_mutex;
-    std::atomic<bool> stop_loading = false;
 
     LOG_INFO(log, "Loading filesystem cache with {} threads from {}", load_metadata_threads, metadata.getBaseDirectory());
 
@@ -1195,7 +1231,7 @@
     {
         loading_threads.emplace_back([&]
         {
-            while (!stop_loading)
+            while (!stop_loading_metadata)
             {
                 try
                 {
@@ -1212,7 +1248,7 @@
                         if (!first_exception)
                             first_exception = std::current_exception();
                     }
-                    stop_loading = true;
+                    stop_loading_metadata = true;
                     return;
                 }
             }
@@ -1225,7 +1261,7 @@
                 if (!first_exception)
                     first_exception = std::current_exception();
             }
-            stop_loading = true;
+            stop_loading_metadata = true;
             break;
         }
     }
@@ -1412,6 +1448,11 @@ FileCache::~FileCache()
 void FileCache::deactivateBackgroundOperations()
 {
     shutdown.store(true);
+
+    stop_loading_metadata = true;
+    if (load_metadata_main_thread.joinable())
+        load_metadata_main_thread.join();
+
     metadata.shutdown();
     if (keep_up_free_space_ratio_task)
         keep_up_free_space_ratio_task->deactivate();
diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h
index 07be802a940..579472eb824 100644
--- a/src/Interpreters/Cache/FileCache.h
+++ b/src/Interpreters/Cache/FileCache.h
@@ -8,6 +8,7 @@
 
 #include
 
+#include
 #include
 #include
 #include
@@ -82,6 +83,9 @@ public:
 
     bool isInitialized() const;
 
+    /// Throws if `!load_metadata_asynchronously` and there is an exception in `init_exception`
+    void throwInitExceptionIfNeeded();
+
     const String & getBasePath() const;
 
     static Key createKeyForPath(const String & path);
@@ -198,6 +202,9 @@ private:
     const size_t bypass_cache_threshold;
     const size_t boundary_alignment;
     size_t load_metadata_threads;
+    const bool load_metadata_asynchronously;
+    std::atomic<bool> stop_loading_metadata = false;
+    ThreadFromGlobalPool load_metadata_main_thread;
     const bool write_cache_per_user_directory;
 
     BackgroundSchedulePool::TaskHolder keep_up_free_space_ratio_task;
@@ -209,6 +216,7 @@
 
     std::exception_ptr init_exception;
     std::atomic<bool> is_initialized = false;
+    OnceFlag initialize_called;
     mutable std::mutex init_mutex;
     std::unique_ptr<StatusFile> status_file;
     std::atomic<bool> shutdown = false;
@@ -246,6 +254,8 @@
      */
     FileCacheQueryLimitPtr query_limit;
 
+    void initializeImpl(bool load_metadata);
+
     void assertInitialized() const;
 
     void assertCacheCorrectness();
diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp
index c68ff3183c6..e162d6b7551 100644
--- a/src/Interpreters/Cache/FileCacheSettings.cpp
+++ b/src/Interpreters/Cache/FileCacheSettings.cpp
@@ -65,6 +65,9 @@ void FileCacheSettings::loadImpl(FuncHas has, FuncGetUInt get_uint, FuncGetStrin
     if (has("load_metadata_threads"))
         load_metadata_threads = get_uint("load_metadata_threads");
 
+    if (has("load_metadata_asynchronously"))
+        load_metadata_asynchronously = 
get_uint("load_metadata_asynchronously"); + if (boundary_alignment > max_file_segment_size) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Setting `boundary_alignment` cannot exceed `max_file_segment_size`"); diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 93ded202947..72a2b6c3369 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -32,6 +32,7 @@ struct FileCacheSettings size_t background_download_queue_size_limit = FILECACHE_DEFAULT_BACKGROUND_DOWNLOAD_QUEUE_SIZE_LIMIT; size_t load_metadata_threads = FILECACHE_DEFAULT_LOAD_METADATA_THREADS; + bool load_metadata_asynchronously = false; bool write_cache_per_user_id_directory = false; diff --git a/src/Interpreters/InterpreterDescribeCacheQuery.cpp b/src/Interpreters/InterpreterDescribeCacheQuery.cpp index c7e863bf260..c7464dc6b77 100644 --- a/src/Interpreters/InterpreterDescribeCacheQuery.cpp +++ b/src/Interpreters/InterpreterDescribeCacheQuery.cpp @@ -20,6 +20,7 @@ static Block getSampleBlock() ColumnWithTypeAndName{std::make_shared(), "max_size"}, ColumnWithTypeAndName{std::make_shared(), "max_elements"}, ColumnWithTypeAndName{std::make_shared(), "max_file_segment_size"}, + ColumnWithTypeAndName{std::make_shared(), "is_initialized"}, ColumnWithTypeAndName{std::make_shared(), "boundary_alignment"}, ColumnWithTypeAndName{std::make_shared>(), "cache_on_write_operations"}, ColumnWithTypeAndName{std::make_shared>(), "cache_hits_threshold"}, @@ -50,6 +51,7 @@ BlockIO InterpreterDescribeCacheQuery::execute() res_columns[i++]->insert(settings.max_size); res_columns[i++]->insert(settings.max_elements); res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(cache->isInitialized()); res_columns[i++]->insert(settings.boundary_alignment); res_columns[i++]->insert(settings.cache_on_write_operations); res_columns[i++]->insert(settings.cache_hits_threshold); diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 7f0fb8cd6ca..3259d7b67d6 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -65,7 +65,7 @@ TemporaryDataOnDisk::TemporaryDataOnDisk(TemporaryDataOnDiskScopePtr parent_, Cu std::unique_ptr TemporaryDataOnDisk::createRawStream(size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); return std::make_unique(std::move(holder)); @@ -81,7 +81,7 @@ std::unique_ptr TemporaryDataOnDisk::createRawStream(si TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, size_t max_file_size) { - if (file_cache) + if (file_cache && file_cache->isInitialized()) { auto holder = createCacheFile(max_file_size); diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 36acc319f4e..5e2d3ee8219 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -42,6 +43,7 @@ #include #include +using namespace std::chrono_literals; namespace fs = std::filesystem; using namespace DB; @@ -358,9 +360,11 @@ TEST_F(FileCacheTest, LRUPolicy) settings.max_size = 30; settings.max_elements = 5; settings.boundary_alignment = 1; + settings.load_metadata_asynchronously = false; const size_t file_size = INT_MAX; // the value doesn't really matter because boundary_alignment == 1. 
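
Stepping back from the test changes for a moment: the FileCache::initialize()/initializeImpl() split earlier in this patch is an instance of a general pattern — a once-flag guards the setup, the expensive part optionally runs on a background thread, and readers poll an atomic flag and fall back when it is not set yet. A minimal standalone sketch of that pattern, with std::call_once standing in for ClickHouse's callOnce and all names illustrative:

#include <atomic>
#include <mutex>
#include <thread>
#include <chrono>
#include <iostream>

class LazyCache
{
public:
    void initialize(bool async)
    {
        std::call_once(init_flag, [&]
        {
            if (async)
                loader = std::thread([this] { initializeImpl(); });
            else
                initializeImpl();
        });
    }

    bool isInitialized() const { return is_initialized.load(); }

    ~LazyCache()
    {
        if (loader.joinable())
            loader.join();
    }

private:
    void initializeImpl()
    {
        /// Stand-in for loading cache metadata from disk.
        std::this_thread::sleep_for(std::chrono::milliseconds(50));
        is_initialized.store(true);
    }

    std::once_flag init_flag;
    std::thread loader;
    std::atomic<bool> is_initialized{false};
};

int main()
{
    LazyCache cache;
    cache.initialize(/*async=*/true);
    std::cout << "ready immediately? " << cache.isInitialized() << '\n'; /// usually 0
    while (!cache.isInitialized())
        std::this_thread::sleep_for(std::chrono::milliseconds(5));
    std::cout << "ready now: " << cache.isInitialized() << '\n'; /// 1
}

Callers that find the flag unset do exactly what the patched read/write buffers do: skip the cache and go straight to the underlying storage.
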
+
     const auto user = FileCache::getCommonUser();
 
     {
         std::cerr << "Step 1\n";
@@ -815,6 +819,7 @@ TEST_F(FileCacheTest, writeBuffer)
     settings.max_elements = 5;
     settings.max_file_segment_size = 5;
     settings.base_path = cache_base_path;
+    settings.load_metadata_asynchronously = false;
 
     FileCache cache("6", settings);
     cache.initialize();
@@ -946,6 +951,7 @@ TEST_F(FileCacheTest, temporaryData)
     settings.max_size = 10_KiB;
     settings.max_file_segment_size = 1_KiB;
     settings.base_path = cache_base_path;
+    settings.load_metadata_asynchronously = false;
 
     DB::FileCache file_cache("7", settings);
     file_cache.initialize();
@@ -1073,6 +1079,7 @@ TEST_F(FileCacheTest, CachedReadBuffer)
     settings.max_size = 30;
     settings.max_elements = 10;
     settings.boundary_alignment = 1;
+    settings.load_metadata_asynchronously = false;
 
     ReadSettings read_settings;
     read_settings.enable_filesystem_cache = true;
@@ -1092,6 +1099,7 @@ TEST_F(FileCacheTest, CachedReadBuffer)
     auto cache = std::make_shared<FileCache>("8", settings);
     cache->initialize();
+
     auto key = cache->createKeyForPath(file_path);
 
     const auto user = FileCache::getCommonUser();
@@ -1132,6 +1140,7 @@ TEST_F(FileCacheTest, TemporaryDataReadBufferSize)
     settings.max_size = 10_KiB;
     settings.max_file_segment_size = 1_KiB;
     settings.base_path = cache_base_path;
+    settings.load_metadata_asynchronously = false;
 
     DB::FileCache file_cache("cache", settings);
     file_cache.initialize();
@@ -1195,6 +1204,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
     settings.max_size = 40;
     settings.max_elements = 6;
     settings.boundary_alignment = 1;
+    settings.load_metadata_asynchronously = false;
 
     settings.cache_policy = "SLRU";
     settings.slru_size_ratio = 0.5;
@@ -1307,6 +1317,7 @@ TEST_F(FileCacheTest, SLRUPolicy)
         settings2.boundary_alignment = 1;
         settings2.cache_policy = "SLRU";
         settings2.slru_size_ratio = 0.5;
+        settings2.load_metadata_asynchronously = false;
 
         auto cache = std::make_shared<FileCache>("slru_2", settings2);
         cache->initialize();
diff --git a/src/Storages/System/StorageSystemFilesystemCache.cpp b/src/Storages/System/StorageSystemFilesystemCache.cpp
index cfb388bc232..0e972d8411b 100644
--- a/src/Storages/System/StorageSystemFilesystemCache.cpp
+++ b/src/Storages/System/StorageSystemFilesystemCache.cpp
@@ -47,6 +47,9 @@ void StorageSystemFilesystemCache::fillData(MutableColumns & res_columns, Contex
     for (const auto & [cache_name, cache_data] : caches)
     {
         const auto & cache = cache_data->cache;
+        if (!cache->isInitialized())
+            continue;
+
         cache->iterate([&](const FileSegment::Info & file_segment)
         {
             size_t i = 0;
diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp
index 8915032baf7..c6bba6b8598 100644
--- a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp
+++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp
@@ -21,6 +21,7 @@ ColumnsDescription StorageSystemFilesystemCacheSettings::getColumnsDescription()
         {"path", std::make_shared<DataTypeString>(), "Cache directory"},
         {"max_size", std::make_shared<DataTypeUInt64>(), "Cache size limit by the number of bytes"},
         {"max_elements", std::make_shared<DataTypeUInt64>(), "Cache size limit by the number of elements"},
+        {"is_initialized", std::make_shared<DataTypeUInt8>(), "Whether the cache is initialized and ready to be used"},
         {"current_size", std::make_shared<DataTypeUInt64>(), "Current cache size by the number of bytes"},
         {"current_elements", std::make_shared<DataTypeUInt64>(), "Current cache size by the number of elements"},
         {"max_file_segment_size", std::make_shared<DataTypeUInt64>(), "Maximum allowed file segment size"},
@@ -56,6 +57,7 @@ void
StorageSystemFilesystemCacheSettings::fillData(
     res_columns[i++]->insert(settings.base_path);
     res_columns[i++]->insert(settings.max_size);
     res_columns[i++]->insert(settings.max_elements);
+    res_columns[i++]->insert(cache->isInitialized());
     res_columns[i++]->insert(cache->getUsedCacheSize());
     res_columns[i++]->insert(cache->getFileSegmentsNum());
     res_columns[i++]->insert(settings.max_file_segment_size);
diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml
index e106e3a0e6b..091071f0637 100644
--- a/tests/config/config.d/storage_conf.xml
+++ b/tests/config/config.d/storage_conf.xml
@@ -27,6 +27,7 @@
             0.3
             0.15
             0.15
+            <load_metadata_asynchronously>0</load_metadata_asynchronously>
 
             cache
@@ -37,6 +38,7 @@
             100
             0
             0
+            <load_metadata_asynchronously>0</load_metadata_asynchronously>
 
diff --git a/tests/config/config.d/storage_conf_02944.xml b/tests/config/config.d/storage_conf_02944.xml
index 5f45640a923..08d78900229 100644
--- a/tests/config/config.d/storage_conf_02944.xml
+++ b/tests/config/config.d/storage_conf_02944.xml
@@ -19,6 +19,7 @@
             10
             100
             0
+            <load_metadata_asynchronously>0</load_metadata_asynchronously>
 
diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py
index 17a8dd8b6e1..aee8bd25c2e 100644
--- a/tests/integration/test_filesystem_cache/test.py
+++ b/tests/integration/test_filesystem_cache/test.py
@@ -1,6 +1,7 @@
 import logging
 import time
 import os
+import random
 
 import pytest
 from helpers.cluster import ClickHouseCluster
@@ -30,14 +31,6 @@ def cluster():
                 "config.d/storage_conf_2.xml",
             ],
         )
-        cluster.add_instance(
-            "node_no_filesystem_caches_path",
-            main_configs=[
-                "config.d/storage_conf.xml",
-                "config.d/remove_filesystem_caches_path.xml",
-            ],
-            stay_alive=True,
-        )
         cluster.add_instance(
             "node_force_read_through_cache_on_merge",
             main_configs=[
@@ -59,6 +52,51 @@ def cluster():
         cluster.shutdown()
 
 
+@pytest.fixture(scope="function")
+def non_shared_cluster():
+    """
+    For tests that cannot run in parallel against the same node/cluster (see test_custom_cached_disk, which relies on
+    changing server settings at runtime)
+    """
+    try:
+        # Randomize the cluster name
+        cluster = ClickHouseCluster(f"{__file__}_non_shared_{random.randint(0, 10**7)}")
+        cluster.add_instance(
+            "node_no_filesystem_caches_path",
+            main_configs=[
+                "config.d/storage_conf.xml",
+                "config.d/remove_filesystem_caches_path.xml",
+            ],
+            stay_alive=True,
+        )
+
+        logging.info("Starting test-exclusive cluster...")
+        cluster.start()
+        logging.info("Cluster started")
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+
+def wait_for_cache_initialized(node, cache_path, max_attempts=50):
+    initialized = False
+    attempts = 0
+    while not initialized:
+        query_result = node.query(
+            "SELECT path FROM system.filesystem_cache_settings WHERE is_initialized"
+        )
+        initialized = cache_path in query_result
+
+        if initialized:
+            break
+
+        time.sleep(0.1)
+        attempts += 1
+        if attempts >= max_attempts:
+            raise RuntimeError("Stopped waiting for cache to be initialized")
+
+
 @pytest.mark.parametrize("node_name", ["node"])
 def test_parallel_cache_loading_on_startup(cluster, node_name):
     node = cluster.instances[node_name]
@@ -71,14 +109,21 @@ def test_parallel_cache_loading_on_startup(cluster, node_name):
         ORDER BY value
         SETTINGS disk = disk(
             type = cache,
-            path = 'paralel_loading_test',
+            name = 'parallel_loading_test',
+            path = 'parallel_loading_test',
             disk = 'hdd_blob',
             max_file_segment_size = '1Ki',
             boundary_alignment = '1Ki',
             max_size = '1Gi',
             max_elements = 10000000,
             load_metadata_threads = 30);
+        """
+    )
+
+    wait_for_cache_initialized(node, "parallel_loading_test")
 
+    node.query(
+        """
         SYSTEM DROP FILESYSTEM CACHE;
         INSERT INTO test
SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; SELECT * FROM test FORMAT Null; @@ -103,6 +148,7 @@ def test_parallel_cache_loading_on_startup(cluster, node_name): ) node.restart_clickhouse() + wait_for_cache_initialized(node, "parallel_loading_test") # < because of additional files loaded into cache on server startup. assert cache_count <= int(node.query("SELECT count() FROM system.filesystem_cache")) @@ -131,7 +177,7 @@ def test_caches_with_the_same_configuration(cluster, node_name): node = cluster.instances[node_name] cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["test", "test2"]: node.query( f""" @@ -142,14 +188,20 @@ def test_caches_with_the_same_configuration(cluster, node_name): ORDER BY value SETTINGS disk = disk( type = cache, - name = {table}, + name = '{table}', path = '{cache_path}', disk = 'hdd_blob', max_file_segment_size = '1Ki', boundary_alignment = '1Ki', cache_on_write_operations=1, max_size = '1Mi'); + """ + ) + wait_for_cache_initialized(node, cache_path) + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -195,9 +247,8 @@ def test_caches_with_the_same_configuration(cluster, node_name): @pytest.mark.parametrize("node_name", ["node_caches_with_same_path"]) def test_caches_with_the_same_configuration_2(cluster, node_name): node = cluster.instances[node_name] - cache_path = "cache1" - node.query(f"SYSTEM DROP FILESYSTEM CACHE;") + node.query("SYSTEM DROP FILESYSTEM CACHE;") for table in ["cache1", "cache2"]: node.query( f""" @@ -207,7 +258,13 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): Engine=MergeTree() ORDER BY value SETTINGS disk = '{table}'; + """ + ) + wait_for_cache_initialized(node, "cache1") + + node.query( + f""" SET enable_filesystem_cache_on_write_operations=1; INSERT INTO {table} SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000; @@ -227,8 +284,8 @@ def test_caches_with_the_same_configuration_2(cluster, node_name): ) -def test_custom_cached_disk(cluster): - node = cluster.instances["node_no_filesystem_caches_path"] +def test_custom_cached_disk(non_shared_cluster): + node = non_shared_cluster.instances["node_no_filesystem_caches_path"] assert "Cannot create cached custom disk without" in node.query_and_get_error( f""" @@ -377,6 +434,7 @@ def test_force_filesystem_cache_on_merges(cluster): ORDER BY value SETTINGS disk = disk( type = cache, + name = 'force_cache_on_merges', path = 'force_cache_on_merges', disk = 'hdd_blob', max_file_segment_size = '1Ki', @@ -385,7 +443,13 @@ def test_force_filesystem_cache_on_merges(cluster): max_size = '10Gi', max_elements = 10000000, load_metadata_threads = 30); + """ + ) + wait_for_cache_initialized(node, "force_cache_on_merges") + + node.query( + """ SYSTEM DROP FILESYSTEM CACHE; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; INSERT INTO test SELECT * FROM generateRandom('a Int32, b String') LIMIT 1000000; @@ -441,7 +505,13 @@ SETTINGS disk = disk(type = cache, path = "test_system_sync_filesystem_cache", delayed_cleanup_interval_ms = 10000000, disk = hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_system_sync_filesystem_cache") + + node.query( + """ INSERT INTO test SELECT 1, 'test'; """ ) @@ -525,7 +595,13 @@ SETTINGS disk = disk(type = cache, keep_free_space_elements_ratio = {elements_ratio}, disk = 
hdd_blob), min_bytes_for_wide_part = 10485760; + """ + ) + wait_for_cache_initialized(node, "test_keep_up_size_ratio") + + node.query( + """ INSERT INTO test SELECT randomString(200); """ ) diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 6895606eb2b..13429b14866 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ 1 -102400 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 +102400 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/02344_describe_cache_test 0 5000 0 16 diff --git a/tests/queries/0_stateless/02344_describe_cache.sh b/tests/queries/0_stateless/02344_describe_cache.sh index d91661db9bc..c5373b4d7e3 100755 --- a/tests/queries/0_stateless/02344_describe_cache.sh +++ b/tests/queries/0_stateless/02344_describe_cache.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -nm --query """ DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk'); +SETTINGS disk = disk(name = '$disk_name', type = cache, max_size = '100Ki', path = '$disk_name', disk = 's3_disk', load_metadata_asynchronously = 0); """ $CLICKHOUSE_CLIENT -nm --query """ diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference index d4191af1594..41a60204eab 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.reference @@ -1,2 +1,2 @@ -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 -1048576 10000000 33554432 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection_sql 0 5000 0 16 +1048576 10000000 33554432 1 4194304 0 0 0 0 /var/lib/clickhouse/filesystem_caches/collection 0 5000 0 16 diff --git a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql index c7216833bc9..127baa8304e 100644 --- a/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql +++ b/tests/queries/0_stateless/02908_filesystem_cache_as_collection.sql @@ -3,8 +3,8 @@ CREATE NAMED COLLECTION IF NOT EXISTS cache_collection_sql AS path = 'collection_sql', max_size = '1Mi'; DROP TABLE IF EXISTS test; CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME', cache_name='cache_collection_sql', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME'; CREATE TABLE test2 (a Int32, b String) -ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', cache_name='cache_collection'); +ENGINE = MergeTree() ORDER BY a SETTINGS disk = disk(type = cache, disk = 'local_disk', name = '$CLICHOUSE_TEST_UNIQUE_NAME_2', 
cache_name='cache_collection', load_metadata_asynchronously = 0); DESCRIBE FILESYSTEM CACHE '$CLICHOUSE_TEST_UNIQUE_NAME_2'; diff --git a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference index 17a25d82824..0f64d0393b2 100644 --- a/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference +++ b/tests/queries/0_stateless/02933_change_cache_setting_without_restart.reference @@ -1,7 +1,7 @@ -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 -134217728 10000000 33554432 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 10 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 5 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 15 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 2 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 1000 0 16 +134217728 10000000 33554432 1 4194304 1 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02933/ 0 0 0 16 diff --git a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference index 298cc908178..c6bbcdc20c2 100644 --- a/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference +++ b/tests/queries/0_stateless/02944_dynamically_change_filesystem_cache_size.reference @@ -1,20 +1,20 @@ -100 10 10 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 0 0 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 0 10 98 set max_size from 100 to 10 -10 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +10 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 1 8 set max_size from 10 to 100 -100 10 10 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 8 1 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 set max_elements from 10 to 2 -100 2 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 2 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 2 18 set max_elements from 2 to 10 -100 10 10 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 +100 10 10 1 10 0 0 18 2 /var/lib/clickhouse/filesystem_caches/s3_cache_02944/ 0 5000 0 16 10 98 From 
6bd65dbfa5c5d450e355dc64c110db65d2f56cbb Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Fri, 16 Aug 2024 15:07:53 +0000 Subject: [PATCH 100/180] Use HTTP/1.1 for external HTTP authentication --- src/Access/HTTPAuthClient.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Access/HTTPAuthClient.h b/src/Access/HTTPAuthClient.h index a8b56cf05a7..a1b97a729a3 100644 --- a/src/Access/HTTPAuthClient.h +++ b/src/Access/HTTPAuthClient.h @@ -82,7 +82,8 @@ public: Result authenticate(const String & user_name, const String & password) const { - Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery()}; + Poco::Net::HTTPRequest request{ + Poco::Net::HTTPRequest::HTTP_GET, this->getURI().getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}; Poco::Net::HTTPBasicCredentials basic_credentials{user_name, password}; basic_credentials.authenticate(request); From 45e06de3267486296cc1452c981a78688a2193ae Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 16 Aug 2024 18:01:43 +0200 Subject: [PATCH 101/180] Minor update in Dynamic/JSON serializations --- src/DataTypes/Serializations/SerializationObject.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 2dd25e540cc..0042aa6d89d 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -199,7 +199,7 @@ void SerializationObject::serializeBinaryBulkStatePrefix( auto object_state = std::make_shared(serialization_version); object_state->max_dynamic_paths = column_object.getMaxDynamicPaths(); /// Write max_dynamic_paths parameter. - writeBinaryLittleEndian(object_state->max_dynamic_paths, *stream); + writeVarUInt(object_state->max_dynamic_paths, *stream); /// Write all dynamic paths in sorted order. object_state->sorted_dynamic_paths.reserve(dynamic_paths.size()); for (const auto & [path, _] : dynamic_paths) @@ -354,7 +354,7 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationObject::deserializeOb readBinaryLittleEndian(serialization_version, *structure_stream); auto structure_state = std::make_shared(serialization_version); /// Read max_dynamic_paths parameter. - readBinaryLittleEndian(structure_state->max_dynamic_paths, *structure_stream); + readVarUInt(structure_state->max_dynamic_paths, *structure_stream); /// Read the sorted list of dynamic paths. 
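
Both this hunk and the commit that follows swap writeBinaryLittleEndian/readBinaryLittleEndian for writeVarUInt/readVarUInt on counts and parameters, trading a constant 8 bytes for 1 byte when the value is small — which counts like max_dynamic_paths almost always are. A simplified sketch of the base-128 encoding these helpers implement (seven payload bits per byte, high bit set on every byte except the last; the real ClickHouse helpers live in the IO layer and additionally bound the byte count):

#include <cstdint>
#include <vector>
#include <cstdio>

/// Encode 7 bits per byte, continuation bit on all but the last byte.
void writeVarUInt(uint64_t x, std::vector<uint8_t> & out)
{
    while (x >= 0x80)
    {
        out.push_back(static_cast<uint8_t>(x) | 0x80);
        x >>= 7;
    }
    out.push_back(static_cast<uint8_t>(x));
}

/// Decode, advancing `pos` past the consumed bytes (no bounds checking here).
uint64_t readVarUInt(const uint8_t *& pos)
{
    uint64_t x = 0;
    for (size_t shift = 0; ; shift += 7)
    {
        uint8_t byte = *pos++;
        x |= static_cast<uint64_t>(byte & 0x7F) << shift;
        if (!(byte & 0x80))
            return x;
    }
}

int main()
{
    std::vector<uint8_t> buf;
    writeVarUInt(1024, buf);                /// a max_dynamic_paths-like value
    std::printf("%zu bytes\n", buf.size()); /// 2, vs. 8 for a fixed-width size_t
    const uint8_t * pos = buf.data();
    std::printf("%llu\n", static_cast<unsigned long long>(readVarUInt(pos)));
}
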
size_t dynamic_paths_size;
     readVarUInt(dynamic_paths_size, *structure_stream);
 
From c85d5e753899503f93a8f9ca7b67776d386d9130 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Fri, 16 Aug 2024 18:02:51 +0200
Subject: [PATCH 102/180] Update Dynamic serialization

---
 src/DataTypes/Serializations/SerializationDynamic.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/DataTypes/Serializations/SerializationDynamic.cpp b/src/DataTypes/Serializations/SerializationDynamic.cpp
index 6bba87c40fa..ab24779ced2 100644
--- a/src/DataTypes/Serializations/SerializationDynamic.cpp
+++ b/src/DataTypes/Serializations/SerializationDynamic.cpp
@@ -115,7 +115,7 @@ void SerializationDynamic::serializeBinaryBulkStatePrefix(
     dynamic_state->max_dynamic_types = column_dynamic.getMaxDynamicTypes();
     /// Write max_dynamic_types parameter, because it can differ from the max_dynamic_types
     /// that is specified in the Dynamic type (we could decrease it before merge).
-    writeBinaryLittleEndian(dynamic_state->max_dynamic_types, *stream);
+    writeVarUInt(dynamic_state->max_dynamic_types, *stream);
 
     dynamic_state->variant_type = variant_info.variant_type;
     dynamic_state->variant_names = variant_info.variant_names;
@@ -123,7 +123,7 @@
 
     /// Write information about variants.
     size_t num_variants = dynamic_state->variant_names.size() - 1; /// Don't write shared variant, Dynamic column should always have it.
-    writeBinaryLittleEndian(num_variants, *stream);
+    writeVarUInt(num_variants, *stream);
     if (settings.data_types_binary_encoding)
     {
         const auto & variants = assert_cast<const DataTypeVariant &>(*dynamic_state->variant_type).getVariants();
@@ -252,11 +252,11 @@ ISerialization::DeserializeBinaryBulkStatePtr SerializationDynamic::deserializeD
     readBinaryLittleEndian(structure_version, *structure_stream);
     auto structure_state = std::make_shared(structure_version);
     /// Read max_dynamic_types parameter.
-    readBinaryLittleEndian(structure_state->max_dynamic_types, *structure_stream);
+    readVarUInt(structure_state->max_dynamic_types, *structure_stream);
     /// Read information about variants.
     DataTypes variants;
     size_t num_variants;
-    readBinaryLittleEndian(num_variants, *structure_stream);
+    readVarUInt(num_variants, *structure_stream);
     variants.reserve(num_variants + 1); /// +1 for shared variant.
     if (settings.data_types_binary_encoding)
     {
From 4f84c82d6d53ded0adda46aac1db1d345b5ba2eb Mon Sep 17 00:00:00 2001
From: Linh Giang <165205637+linhgiang24@users.noreply.github.com>
Date: Fri, 16 Aug 2024 11:02:44 -0600
Subject: [PATCH 103/180] Update grant.md to include POSTGRES privilege

Added POSTGRES privilege under the SOURCES category as it seems to be
missing.

---
 docs/en/sql-reference/statements/grant.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md
index 43fa344a16d..6118f4c1d36 100644
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@@ -200,6 +200,7 @@ Hierarchy of privileges:
         - `JDBC`
         - `HDFS`
         - `S3`
+        - `POSTGRES`
     - [dictGet](#dictget)
     - [displaySecretsInShowAndSelect](#displaysecretsinshowandselect)
     - [NAMED COLLECTION ADMIN](#named-collection-admin)
@@ -476,6 +477,7 @@ Allows using external data sources. Applies to [table engines](../../engines/tab
     - `JDBC`. Level: `GLOBAL`
     - `HDFS`. Level: `GLOBAL`
     - `S3`. Level: `GLOBAL`
+    - `POSTGRES`. 
Level: `GLOBAL`
 
 The `SOURCES` privilege enables use of all the sources. Also you can grant a privilege for each source individually. To use sources, you need additional privileges.
 
From faad7f4ba27f665ecdb4fa6212a695f86413746a Mon Sep 17 00:00:00 2001
From: Julia Kartseva <jkartseva@users.noreply.github.com>
Date: Fri, 16 Aug 2024 20:35:44 +0000
Subject: [PATCH 104/180] Fix Broken pipe error for 03149_numbers_max_block_size_zero.sh

The error appeared while debugging
https://github.com/ClickHouse/clickhouse-private/issues/14225

Logs:
https://pastila.nl/?00628486/754eaf7d96fd03ceecdf1a45458867dc#B9vFn07WAielph/Z5lHbrQ==

From the `man grep`:
> -q, --quiet, --silent
>        <...> Exit immediately with zero status if any match is found, even if an error was detected.

When `grep -q` finds a match, it immediately exits with status `0` and closes its side of the pipe. If the clickhouse-client is still trying to send data through the pipe, it leads to a SIGPIPE signal.

Use grep -c instead. It is less efficient, but the output in this test is small.

We should also revisit how we handle the SIGPIPE signal, e.g. the server should not try to send logs if it already encountered the Broken pipe error.
---
 tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh
index 6f70a0d2536..2c4669325bb 100755
--- a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh
+++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh
@@ -4,4 +4,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CUR_DIR"/../shell_config.sh
 
-$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | grep -q "Sanity check: 'max_block_size' cannot be 0. Set to default value" && echo "OK" || echo "FAIL"
+$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | [ $(grep -c "Sanity check: 'max_block_size' cannot be 0.
Set to default value") -gt 0 ] && echo "OK" || echo "FAIL" From a6d5047bb09640bcf99bef84f655602d7dfb3361 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 16 Aug 2024 21:29:46 +0100 Subject: [PATCH 105/180] impl --- .../MergeTree/MergeTreePrefetchedReadPool.cpp | 4 ++++ .../MergeTree/MergeTreeReadPoolBase.cpp | 6 +++++ .../MergeTreeReadPoolParallelReplicas.cpp | 5 +++++ ...rgeTreeReadPoolParallelReplicasInOrder.cpp | 5 +++++ .../ParallelReplicasReadingCoordinator.cpp | 4 ++++ ...icas_read_task_size_overflow_bug.reference | 0 ...l_replicas_read_task_size_overflow_bug.sql | 22 +++++++++++++++++++ 7 files changed, 46 insertions(+) create mode 100644 tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference create mode 100644 tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql diff --git a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp index a9b77fb6c03..7081eb716f5 100644 --- a/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreePrefetchedReadPool.cpp @@ -395,6 +395,10 @@ void MergeTreePrefetchedReadPool::fillPerThreadTasks(size_t threads, size_t sum_ part_stat.prefetch_step_marks = std::max(part_stat.prefetch_step_marks, per_part_infos[i]->min_marks_per_task); + if (part_stat.prefetch_step_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + LOG_DEBUG( log, "Part: {}, sum_marks: {}, approx mark size: {}, prefetch_step_bytes: {}, prefetch_step_marks: {}, (ranges: {})", diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 6d2560bc9c7..9d3c38822e1 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,6 +13,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolBase::MergeTreeReadPoolBase( @@ -85,6 +86,11 @@ static size_t calculateMinMarksPerTask( min_marks_per_task = heuristic_min_marks; } } + + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } diff --git a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp index 33eaf5a49bd..d23072771f2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicas.cpp @@ -8,6 +8,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( @@ -38,6 +39,10 @@ MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas( for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + extension.all_callback( InitialAllRangesAnnouncement(coordination_mode, parts_ranges.getDescriptions(), extension.number_of_current_replica)); } diff --git 
a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp index 6b5cf978423..42ffc4304b2 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolParallelReplicasInOrder.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrder( @@ -37,6 +38,10 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd for (const auto & info : per_part_infos) min_marks_per_task = std::max(min_marks_per_task, info->min_marks_per_task); + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + for (const auto & part : parts_ranges) request.push_back({part.data_part->info, MarkRanges{}}); diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp index f46b4de10b7..ee47fe3549a 100644 --- a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -1004,6 +1004,10 @@ void ParallelReplicasReadingCoordinator::handleInitialAllRangesAnnouncement(Init ParallelReadResponse ParallelReplicasReadingCoordinator::handleRequest(ParallelReadRequest request) { + if (request.min_number_of_marks == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + ProfileEventTimeIncrement watch(ProfileEvents::ParallelReplicasHandleRequestMicroseconds); std::lock_guard lock(mutex); diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql new file mode 100644 index 00000000000..984c7fe0db7 --- /dev/null +++ b/tests/queries/0_stateless/03223_parallel_replicas_read_task_size_overflow_bug.sql @@ -0,0 +1,22 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = MergeTree ORDER BY k SETTINGS index_granularity = 1; + +SYSTEM STOP MERGES test__fuzz_22; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(1); + +SET allow_experimental_parallel_reading_from_replicas = 2, parallel_replicas_for_non_replicated_merge_tree = 1, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost', + merge_tree_min_rows_for_concurrent_read = 9223372036854775806, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem = 9223372036854775806; + + SELECT v + FROM test__fuzz_22 +ORDER BY v + LIMIT 10, 10 +SETTINGS max_threads = 4 + FORMAT Null; -- { serverError BAD_ARGUMENTS } + +DROP TABLE test__fuzz_22 SYNC; From cc7d22a7b83440cfbf7d37086ece7fac222f24de Mon Sep 17 00:00:00 2001 From: 
Alexey Milovidov Date: Fri, 16 Aug 2024 23:08:16 +0200 Subject: [PATCH 106/180] Proper parsing of the PostgreSQL-style CAST operator --- src/Parsers/ExpressionElementParsers.cpp | 26 +++++++++++-------- ..._proper_parsing_of_cast_operator.reference | 4 +++ .../03227_proper_parsing_of_cast_operator.sql | 6 +++++ 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference create mode 100644 tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index dd22b80b1cb..ffa1bd93ded 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -853,9 +853,9 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected /// Parse numbers (including decimals), strings, arrays and tuples of them. + Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; - bool is_string_literal = pos->type == StringLiteral; if (pos->type == Minus) { @@ -866,7 +866,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || is_string_literal) + else if (pos->type == Number || pos->type == StringLiteral) { ++pos; } @@ -939,18 +939,22 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String s; size_t data_size = data_end - data_begin; - if (is_string_literal) + if (begin->type == StringLiteral) { - ReadBufferFromMemory buf(data_begin, data_size); - readQuotedStringWithSQLStyle(s, buf); - assert(buf.count() == data_size); + ASTPtr literal; + if (ParserStringLiteral().parse(begin, literal, expected)) + { + node = createFunctionCast(literal, type_ast); + return true; + } + return false; } else - s = String(data_begin, data_size); - - auto literal = std::make_shared(std::move(s)); - node = createFunctionCast(literal, type_ast); - return true; + { + auto literal = std::make_shared(String(data_begin, data_size)); + node = createFunctionCast(literal, type_ast); + return true; + } } return false; diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference new file mode 100644 index 00000000000..2127d396bb3 --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.reference @@ -0,0 +1,4 @@ +414243 +ABC +A +{"a": \'A\'} diff --git a/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql new file mode 100644 index 00000000000..0c2e7dc582a --- /dev/null +++ b/tests/queries/0_stateless/03227_proper_parsing_of_cast_operator.sql @@ -0,0 +1,6 @@ +SELECT '414243'::String; +SELECT x'414243'::String; +SELECT b'01000001'::String; +SELECT '{"a": \'\x41\'}'::String; +SELECT '{"a": \'\x4\'}'::String; -- { clientError SYNTAX_ERROR } +SELECT '{"a": \'a\x4\'}'::String; -- { clientError SYNTAX_ERROR } From aee031ad4468b870073dc46770d07cea07aa829f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:25:49 +0200 Subject: [PATCH 107/180] Slightly better --- src/Parsers/ExpressionElementParsers.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index ffa1bd93ded..726326bfc85 100644 --- 
a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -856,6 +856,7 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected Pos begin = pos; const char * data_begin = pos->begin; const char * data_end = pos->end; + ASTPtr string_literal; if (pos->type == Minus) { @@ -866,10 +867,15 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected data_end = pos->end; ++pos; } - else if (pos->type == Number || pos->type == StringLiteral) + else if (pos->type == Number) { ++pos; } + else if (pos->type == StringLiteral) + { + if (!ParserStringLiteral().parse(begin, string_literal, expected)) + return false; + } else if (isOneOf(pos->type)) { TokenType last_token = OpeningSquareBracket; @@ -939,15 +945,10 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected { String s; size_t data_size = data_end - data_begin; - if (begin->type == StringLiteral) + if (string_literal) { - ASTPtr literal; - if (ParserStringLiteral().parse(begin, literal, expected)) - { - node = createFunctionCast(literal, type_ast); - return true; - } - return false; + node = createFunctionCast(string_literal, type_ast); + return true; } else { From 2a5de86fe4f21d77671a317d52b3f378062c7555 Mon Sep 17 00:00:00 2001 From: Julia Kartseva Date: Fri, 16 Aug 2024 21:46:21 +0000 Subject: [PATCH 108/180] shellcheck --- .../queries/0_stateless/03149_numbers_max_block_size_zero.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh index 2c4669325bb..7f606d889a7 100755 --- a/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh +++ b/tests/queries/0_stateless/03149_numbers_max_block_size_zero.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash +# shellcheck disable=SC2266 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | [ $(grep -c "Sanity check: 'max_block_size' cannot be 0. Set to default value") -gt 0 ] && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "SELECT count(*) FROM numbers(10) AS a, numbers(11) AS b, numbers(12) AS c SETTINGS max_block_size = 0" 2>&1 | + [ "$(grep -c "Sanity check: 'max_block_size' cannot be 0. 
Set to default value")" -gt 0 ] && echo "OK" || echo "FAIL" From d952f7cff579d28a51eea428aee7460121862ce5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 16 Aug 2024 23:50:26 +0200 Subject: [PATCH 109/180] Update test --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index acb4925ce6e..ee702300094 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata" ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1 -cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata FORMAT JSONAsObject" +cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_block_size 8192 --max_insert_block_size 8192 --max_insert_threads 1 --min_insert_block_size_bytes 0 --min_insert_block_size_rows 0 -q "INSERT INTO ghdata FORMAT JSONAsObject" ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)" From b98249ea7fda526a7a561862fcc4a721e5a4587f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:06:47 +0200 Subject: [PATCH 110/180] Use temporary tables for input and output in clickhouse-local --- programs/local/LocalServer.cpp | 2 +- tests/queries/0_stateless/01191_rename_dictionary.sql | 1 + .../02141_clickhouse_local_interactive_table.reference | 4 ++-- .../0_stateless/02141_clickhouse_local_interactive_table.sh | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 200beea7b63..a8b774562f9 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -367,7 +367,7 @@ std::string LocalServer::getInitialCreateTableQuery() else table_structure = "(" + table_structure + ")"; - return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});", + return fmt::format("CREATE TEMPORARY TABLE {} {} ENGINE = File({}, {});", table_name, table_structure, data_format, table_file); } diff --git a/tests/queries/0_stateless/01191_rename_dictionary.sql b/tests/queries/0_stateless/01191_rename_dictionary.sql index c5012dabc81..be95e5a7d4b 100644 --- a/tests/queries/0_stateless/01191_rename_dictionary.sql +++ b/tests/queries/0_stateless/01191_rename_dictionary.sql @@ -27,6 +27,7 @@ RENAME DICTIONARY test_01191.t TO test_01191.dict1; -- {serverError INCORRECT_QU DROP DICTIONARY test_01191.t; -- {serverError INCORRECT_QUERY} DROP TABLE test_01191.t; +DROP DATABASE IF EXISTS dummy_db; CREATE DATABASE dummy_db ENGINE=Atomic; RENAME DICTIONARY test_01191.dict TO dummy_db.dict1; RENAME DICTIONARY dummy_db.dict1 TO test_01191.dict; diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference index 0bb8966cbe4..0e74c0a083e 100644 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -1,2 +1,2 @@ -CREATE TABLE default.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') -CREATE 
TABLE foo.`table`\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') +CREATE TEMPORARY TABLE `table`\n(\n `key` String\n)\nENGINE = File(TSVWithNamesAndTypes, \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh index 934d87616ac..3a95e59416a 100755 --- a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' -$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' +$CLICKHOUSE_LOCAL --database foo --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create temporary table table' From 8ba142559ca05295670b0a899610ab613c2d5658 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:09:39 +0200 Subject: [PATCH 111/180] Pass-through RENAME and UUID-related operations in Overlay database to underlying databases --- src/Databases/DatabasesOverlay.cpp | 47 ++++++++++++++++++++++++++++++ src/Databases/DatabasesOverlay.h | 9 ++++++ src/Interpreters/StorageID.h | 1 - 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index 801356b3dd7..495733e15fd 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -14,6 +14,8 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_TABLE; } DatabasesOverlay::DatabasesOverlay(const String & name_, ContextPtr context_) @@ -124,6 +126,39 @@ StoragePtr DatabasesOverlay::detachTable(ContextPtr context_, const String & tab getEngineName()); } +void DatabasesOverlay::renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) +{ + for (auto & db : databases) + { + if (db->isTableExist(name, current_context)) + { + if (DatabasesOverlay * to_overlay_database = typeid_cast(&to_database)) + { + /// Renaming from Overlay database inside itself or into another Overlay database. + /// Just use the first database in the overlay as a destination. + if (to_overlay_database->databases.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The destination Overlay database {} does not have any members", to_database.getDatabaseName()); + + db->renameTable(current_context, name, *to_overlay_database->databases[0], to_name, exchange, dictionary); + } + else + { + /// Renaming into a different type of database. E.g. from Overlay on top of Atomic database into just Atomic database. 
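+ /// For example, a `RENAME TABLE t TO some_atomic_db.t` issued against an Overlay database
+ /// (as used by clickhouse-local) takes this branch and is delegated as-is; `some_atomic_db`
+ /// is a hypothetical plain Atomic database, named here only for illustration.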
+ db->renameTable(current_context, name, to_database, to_name, exchange, dictionary); + } + + return; + } + } + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(getDatabaseName()), backQuote(name)); +} + ASTPtr DatabasesOverlay::getCreateTableQueryImpl(const String & name, ContextPtr context_, bool throw_on_error) const { ASTPtr result = nullptr; @@ -178,6 +213,18 @@ String DatabasesOverlay::getTableDataPath(const ASTCreateQuery & query) const return result; } +UUID DatabasesOverlay::getUUID() const +{ + UUID result = UUIDHelpers::Nil; + for (const auto & db : databases) + { + result = db->getUUID(); + if (result != UUIDHelpers::Nil) + break; + } + return result; +} + UUID DatabasesOverlay::tryGetTableUUID(const String & table_name) const { UUID result = UUIDHelpers::Nil; diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index b0c7e7e4032..40c653e5cb5 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -35,12 +35,21 @@ public: StoragePtr detachTable(ContextPtr context, const String & table_name) override; + void renameTable( + ContextPtr current_context, + const String & name, + IDatabase & to_database, + const String & to_name, + bool exchange, + bool dictionary) override; + ASTPtr getCreateTableQueryImpl(const String & name, ContextPtr context, bool throw_on_error) const override; ASTPtr getCreateDatabaseQuery() const override; String getTableDataPath(const String & table_name) const override; String getTableDataPath(const ASTCreateQuery & query) const override; + UUID getUUID() const override; UUID tryGetTableUUID(const String & table_name) const override; void drop(ContextPtr context) override; diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index f9afbc7b98d..ad55d16e284 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -27,7 +27,6 @@ class ASTQueryWithTableAndOutput; class ASTTableIdentifier; class Context; -// TODO(ilezhankin): refactor and merge |ASTTableIdentifier| struct StorageID { String database_name; From da0a8051d8c8e8c2c72145e15cdf5a96e99641d2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 00:22:57 +0200 Subject: [PATCH 112/180] Miscellaneous changes in database engines --- src/Databases/DatabaseLazy.cpp | 2 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOnDisk.cpp | 10 +++++----- src/Databases/DatabaseOnDisk.h | 4 ++-- src/Databases/DatabaseOrdinary.cpp | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 3fb6d30fcb8..2ccdd8510a8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -52,7 +52,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, void DatabaseLazy::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel /*mode*/) { - iterateMetadataFiles(local_context, [this, &local_context](const String & file_name) + iterateMetadataFiles([this, &local_context](const String & file_name) { const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 41cfb751141..aeac130594f 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -12,7 +12,7 @@ class DatabaseLazyIterator; class Context; /** Lazy engine of databases. - * Works like DatabaseOrdinary, but stores in memory only the cache. 
+ * Works like DatabaseOrdinary, but stores only recently accessed tables in memory. * Can be used only with *Log engines. */ class DatabaseLazy final : public DatabaseOnDisk diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..c80e4def94e 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -568,14 +568,14 @@ void DatabaseOnDisk::drop(ContextPtr local_context) assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty()); if (local_context->getSettingsRef().force_remove_data_recursively_on_drop) { - (void)fs::remove_all(local_context->getPath() + getDataPath()); + (void)fs::remove_all(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove_all(getMetadataPath()); } else { try { - (void)fs::remove(local_context->getPath() + getDataPath()); + (void)fs::remove(std::filesystem::path(getContext()->getPath()) / data_path); (void)fs::remove(getMetadataPath()); } catch (const fs::filesystem_error & e) @@ -613,7 +613,7 @@ time_t DatabaseOnDisk::getObjectMetadataModificationTime(const String & object_n } } -void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const IteratingFunction & process_metadata_file) const +void DatabaseOnDisk::iterateMetadataFiles(const IteratingFunction & process_metadata_file) const { auto process_tmp_drop_metadata_file = [&](const String & file_name) { @@ -621,7 +621,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat static const char * tmp_drop_ext = ".sql.tmp_drop"; const std::string object_name = file_name.substr(0, file_name.size() - strlen(tmp_drop_ext)); - if (fs::exists(local_context->getPath() + getDataPath() + '/' + object_name)) + if (fs::exists(std::filesystem::path(getContext()->getPath()) / data_path / object_name)) { fs::rename(getMetadataPath() + file_name, getMetadataPath() + object_name + ".sql"); LOG_WARNING(log, "Object {} was not dropped previously and will be restored", backQuote(object_name)); @@ -638,7 +638,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat std::vector> metadata_files; fs::directory_iterator dir_end; - for (fs::directory_iterator dir_it(getMetadataPath()); dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it(metadata_path); dir_it != dir_end; ++dir_it) { String file_name = dir_it->path().filename(); /// For '.svn', '.gitignore' directory and similar. 
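For reference, a minimal standalone sketch (assuming nothing beyond the C++ standard library) of the std::filesystem behavior the path joins above rely on — appending a trailing empty component keeps the directory separator, matching the old `data_path + name + "/"` concatenation:

#include <filesystem>
#include <iostream>

int main()
{
    namespace fs = std::filesystem;
    /// operator/ appends the preferred separator before the empty component,
    /// so this prints "data/t/" — a path that still ends with a slash.
    std::cout << (fs::path("data") / "t" / "").string() << '\n';
}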
diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index 12656068643..ffc95a7c128 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -64,7 +64,7 @@ public: time_t getObjectMetadataModificationTime(const String & object_name) const override; String getDataPath() const override { return data_path; } - String getTableDataPath(const String & table_name) const override { return data_path + escapeForFileName(table_name) + "/"; } + String getTableDataPath(const String & table_name) const override { return std::filesystem::path(data_path) / escapeForFileName(table_name) / ""; } String getTableDataPath(const ASTCreateQuery & query) const override { return getTableDataPath(query.getTable()); } String getMetadataPath() const override { return metadata_path; } @@ -83,7 +83,7 @@ protected: using IteratingFunction = std::function; - void iterateMetadataFiles(ContextPtr context, const IteratingFunction & process_metadata_file) const; + void iterateMetadataFiles(const IteratingFunction & process_metadata_file) const; ASTPtr getCreateTableQueryImpl( const String & table_name, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 8808261654f..dd8a3f42ea8 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -55,7 +55,7 @@ static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; static constexpr const char * const CONVERT_TO_REPLICATED_FLAG_NAME = "convert_to_replicated"; DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) - : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) + : DatabaseOrdinary(name_, metadata_path_, std::filesystem::path("data") / escapeForFileName(name_) / "", "DatabaseOrdinary (" + name_ + ")", context_) { } @@ -265,7 +265,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables } }; - iterateMetadataFiles(local_context, process_metadata); + iterateMetadataFiles(process_metadata); size_t objects_in_database = metadata.parsed_tables.size() - prev_tables_count; size_t dictionaries_in_database = metadata.total_dictionaries - prev_total_dictionaries; From 07fa798ffa53216fd37dac51bceb327665fc8dda Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Fri, 16 Aug 2024 23:31:41 +0000 Subject: [PATCH 113/180] add total in system.one, test --- src/Storages/System/StorageSystemOne.cpp | 5 ++++- .../System/StorageSystemViewRefreshes.cpp | 2 +- ...3221_refreshable_matview_progress.reference | 2 ++ .../03221_refreshable_matview_progress.sql | 18 ++++++++++++++++++ 4 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03221_refreshable_matview_progress.reference create mode 100644 tests/queries/0_stateless/03221_refreshable_matview_progress.sql diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 936d55e61a0..70377715dc3 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -41,7 +41,10 @@ Pipe StorageSystemOne::read( auto column = DataTypeUInt8().createColumnConst(1, 0u)->convertToFullColumnIfConst(); Chunk chunk({ std::move(column) }, 1); - return Pipe(std::make_shared(std::move(header), std::move(chunk))); + auto source = std::make_shared(std::move(header), std::move(chunk)); + source->addTotalRowsApprox(1); + + return Pipe(source); } diff --git 
a/src/Storages/System/StorageSystemViewRefreshes.cpp b/src/Storages/System/StorageSystemViewRefreshes.cpp index 3941c4c39c2..30539ed6b6a 100644 --- a/src/Storages/System/StorageSystemViewRefreshes.cpp +++ b/src/Storages/System/StorageSystemViewRefreshes.cpp @@ -86,7 +86,7 @@ void StorageSystemViewRefreshes::fillData( res_columns[i++]->insert(refresh.exception_message); res_columns[i++]->insert(refresh.refresh_count); - res_columns[i++]->insert(std::min(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read, 1.0)); + res_columns[i++]->insert(Float64(refresh.progress.read_rows) / refresh.progress.total_rows_to_read); res_columns[i++]->insert(refresh.progress.elapsed_ns / 1e9); res_columns[i++]->insert(refresh.progress.read_rows); res_columns[i++]->insert(refresh.progress.read_bytes); diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.reference b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference new file mode 100644 index 00000000000..5ed392e61c7 --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.reference @@ -0,0 +1,2 @@ +0 +4 4 1 diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql new file mode 100644 index 00000000000..4794359fd2b --- /dev/null +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -0,0 +1,18 @@ +set allow_experimental_refreshable_materialized_view=1; + +CREATE MATERIALIZED VIEW 03221_rmv +REFRESH AFTER 1 SECOND +( +x UInt64 +) +ENGINE = Memory +AS SELECT number AS x +FROM numbers(3) +UNION ALL +SELECT rand64() AS x; + +SELECT sleep(2); + +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE view = '03221_rmv'; + +DROP TABLE 03221_rmv; From 142d7b15828c9eeef145fd56d56b33d1004fb68c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 01:35:49 +0200 Subject: [PATCH 114/180] Miscellaneous changes in BaseDaemon --- src/Daemon/BaseDaemon.cpp | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index e7ae8ea5a1d..f75699881bd 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -16,39 +16,29 @@ #include #if defined(OS_LINUX) - #include +#include #endif #include #include #include - #include #include #include -#include #include -#include #include #include #include #include #include - #include #include #include -#include #include -#include #include -#include #include -#include #include -#include -#include #include #include #include @@ -459,13 +449,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() signal_listener_thread.start(*signal_listener); #if defined(__ELF__) && !defined(OS_FREEBSD) - String build_id_hex = SymbolIndex::instance().getBuildIDHex(); - if (build_id_hex.empty()) - build_id = ""; - else - build_id = build_id_hex; -#else - build_id = ""; + build_id = SymbolIndex::instance().getBuildIDHex(); #endif git_hash = GIT_HASH; From 83608cb7bfcbf0801994cc916ed36b91c7113307 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 01:40:07 +0200 Subject: [PATCH 115/180] Miscellaneous changes from #66999 (2) --- base/base/CMakeLists.txt | 3 +++ {src/Daemon => base/base}/GitHash.cpp.in | 0 src/Daemon/CMakeLists.txt | 3 --- 3 files changed, 3 insertions(+), 3 deletions(-) rename {src/Daemon => base/base}/GitHash.cpp.in (100%) diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 
247028b96e0..3d236f52c36 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -8,6 +8,8 @@ endif () # when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16. set (CMAKE_CXX_STANDARD 20) +configure_file(GitHash.cpp.in GitHash.generated.cpp) + set (SRCS argsToConfig.cpp cgroupsv2.cpp @@ -33,6 +35,7 @@ set (SRCS safeExit.cpp throwError.cpp Numa.cpp + GitHash.generated.cpp ) add_library (common ${SRCS}) diff --git a/src/Daemon/GitHash.cpp.in b/base/base/GitHash.cpp.in similarity index 100% rename from src/Daemon/GitHash.cpp.in rename to base/base/GitHash.cpp.in diff --git a/src/Daemon/CMakeLists.txt b/src/Daemon/CMakeLists.txt index 35ea2122dbb..2068af2200d 100644 --- a/src/Daemon/CMakeLists.txt +++ b/src/Daemon/CMakeLists.txt @@ -1,10 +1,7 @@ -configure_file(GitHash.cpp.in GitHash.generated.cpp) - add_library (daemon BaseDaemon.cpp GraphiteWriter.cpp SentryWriter.cpp - GitHash.generated.cpp ) target_link_libraries (daemon PUBLIC loggers common PRIVATE clickhouse_parsers clickhouse_common_io clickhouse_common_config) From 4b68ba23c0372331401cd327ca5849ba9d1bce8d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:43:53 +0200 Subject: [PATCH 116/180] Pretty print tuples in CREATE TABLE statements --- programs/format/Format.cpp | 6 +++++- src/Client/ClientBase.cpp | 6 +++++- src/Core/ExternalTable.cpp | 4 ++-- src/Core/Settings.h | 2 +- src/DataTypes/IDataType.cpp | 1 - src/Databases/DatabaseOnDisk.cpp | 2 +- src/Functions/formatQuery.cpp | 8 +++++++- .../InterpreterShowCreateQuery.cpp | 5 ++++- .../formatWithPossiblyHidingSecrets.h | 3 ++- src/Parsers/ASTColumnDeclaration.cpp | 8 ++------ src/Parsers/ASTDataType.cpp | 19 ++++++++++++++----- src/Parsers/ASTExpressionList.cpp | 5 ++--- src/Parsers/ASTNameTypePair.cpp | 6 +----- src/Parsers/IAST.cpp | 3 ++- src/Parsers/IAST.h | 14 +++++++++----- 15 files changed, 57 insertions(+), 35 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index a434c9171e9..4af77533c53 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -264,7 +264,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv) if (!backslash) { WriteBufferFromOwnString str_buf; - formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length); + oneline = oneline || approx_query_length < max_line_length; + IAST::FormatSettings settings(str_buf, oneline, hilite); + settings.show_secrets = true; + settings.print_pretty_type_names = !oneline; + res->format(settings); if (insert_query_payload) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 187ef079eda..74357d33f1c 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -331,7 +331,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Setting { output_stream << std::endl; WriteBufferFromOStream res_buf(output_stream, 4096); - formatAST(*res, res_buf); + IAST::FormatSettings format_settings(res_buf, /* one_line */ false); + format_settings.hilite = true; + format_settings.show_secrets = true; + format_settings.print_pretty_type_names = true; + res->format(format_settings); res_buf.finalize(); output_stream << std::endl << std::endl; } diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index c2bcf6ec651..4ff0d7092d8 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -85,7 +85,7 @@ void BaseExternalTable::parseStructureFromStructureField(const std::string & arg /// We use 
`formatWithPossiblyHidingSensitiveData` instead of `getColumnNameWithoutAlias` because `column->type` is an ASTFunction. /// `getColumnNameWithoutAlias` will return name of the function with `(arguments)` even if arguments is empty. if (column) - structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back(column->name, column->type->formatWithPossiblyHidingSensitiveData(0, true, true, false)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: expected column definition, got {}", child->formatForErrorMessage()); } @@ -102,7 +102,7 @@ void BaseExternalTable::parseStructureFromTypesField(const std::string & argumen throw Exception(ErrorCodes::BAD_ARGUMENTS, "Error while parsing table structure: {}", error); for (size_t i = 0; i < type_list_raw->children.size(); ++i) - structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true)); + structure.emplace_back("_" + toString(i + 1), type_list_raw->children[i]->formatWithPossiblyHidingSensitiveData(0, true, true, false)); } void BaseExternalTable::initSampleBlock() diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0d84ad9022a..1a71494c8cd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -882,7 +882,7 @@ class IColumn; M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in DESCRIBE query and toTypeName() function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 7fd8a85aeca..49e5b2d022e 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -8,7 +8,6 @@ #include #include -#include #include #include diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 734f354d9a5..4e1ddd8cc77 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -504,7 +504,7 @@ void DatabaseOnDisk::renameTable( } -/// It returns create table statement (even if table is detached) +/// It returns the create table statement (even if table is detached) ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, ContextPtr, bool throw_on_error) const { ASTPtr ast; diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 9591ea95254..be633bdfe37 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -43,6 +43,7 @@ public: max_query_size = settings.max_query_size; max_parser_depth = 
settings.max_parser_depth; max_parser_backtracks = settings.max_parser_backtracks; + print_pretty_type_names = settings.print_pretty_type_names; } String getName() const override { return name; } @@ -138,7 +139,11 @@ private: } } - formatAST(*ast, buf, /*hilite*/ false, /*single_line*/ output_formatting == OutputFormatting::SingleLine); + IAST::FormatSettings settings(buf, output_formatting == OutputFormatting::SingleLine, /*hilite*/ false); + settings.show_secrets = true; + settings.print_pretty_type_names = print_pretty_type_names; + ast->format(settings); + auto formatted = buf.stringView(); const size_t res_data_new_size = res_data_size + formatted.size() + 1; @@ -165,6 +170,7 @@ private: size_t max_query_size; size_t max_parser_depth; size_t max_parser_backtracks; + bool print_pretty_type_names; }; } diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index e5549b2e539..7af6b4948f8 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,7 +97,10 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({.ctx = getContext(), .query = *create_query, .one_line = false})); + column->insert(format({ + .ctx = getContext(), + .query = *create_query, + .one_line = false})); return QueryPipeline(std::make_shared(Block{{ std::move(column), diff --git a/src/Interpreters/formatWithPossiblyHidingSecrets.h b/src/Interpreters/formatWithPossiblyHidingSecrets.h index ea8c295b169..14e84f1d1a4 100644 --- a/src/Interpreters/formatWithPossiblyHidingSecrets.h +++ b/src/Interpreters/formatWithPossiblyHidingSecrets.h @@ -25,7 +25,8 @@ inline String format(const SecretHidingFormatSettings & settings) && settings.ctx->getSettingsRef().format_display_secrets_in_show_and_select && settings.ctx->getAccess()->isGranted(AccessType::displaySecretsInShowAndSelect); - return settings.query.formatWithPossiblyHidingSensitiveData(settings.max_length, settings.one_line, show_secrets); + return settings.query.formatWithPossiblyHidingSensitiveData( + settings.max_length, settings.one_line, show_secrets, settings.ctx->getSettingsRef().print_pretty_type_names); } } diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index c96499095d5..23d653012f8 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -66,17 +66,13 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & format_settings, Fo { frame.need_parens = false; - /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. + /// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query. format_settings.ostr << backQuote(name); if (type) { format_settings.ostr << ' '; - - FormatStateStacked type_frame = frame; - type_frame.indent = 0; - - type->formatImpl(format_settings, state, type_frame); + type->formatImpl(format_settings, state, frame); } if (null_modifier) diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 3c17ae8c380..21f56e5f7a2 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -40,12 +40,21 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat { settings.ostr << '(' << (settings.hilite ? 
hilite_none : ""); - for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + if (!settings.one_line && settings.print_pretty_type_names && name == "Tuple") { - if (i != 0) - settings.ostr << ", "; - - arguments->children[i]->formatImpl(settings, state, frame); + ++frame.indent; + std::string indent_str = settings.one_line ? "" : "\n" + std::string(4 * frame.indent, ' '); + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) + { + if (i != 0) + settings.ostr << ','; + settings.ostr << indent_str; + arguments->children[i]->formatImpl(settings, state, frame); + } + } + else + { + arguments->formatImpl(settings, state, frame); } settings.ostr << (settings.hilite ? hilite_function : "") << ')'; diff --git a/src/Parsers/ASTExpressionList.cpp b/src/Parsers/ASTExpressionList.cpp index 61ac482af82..f345b0c6a6f 100644 --- a/src/Parsers/ASTExpressionList.cpp +++ b/src/Parsers/ASTExpressionList.cpp @@ -42,7 +42,8 @@ void ASTExpressionList::formatImpl(const FormatSettings & settings, FormatState void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = "\n" + std::string(4 * (frame.indent + 1), ' '); + ++frame.indent; + std::string indent_str = "\n" + std::string(4 * frame.indent, ' '); if (frame.expression_list_prepend_whitespace) { @@ -50,8 +51,6 @@ void ASTExpressionList::formatImplMultiline(const FormatSettings & settings, For settings.ostr << ' '; } - ++frame.indent; - for (size_t i = 0, size = children.size(); i < size; ++i) { if (i && separator) diff --git a/src/Parsers/ASTNameTypePair.cpp b/src/Parsers/ASTNameTypePair.cpp index e4066081a9b..1515700365f 100644 --- a/src/Parsers/ASTNameTypePair.cpp +++ b/src/Parsers/ASTNameTypePair.cpp @@ -23,12 +23,8 @@ ASTPtr ASTNameTypePair::clone() const void ASTNameTypePair::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - - settings.ostr << indent_str << backQuoteIfNeed(name) << ' '; + settings.ostr << backQuoteIfNeed(name) << ' '; type->formatImpl(settings, state, frame); } } - - diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 37d7f458d61..5bd2c92c60a 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -165,11 +165,12 @@ size_t IAST::checkDepthImpl(size_t max_depth) const return res; } -String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const +String IAST::formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const { WriteBufferFromOwnString buf; FormatSettings settings(buf, one_line); settings.show_secrets = show_secrets; + settings.print_pretty_type_names = print_pretty_type_names; format(settings); return wipeSensitiveDataAndCutToLength(buf.str(), max_length); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index e2cf7579667..2293d50b0ec 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -201,6 +201,7 @@ public: bool show_secrets; /// Show secret parts of the AST (e.g. passwords, encryption keys). char nl_or_ws; /// Newline or whitespace. 
LiteralEscapingStyle literal_escaping_style; + bool print_pretty_type_names; explicit FormatSettings( WriteBuffer & ostr_, @@ -209,7 +210,8 @@ public: bool always_quote_identifiers_ = false, IdentifierQuotingStyle identifier_quoting_style_ = IdentifierQuotingStyle::Backticks, bool show_secrets_ = true, - LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular) + LiteralEscapingStyle literal_escaping_style_ = LiteralEscapingStyle::Regular, + bool print_pretty_type_names_ = false) : ostr(ostr_) , one_line(one_line_) , hilite(hilite_) @@ -218,6 +220,7 @@ public: , show_secrets(show_secrets_) , nl_or_ws(one_line ? ' ' : '\n') , literal_escaping_style(literal_escaping_style_) + , print_pretty_type_names(print_pretty_type_names_) { } @@ -230,6 +233,7 @@ public: , show_secrets(other.show_secrets) , nl_or_ws(other.nl_or_ws) , literal_escaping_style(other.literal_escaping_style) + , print_pretty_type_names(other.print_pretty_type_names) { } @@ -251,7 +255,7 @@ public: /// The state that is copied when each node is formatted. For example, nesting level. struct FormatStateStacked { - UInt8 indent = 0; + UInt16 indent = 0; bool need_parens = false; bool expression_list_always_start_on_new_line = false; /// Line feed and indent before expression list even if it's of single element. bool expression_list_prepend_whitespace = false; /// Prepend whitespace (if it is required) @@ -274,7 +278,7 @@ public: /// Secrets are displayed regarding show_secrets, then SensitiveDataMasker is applied. /// You can use Interpreters/formatWithPossiblyHidingSecrets.h for convenience. - String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets) const; + String formatWithPossiblyHidingSensitiveData(size_t max_length, bool one_line, bool show_secrets, bool print_pretty_type_names) const; /** formatForLogging and formatForErrorMessage always hide secrets. 
This inconsistent * behaviour is due to the fact such functions are called from Client which knows nothing about @@ -283,12 +287,12 @@ public: */ String formatForLogging(size_t max_length = 0) const { - return formatWithPossiblyHidingSensitiveData(max_length, true, false); + return formatWithPossiblyHidingSensitiveData(max_length, true, false, false); } String formatForErrorMessage() const { - return formatWithPossiblyHidingSensitiveData(0, true, false); + return formatWithPossiblyHidingSensitiveData(0, true, false, false); } virtual bool hasSecretParts() const { return childrenHaveSecretParts(); } From 566e043c2c84a979ca1c05996f6e3a4303708bff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:44:36 +0200 Subject: [PATCH 117/180] Add a test --- ...print_pretty_tuples_create_query.reference | 56 +++++++++++++++++++ .../03227_print_pretty_tuples_create_query.sh | 35 ++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference create mode 100755 tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference new file mode 100644 index 00000000000..c65dc32a224 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -0,0 +1,56 @@ + +SHOW CREATE TABLE: +CREATE TABLE test.test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 +CREATE TABLE test.test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192 + +clickhouse-format: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test (`x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String) ORDER BY tuple() + +formatQuery: +CREATE TABLE test +( + `x` Tuple( + a String, + b Array(Tuple( + c Tuple( + e String), + d String))), + `y` String +) +ORDER BY tuple() +CREATE TABLE test +( + `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), + `y` String +) +ORDER BY tuple() diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh new file mode 100755 index 00000000000..e5614f9f228 --- /dev/null +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-asan, no-msan, no-tsan +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo +echo "SHOW CREATE TABLE:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + DROP TABLE IF EXISTS test; + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY (); + SET print_pretty_type_names = 1; + SHOW CREATE TABLE test; + SET print_pretty_type_names = 0; + SHOW CREATE TABLE test; + DROP TABLE test; +" + +echo +echo "clickhouse-format:" +${CLICKHOUSE_FORMAT} --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" +${CLICKHOUSE_FORMAT} --oneline --query " + CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY () +" + +echo +echo "formatQuery:" +${CLICKHOUSE_CLIENT} --output-format Raw --query " + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 1; + SELECT formatQuery('CREATE TABLE test (x Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), y String) ORDER BY ()') SETTINGS print_pretty_type_names = 0; +" From 3065b386a4e033a802bbc4855d5113e814648848 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 03:49:45 +0200 Subject: [PATCH 118/180] Update documentation --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1a71494c8cd..d8837d26e54 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -882,7 +882,7 @@ class IColumn; M(Bool, use_json_alias_for_old_object_type, false, "When enabled, JSON type alias will create old experimental Object type instead of a new JSON type", 0) \ M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0) \ M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. 
Made for SQL compatibility tests.", 0) \ - M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function", 0) \ + M(Bool, print_pretty_type_names, true, "Print pretty type names in the DESCRIBE query and `toTypeName` function, as well as in the `SHOW CREATE TABLE` query and the `formatQuery` function.", 0) \ M(Bool, create_table_empty_primary_key_by_default, false, "Allow to create *MergeTree tables with empty primary key when ORDER BY and PRIMARY KEY not specified", 0) \ M(Bool, allow_named_collection_override_by_default, true, "Allow named collections' fields override by default.", 0) \ M(SQLSecurityType, default_normal_view_sql_security, SQLSecurityType::INVOKER, "Allows to set a default value for SQL SECURITY option when creating a normal view.", 0) \ From 02ec4e2f92ac769f92aebdb714d0d8da1a924984 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 04:00:31 +0200 Subject: [PATCH 119/180] Fix build --- src/Parsers/ExpressionElementParsers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 726326bfc85..61b5723072e 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -943,7 +943,6 @@ bool ParserCastOperator::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (ParserToken(DoubleColon).ignore(pos, expected) && ParserDataType().parse(pos, type_ast, expected)) { - String s; size_t data_size = data_end - data_begin; if (string_literal) { From 7a5df67b3b3b5bd7a8481562e0293150427fef90 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 17 Aug 2024 05:08:58 +0200 Subject: [PATCH 120/180] Fix style --- src/Interpreters/InterpreterShowCreateQuery.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 7af6b4948f8..3de6b755609 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -97,10 +97,12 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() } MutableColumnPtr column = ColumnString::create(); - column->insert(format({ + column->insert(format( + { .ctx = getContext(), .query = *create_query, - .one_line = false})); + .one_line = false + })); return QueryPipeline(std::make_shared(Block{{ std::move(column), From 330086c621c860524b68ce7598d12c8db958101d Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 03:46:33 +0000 Subject: [PATCH 121/180] update 02136_scalar_progress results according to fixed bug --- tests/queries/0_stateless/02136_scalar_progress.reference | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02136_scalar_progress.reference b/tests/queries/0_stateless/02136_scalar_progress.reference index 5378c52de89..b8957f78e6d 100644 --- a/tests/queries/0_stateless/02136_scalar_progress.reference +++ b/tests/queries/0_stateless/02136_scalar_progress.reference @@ -1,6 +1,7 @@ < X-ClickHouse-Progress: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: {"read_rows":"65505","read_bytes":"524040","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} < X-ClickHouse-Progress: 
{"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"0","result_bytes":"0"} -< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} -< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100000","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Progress: {"read_rows":"100000","read_bytes":"800000","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"0","result_bytes":"0"} +< X-ClickHouse-Progress: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} +< X-ClickHouse-Summary: {"read_rows":"100001","read_bytes":"800001","written_rows":"0","written_bytes":"0","total_rows_to_read":"100001","result_rows":"1","result_bytes":"272"} From 00891a2dd8f69dde0b1fe364b6891fd3629d5dbe Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 13:57:24 +0000 Subject: [PATCH 122/180] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 4794359fd2b..30228277bb5 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -1,3 +1,5 @@ +-- Tags: no-ordinary-database + set allow_experimental_refreshable_materialized_view=1; CREATE MATERIALIZED VIEW 03221_rmv From 9dee9ecfb4adee4cff099f13afe61bdc2c38170e Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sat, 17 Aug 2024 10:33:35 -0600 Subject: [PATCH 123/180] Fix incorrect default value for postgresql_connection_pool_auto_close_connection in docs --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index de601fe02dc..5bf1fe197ae 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1381,7 +1381,7 @@ Default value: `2`. Close connection before returning connection to the pool. -Default value: true. +Default value: false. 
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size} From a42b12725ab37df74a96e85f7c644c90bd6e30f6 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 17:39:09 +0200 Subject: [PATCH 124/180] CI: Native build for package_aarch64 --- tests/ci/ci_config.py | 3 ++- tests/ci/ci_definitions.py | 1 + tests/ci/test_ci_config.py | 14 ++++++++++---- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 7a19eb6f827..173c6c9c931 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -94,7 +94,8 @@ class CI: package_type="deb", static_binary_name="aarch64", additional_pkgs=True, - ) + ), + runner_type=Runners.BUILDER_ARM, ), BuildNames.PACKAGE_ASAN: CommonJobConfigs.BUILD.with_properties( build_config=BuildConfig( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 48847b0d7a6..1bed9db06f2 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -57,6 +57,7 @@ class Runners(metaclass=WithIter): """ BUILDER = "builder" + BUILDER_ARM = "builder-aarch64" STYLE_CHECKER = "style-checker" STYLE_CHECKER_ARM = "style-checker-aarch64" FUNC_TESTER = "func-tester" diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 525b3bf367b..c3e55aeac06 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -35,10 +35,16 @@ class TestCIConfig(unittest.TestCase): f"Job [{job}] must have style-checker(-aarch64) runner", ) elif "binary_" in job.lower() or "package_" in job.lower(): - self.assertTrue( - CI.JOB_CONFIGS[job].runner_type == CI.Runners.BUILDER, - f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", - ) + if job.lower() == CI.BuildNames.PACKAGE_AARCH64: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER_ARM,), + f"Job [{job}] must have [{CI.Runners.BUILDER_ARM}] runner", + ) + else: + self.assertTrue( + CI.JOB_CONFIGS[job].runner_type in (CI.Runners.BUILDER,), + f"Job [{job}] must have [{CI.Runners.BUILDER}] runner", + ) elif "aarch64" in job.lower(): self.assertTrue( "aarch" in CI.JOB_CONFIGS[job].runner_type, From 7432400fd0c07b7c967f47b1536706b8f791fcb1 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 21:06:58 +0200 Subject: [PATCH 125/180] revert hacks made to prevent OOM in aarch64 --- cmake/limit_jobs.cmake | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 17d8dd42a2c..8e48fc9b9d8 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -42,19 +42,9 @@ endif () # But use 2 parallel jobs, since: # - this is what llvm does # - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO) - if (ARCH_AARCH64) - # aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.") - set (PARALLEL_LINK_JOBS 1) - if (LINKER_NAME MATCHES "lld") - math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4) - set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}") - endif() - elseif (PARALLEL_LINK_JOBS GREATER 2) - message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") - set (PARALLEL_LINK_JOBS 2) - endif () +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND 
ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2) + message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.") + set (PARALLEL_LINK_JOBS 2) endif() message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") From cb8d9a05643d3aac5f410c4eac53124224c63bc8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Sat, 17 Aug 2024 20:13:35 +0200 Subject: [PATCH 126/180] fix typo --- docs/en/sql-reference/functions/type-conversion-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index cd6fd9ab839..a03394be226 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -4952,7 +4952,7 @@ toIntervalMonth(n) **Arguments** -- `n` — Number of m. Positive integer number. [Int*](../data-types/int-uint.md). +- `n` — Number of months. Positive integer number. [Int*](../data-types/int-uint.md). **Returned values** From c68b597eee752d5921d4469af01104a27681296a Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 17 Aug 2024 20:03:18 +0000 Subject: [PATCH 127/180] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index 30228277bb5..1be276c485c 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -15,6 +15,6 @@ SELECT rand64() AS x; SELECT sleep(2); -SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE view = '03221_rmv'; +SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv'; DROP TABLE 03221_rmv; From a36688b07387822cae18eca1fbc798a517119e02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 03:02:20 +0200 Subject: [PATCH 128/180] Fix error --- src/Parsers/ASTDataType.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Parsers/ASTDataType.cpp b/src/Parsers/ASTDataType.cpp index 21f56e5f7a2..4211347fb74 100644 --- a/src/Parsers/ASTDataType.cpp +++ b/src/Parsers/ASTDataType.cpp @@ -54,6 +54,7 @@ void ASTDataType::formatImpl(const FormatSettings & settings, FormatState & stat } else { + frame.expression_list_prepend_whitespace = false; arguments->formatImpl(settings, state, frame); } From 3c021e02b69f9237d9765d6295fd843ad2503398 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 03:02:26 +0200 Subject: [PATCH 129/180] Fix tests --- .../0_stateless/01458_named_tuple_millin.reference | 8 ++++++-- .../01504_compression_multiple_streams.reference | 8 ++++---- ...01548_create_table_compound_column_format.reference | 3 ++- .../01881_aggregate_functions_versioning.reference | 2 +- .../02117_show_create_table_system.reference | 10 ++++++++-- .../02286_tuple_numeric_identifier.reference | 2 +- .../02907_backup_restore_flatten_nested.reference | 4 ++-- 7 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference 
index 954dfe36563..86561570985 100644
--- a/tests/queries/0_stateless/01458_named_tuple_millin.reference
+++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference
@@ -1,12 +1,16 @@
 CREATE TABLE default.tuple
 (
-    `j` Tuple(a Int8, b String)
+    `j` Tuple(
+        a Int8,
+        b String)
 )
 ENGINE = Memory
 j Tuple(\n    a Int8,\n    b String)
 CREATE TABLE default.tuple
 (
-    `j` Tuple(a Int8, b String)
+    `j` Tuple(
+        a Int8,
+        b String)
 )
 ENGINE = Memory
 j Tuple(\n    a Int8,\n    b String)
diff --git a/tests/queries/0_stateless/01504_compression_multiple_streams.reference b/tests/queries/0_stateless/01504_compression_multiple_streams.reference
index 4d3aba66526..14cdce72044 100644
--- a/tests/queries/0_stateless/01504_compression_multiple_streams.reference
+++ b/tests/queries/0_stateless/01504_compression_multiple_streams.reference
@@ -1,20 +1,20 @@
 1 1 [[1]] (1,[1])
 1 1 [[1]] (1,[1])
-CREATE TABLE default.columns_with_multiple_streams\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(UInt32, Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192
+CREATE TABLE default.columns_with_multiple_streams\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(\n        UInt32,\n        Array(UInt64)) CODEC(T64, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192
 1 1 [[1]] (1,[1])
 2 2 [[2]] (2,[2])
-CREATE TABLE default.columns_with_multiple_streams\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192
+CREATE TABLE default.columns_with_multiple_streams\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(\n        UInt32,\n        Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 0, min_bytes_for_wide_part = 0, index_granularity = 8192
 1 1 [[1]] (1,[1])
 2 2 [[2]] (2,[2])
 3 3 [[3]] (3,[3])
 1 1 [[1]] (1,[1])
 1 1 [[1]] (1,[1])
-CREATE TABLE default.columns_with_multiple_streams_compact\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192
+CREATE TABLE default.columns_with_multiple_streams_compact\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(\n        UInt32,\n        Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192
 1 1 [[1]] (1,[1])
 2 2 [[2]] (2,[2])
 1 1 [[1]] (1,[1])
 2 2 [[2]] (2,[2])
-CREATE TABLE default.columns_with_multiple_streams_compact\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(UInt32, Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192
+CREATE TABLE default.columns_with_multiple_streams_compact\n(\n    `field0` Nullable(Int64) CODEC(Delta(2), LZ4),\n    `field1` Nullable(UInt8) CODEC(Delta(8), LZ4),\n    `field2` Array(Array(Int64)) CODEC(Delta(8), LZ4),\n    `field3` Tuple(\n        UInt32,\n        Array(UInt64)) CODEC(Delta, Default)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS min_rows_for_wide_part = 100000, min_bytes_for_wide_part = 100000, index_granularity = 8192
 1 1 [[1]] (1,[1])
 2 2 [[2]] (2,[2])
 3 3 [[3]] (3,[3])
diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference
index 21e31e8f034..c23cc57548b 100644
--- a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference
+++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference
@@ -7,6 +7,7 @@ ENGINE = TinyLog
 CREATE TABLE test
 (
     `a` Int64,
-    `b` Tuple(a Int64)
+    `b` Tuple(
+        a Int64)
 )
 ENGINE = TinyLog
diff --git a/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference b/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference
index c30c4ca7e74..e15f312c2c8 100644
--- a/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference
+++ b/tests/queries/0_stateless/01881_aggregate_functions_versioning.reference
@@ -1 +1 @@
-CREATE TABLE default.test_table\n(\n    `col1` DateTime,\n    `col2` Int64,\n    `col3` AggregateFunction(1, sumMap, Tuple(Array(UInt8), Array(UInt8)))\n)\nENGINE = AggregatingMergeTree\nORDER BY (col1, col2)\nSETTINGS index_granularity = 8192
+CREATE TABLE default.test_table\n(\n    `col1` DateTime,\n    `col2` Int64,\n    `col3` AggregateFunction(1, sumMap, Tuple(\n        Array(UInt8),\n        Array(UInt8)))\n)\nENGINE = AggregatingMergeTree\nORDER BY (col1, col2)\nSETTINGS index_granularity = 8192
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index 32e8b2f4312..638a46a142f 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -510,9 +510,15 @@ CREATE TABLE system.parts
     `rows_where_ttl_info.max` Array(DateTime),
     `projections` Array(String),
     `visible` UInt8,
-    `creation_tid` Tuple(UInt64, UInt64, UUID),
+    `creation_tid` Tuple(
+        UInt64,
+        UInt64,
+        UUID),
     `removal_tid_lock` UInt64,
-    `removal_tid` Tuple(UInt64, UInt64, UUID),
+    `removal_tid` Tuple(
+        UInt64,
+        UInt64,
+        UUID),
     `creation_csn` UInt64,
     `removal_csn` UInt64,
     `has_lightweight_delete` UInt8,
diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference
index 21348493d1d..916cdaf83cd 100644
--- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference
+++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.reference
@@ -1,4 +1,4 @@
-CREATE TABLE default.t_tuple_numeric\n(\n    `t` Tuple(`1` Tuple(`2` Int32, `3` Int32), `4` Int32)\n)\nENGINE = Memory
+CREATE TABLE default.t_tuple_numeric\n(\n    `t` Tuple(\n        `1` Tuple(\n            `2` Int32,\n            `3` Int32),\n        `4` Int32)\n)\nENGINE = Memory
 {"t":{"1":{"2":2,"3":3},"4":4}}
 2 3 4
 2 3 4
diff --git a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference
index aa8f22f590a..0db19f0591a 100644
--- a/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference
+++ b/tests/queries/0_stateless/02907_backup_restore_flatten_nested.reference
@@ -1,8 +1,8 @@
 BACKUP_CREATED
-CREATE TABLE default.test\n(\n    `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
+CREATE TABLE default.test\n(\n    `test` Array(Tuple(\n        foo String,\n        bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
 BACKUP_CREATED
 CREATE TABLE default.test2\n(\n    `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
 RESTORED
-CREATE TABLE default.test\n(\n    `test` Array(Tuple(foo String, bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
+CREATE TABLE default.test\n(\n    `test` Array(Tuple(\n        foo String,\n        bar Float64))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192
 RESTORED
 CREATE TABLE default.test2\n(\n    `test` Nested(foo String, bar Float64)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192

From 60dd7e962a19e92cef4e3ab40d2607b3f5a59e90 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 05:56:27 +0200
Subject: [PATCH 130/180] Fix tests

---
 programs/format/Format.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp
index 4af77533c53..f07387bd395 100644
--- a/programs/format/Format.cpp
+++ b/programs/format/Format.cpp
@@ -264,10 +264,10 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
                 if (!backslash)
                 {
                     WriteBufferFromOwnString str_buf;
-                    oneline = oneline || approx_query_length < max_line_length;
-                    IAST::FormatSettings settings(str_buf, oneline, hilite);
+                    bool oneline_current_query = oneline || approx_query_length < max_line_length;
+                    IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
                     settings.show_secrets = true;
-                    settings.print_pretty_type_names = !oneline;
+                    settings.print_pretty_type_names = !oneline_current_query;
                     res->format(settings);

                     if (insert_query_payload)
@@ -311,7 +311,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
                 else
                 {
                     WriteBufferFromOwnString str_buf;
-                    formatAST(*res, str_buf, hilite, oneline);
+                    bool oneline_current_query = oneline || approx_query_length < max_line_length;
+                    IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
+                    settings.show_secrets = true;
+                    settings.print_pretty_type_names = !oneline_current_query;
+                    res->format(settings);

                     auto res_string = str_buf.str();
                     WriteBufferFromOStream res_cout(std::cout, 4096);

From 1691e4c4977dbaf44bedf61cdf248e7a1997d407 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 06:14:21 +0200
Subject: [PATCH 131/180] Fix test

---
 tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh
index ee702300094..fbd7d897fb8 100755
--- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh
+++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh
@@ -8,7 +8,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ghdata"
 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1

-cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_block_size 8192 --max_insert_block_size 8192 --max_insert_threads 1 --min_insert_block_size_bytes 0 --min_insert_block_size_rows 0 -q "INSERT INTO ghdata FORMAT JSONAsObject"
+cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} \
+    --max_memory_usage 10G --query "INSERT INTO ghdata FORMAT JSONAsObject"

 ${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ghdata WHERE NOT ignore(*)"

@@ -16,7 +17,7 @@ ${CLICKHOUSE_CLIENT} -q \
     "SELECT data.repo.name, count() AS stars FROM ghdata \
         WHERE data.type = 'WatchEvent' GROUP BY data.repo.name ORDER BY stars DESC, data.repo.name LIMIT 5"

-${CLICKHOUSE_CLIENT} --allow_experimental_analyzer=1 -q \
+${CLICKHOUSE_CLIENT} --enable_analyzer=1 -q \
     "SELECT data.payload.commits[].author.name AS name, count() AS c FROM ghdata \
         ARRAY JOIN data.payload.commits[].author.name \
         GROUP BY name ORDER BY c DESC, name LIMIT 5"

From 7f2c61799dc984add3056f9c77c4e64476ad917b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 06:14:59 +0200
Subject: [PATCH 132/180] Fix test

---
 .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh
index ef87034ff89..2afec5ba7fe 100755
--- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh
+++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh
@@ -13,10 +13,10 @@ ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2 (data JSON) ENGINE = MergeTree OR
 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_string (data String) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'"
 ${CLICKHOUSE_CLIENT} -q "CREATE TABLE ghdata_2_from_string (data JSON) ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'" --allow_experimental_json_type 1

-cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2 FORMAT JSONAsObject"
+cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2 FORMAT JSONAsObject"
 cat $CUR_DIR/data_json/ghdata_sample.json | ${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_string FORMAT JSONAsString"

-${CLICKHOUSE_CLIENT} -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string"
+${CLICKHOUSE_CLIENT} --max_memory_usage 10G -q "INSERT INTO ghdata_2_from_string SELECT data FROM ghdata_2_string"

 ${CLICKHOUSE_CLIENT} -q "SELECT \
     (SELECT mapSort(groupUniqArrayMap(JSONAllPathsWithTypes(data))), sum(cityHash64(toString(data))) FROM ghdata_2_from_string) = \

From d34d41d1e240f0df6256a827661174a30011f204 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Sun, 18 Aug 2024 04:34:38 +0000
Subject: [PATCH 133/180] fix test

---
 .../0_stateless/03221_refreshable_matview_progress.sql | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql
index 1be276c485c..de8de41fd04 100644
--- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql
+++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql
@@ -16,5 +16,3 @@ SELECT rand64() AS x;
 SELECT sleep(2);

 SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv';
-
-DROP TABLE 03221_rmv;

From 03ab872f5c628613e0e187d4e1ad3ca9b9148bf6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 07:34:44 +0200
Subject: [PATCH 134/180] Fix error

---
 src/IO/S3/PocoHTTPClient.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h
index 3b7ec4d1d9c..eb65460ce13 100644
--- a/src/IO/S3/PocoHTTPClient.h
+++ b/src/IO/S3/PocoHTTPClient.h
@@ -20,6 +20,7 @@
 #include
 #include

+
 namespace Aws::Http::Standard
 {
 class StandardHttpResponse;
@@ -27,15 +28,17 @@ class StandardHttpResponse;

 namespace DB
 {
-
 class Context;
 }

+
 namespace DB::S3
 {
+
 class ClientFactory;
 class PocoHTTPClient;

+
 struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
 {
     std::function per_request_configuration;
@@ -76,6 +79,7 @@ private:
     friend ClientFactory;
 };

+
 class PocoHTTPResponse : public Aws::Http::Standard::StandardHttpResponse
 {
 public:
@@ -115,6 +119,7 @@ private:
     Aws::Utils::Stream::ResponseStream body_stream;
 };

+
 class PocoHTTPClient : public Aws::Http::HttpClient
 {
 public:
@@ -170,10 +175,10 @@ protected:
     std::function error_report;
     ConnectionTimeouts timeouts;
     const RemoteHostFilter & remote_host_filter;
-    unsigned int s3_max_redirects;
+    unsigned int s3_max_redirects = 0;
     bool s3_use_adaptive_timeouts = true;
     bool enable_s3_requests_logging = false;
-    bool for_disk_s3;
+    bool for_disk_s3 = false;

     /// Limits get request per second rate for GET, SELECT and all other requests, excluding throttled by put throttler
     /// (i.e. throttles GetObject, HeadObject)

From 4e91c663a62529d27ddcf6d364338f001e1706eb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 08:28:16 +0200
Subject: [PATCH 135/180] Fix error

---
 src/IO/S3/PocoHTTPClient.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp
index dc7dcdc6793..3e060e21c51 100644
--- a/src/IO/S3/PocoHTTPClient.cpp
+++ b/src/IO/S3/PocoHTTPClient.cpp
@@ -397,8 +397,11 @@ void PocoHTTPClient::makeRequestInternalImpl(

     try
     {
-        const auto proxy_configuration = per_request_configuration();
-        for (unsigned int attempt = 0; attempt <= s3_max_redirects; ++attempt)
+        ProxyConfiguration proxy_configuration;
+        if (per_request_configuration)
+            proxy_configuration = per_request_configuration();
+
+        for (size_t attempt = 0; attempt <= s3_max_redirects; ++attempt)
         {
             Poco::URI target_uri(uri);

@@ -516,7 +519,6 @@ void PocoHTTPClient::makeRequestInternalImpl(
                 LOG_TEST(log, "Redirecting request to new location: {}", location);

                 addMetric(request, S3MetricType::Redirects);
-
                 continue;
             }

@@ -564,9 +566,9 @@ void PocoHTTPClient::makeRequestInternalImpl(
         }
         else
         {
-
             if (status_code == 429 || status_code == 503)
-            { // API throttling
+            {
+                /// API throttling
                 addMetric(request, S3MetricType::Throttling);
             }
             else if (status_code >= 300)

From 7071942858e44053b92b8386e68251be7718e3b5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 09:05:45 +0200
Subject: [PATCH 136/180] Miscellaneous changes from #66999

---
 programs/keeper/Keeper.cpp | 4 +++-
 src/Daemon/BaseDaemon.cpp  | 4 +---
 src/Daemon/BaseDaemon.h    | 1 -
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index a447a9e50f6..ced661d9772 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -66,6 +66,8 @@
 /// A minimal file used when the keeper is run without installation
 INCBIN(keeper_resource_embedded_xml, SOURCE_DIR "/programs/keeper/keeper_embedded.xml");

+extern const char * GIT_HASH;
+
 int mainEntryClickHouseKeeper(int argc, char ** argv)
 {
     DB::Keeper app;
@@ -675,7 +677,7 @@ void Keeper::logRevision() const
         "Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}",
         VERSION_STRING,
         ClickHouseRevision::getVersionRevision(),
-        git_hash.empty() ? "" : git_hash,
+        GIT_HASH,
         build_id.empty() ? "" : build_id,
         getpid());
 }
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index f75699881bd..c42bf7641d2 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -452,8 +452,6 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
     build_id = SymbolIndex::instance().getBuildIDHex();
 #endif

-    git_hash = GIT_HASH;
-
 #if defined(OS_LINUX)
     std::string executable_path = getExecutablePath();

@@ -466,7 +464,7 @@ void BaseDaemon::logRevision() const
 {
     logger().information("Starting " + std::string{VERSION_FULL}
         + " (revision: " + std::to_string(ClickHouseRevision::getVersionRevision())
-        + ", git hash: " + (git_hash.empty() ? "" : git_hash)
+        + ", git hash: " + std::string(GIT_HASH)
         + ", build id: " + (build_id.empty() ? "" : build_id) + ")"
         + ", PID " + std::to_string(getpid()));
 }
diff --git a/src/Daemon/BaseDaemon.h b/src/Daemon/BaseDaemon.h
index b15aa74fcf3..a6efa94a567 100644
--- a/src/Daemon/BaseDaemon.h
+++ b/src/Daemon/BaseDaemon.h
@@ -165,7 +165,6 @@ protected:
     Poco::Util::AbstractConfiguration * last_configuration = nullptr;

     String build_id;
-    String git_hash;
     String stored_binary_hash;

     bool should_setup_watchdog = false;

From 2a48aaad561f52539edbede94015c35e264bd344 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 09:12:49 +0200
Subject: [PATCH 137/180] Fix build

---
 src/Interpreters/executeQuery.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index fe87eed5570..decc16a3704 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -786,7 +786,7 @@ static std::tuple executeQueryImpl(
             /// Verify that AST formatting is consistent:
             /// If you format AST, parse it back, and format it again, you get the same string.

-            String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true);
+            String formatted1 = ast->formatWithPossiblyHidingSensitiveData(0, true, true, false);

             /// The query can become more verbose after formatting, so:
             size_t new_max_query_size = max_query_size > 0 ? (1000 + 2 * max_query_size) : 0;
@@ -811,7 +811,7 @@ static std::tuple executeQueryImpl(

             chassert(ast2);

-            String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true);
+            String formatted2 = ast2->formatWithPossiblyHidingSensitiveData(0, true, true, false);

             if (formatted1 != formatted2)
                 throw Exception(ErrorCodes::LOGICAL_ERROR,

From 4f7e3e8374acd98496092e8f8a219af6755a2f70 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 09:38:00 +0200
Subject: [PATCH 138/180] Fix test 01017_uniqCombined_memory_usage

---
 .../0_stateless/01017_uniqCombined_memory_usage.sql | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql
index c13a0859183..eca370d94af 100644
--- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql
+++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql
@@ -7,7 +7,8 @@
 -- sizeof(HLL) is (2^K * 6 / 8)
 -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400

-SET use_uncompressed_cache = 0;
+SET use_uncompressed_cache = 0;
+SET memory_profiler_step = 1;

 -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements
 SELECT 'UInt32';
@@ -31,14 +32,14 @@ SELECT 'K=16';
 SELECT 'UInt32';
 SET max_memory_usage = 2000000;
 SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED }
-SET max_memory_usage = 4915200;
+SET max_memory_usage = 5230000;
 SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k);

 -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements
 SELECT 'UInt64';
 SET max_memory_usage = 2000000;
 SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError MEMORY_LIMIT_EXCEEDED }
-SET max_memory_usage = 4915200;
+SET max_memory_usage = 5900000;
 SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k);

 SELECT 'K=18';

From ec8554c85aeae5dcc8367ce09d093c5526ef1d47 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 18 Aug 2024 09:41:29 +0200
Subject: [PATCH 139/180] Fix build

---
 src/Common/SignalHandlers.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/Common/SignalHandlers.cpp b/src/Common/SignalHandlers.cpp
index c4358da2453..6ac6cbcae29 100644
--- a/src/Common/SignalHandlers.cpp
+++ b/src/Common/SignalHandlers.cpp
@@ -18,13 +18,17 @@

 namespace DB
 {
+
 namespace ErrorCodes
 {
     extern const int CANNOT_SET_SIGNAL_HANDLER;
     extern const int CANNOT_SEND_SIGNAL;
 }
+
 }

+extern const char * GIT_HASH;
+
 using namespace DB;

@@ -334,7 +338,7 @@ void SignalListener::onTerminate(std::string_view message, UInt32 thread_num) co
     size_t pos = message.find('\n');

     LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) {}",
-        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "", thread_num, message.substr(0, pos));
+        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, thread_num, message.substr(0, pos));

     /// Print trace from std::terminate exception line-by-line to make it easy for grep.
     while (pos != std::string_view::npos)
@@ -368,7 +372,7 @@ try

     LOG_FATAL(log, "########## Short fault info ############");
     LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
-        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+        VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
         thread_num, sig);

     std::string signal_description = "Unknown signal";
@@ -434,13 +438,13 @@ try
     if (query_id.empty())
     {
         LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (no query) Received signal {} ({})",
-            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
             thread_num, signal_description, sig);
     }
     else
     {
         LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})",
-            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", daemon ? daemon->git_hash : "",
+            VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
             thread_num, query_id, query, signal_description, sig);
     }

From 01841d00eab668ecfa96a702e7f3cda68fce52e4 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Sun, 18 Aug 2024 12:18:24 +0000
Subject: [PATCH 140/180] fix test

---
 .../0_stateless/03221_refreshable_matview_progress.sql | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql
index de8de41fd04..ecb385c9bfa 100644
--- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql
+++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql
@@ -3,7 +3,7 @@
 set allow_experimental_refreshable_materialized_view=1;

 CREATE MATERIALIZED VIEW 03221_rmv
-REFRESH AFTER 1 SECOND
+REFRESH AFTER 10 SECOND
 (
 x UInt64
 )
@@ -16,3 +16,5 @@ SELECT rand64() AS x;
 SELECT sleep(2);

 SELECT read_rows, total_rows, progress FROM system.view_refreshes WHERE database = currentDatabase() and view = '03221_rmv';
+
+DROP TABLE 03221_rmv;

From f5308635d193859cdb19a71040006278a21bdc51 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Sun, 18 Aug 2024 15:25:07 +0200
Subject: [PATCH 141/180] Revert "Improve compatibility of `upper/lowerUTF8`
 with Spark"

---
 .gitmodules                                            |   7 +-
 contrib/icu                                            |   2 +-
 src/Common/examples/CMakeLists.txt                     |   5 -
 src/Common/examples/utf8_upper_lower.cpp               |  27 --
 src/Functions/LowerUpperImpl.h                         |   1 +
 src/Functions/LowerUpperUTF8Impl.h                     | 283 +++++++++++++++---
 src/Functions/initcapUTF8.cpp                          |   3 +-
 src/Functions/lowerUTF8.cpp                            |  25 +-
 src/Functions/upperUTF8.cpp                            |  24 +-
 .../00170_lower_upper_utf8.reference                   |   4 -
 .../0_stateless/00170_lower_upper_utf8.sql             |  11 -
 .../00233_position_function_family.sql                 |   3 -
 .../0_stateless/00761_lower_utf8_bug.sql               |   3 -
 .../0_stateless/01278_random_string_utf8.sql           |   3 -
 .../0_stateless/01431_utf8_ubsan.reference             |   4 +-
 .../queries/0_stateless/01431_utf8_ubsan.sql           |   3 -
 .../0_stateless/01590_countSubstrings.sql              |   3 -
 ...71_lower_upper_utf8_row_overlaps.reference          |   4 +-
 .../02071_lower_upper_utf8_row_overlaps.sql            |   3 -
 ...new_functions_must_be_documented.reference          |   2 +
 .../02514_if_with_lazy_low_cardinality.sql             |   3 -
 .../0_stateless/02807_lower_utf8_msan.sql              |   3 -
 tests/queries/0_stateless/03015_peder1001.sql          |   3 -
 23 files changed, 265 insertions(+), 164 deletions(-)
 delete mode 100644 src/Common/examples/utf8_upper_lower.cpp

diff --git a/.gitmodules b/.gitmodules
index f18844e5eb4..cdee6a43ad8 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -106,6 +106,9 @@
 [submodule "contrib/icudata"]
     path = contrib/icudata
     url = https://github.com/ClickHouse/icudata
+[submodule "contrib/icu"]
+    path = contrib/icu
+    url = https://github.com/unicode-org/icu
 [submodule "contrib/flatbuffers"]
     path = contrib/flatbuffers
     url = https://github.com/ClickHouse/flatbuffers
@@ -366,7 +369,3 @@
 [submodule "contrib/numactl"]
     path = contrib/numactl
     url = https://github.com/ClickHouse/numactl.git
-[submodule "contrib/icu"]
-    path = contrib/icu
-    url = https://github.com/ClickHouse/icu
-    branch = ClickHouse/release-75-1
diff --git a/contrib/icu b/contrib/icu
index 4216173eeeb..7750081bda4 160000
--- a/contrib/icu
+++ b/contrib/icu
@@ -1 +1 @@
-Subproject commit 4216173eeeb39c1d4caaa54a68860e800412d273
+Subproject commit 7750081bda4b3bc1768ae03849ec70f67ea10625
diff --git a/src/Common/examples/CMakeLists.txt b/src/Common/examples/CMakeLists.txt
index 8383e80d09d..69580d4ad0e 100644
--- a/src/Common/examples/CMakeLists.txt
+++ b/src/Common/examples/CMakeLists.txt
@@ -92,8 +92,3 @@ endif()

 clickhouse_add_executable (check_pointer_valid check_pointer_valid.cpp)
 target_link_libraries (check_pointer_valid PRIVATE clickhouse_common_io clickhouse_common_config)
-
-if (TARGET ch_contrib::icu)
-    clickhouse_add_executable (utf8_upper_lower utf8_upper_lower.cpp)
-    target_link_libraries (utf8_upper_lower PRIVATE ch_contrib::icu)
-endif ()
diff --git a/src/Common/examples/utf8_upper_lower.cpp b/src/Common/examples/utf8_upper_lower.cpp
deleted file mode 100644
index 826e1763105..00000000000
--- a/src/Common/examples/utf8_upper_lower.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-#include
-#include
-
-std::string utf8_to_lower(const std::string & input)
-{
-    icu::UnicodeString unicodeInput(input.c_str(), "UTF-8");
-    unicodeInput.toLower();
-    std::string output;
-    unicodeInput.toUTF8String(output);
-    return output;
-}
-
-std::string utf8_to_upper(const std::string & input)
-{
-    icu::UnicodeString unicodeInput(input.c_str(), "UTF-8");
-    unicodeInput.toUpper();
-    std::string output;
-    unicodeInput.toUTF8String(output);
-    return output;
-}
-
-int main()
-{
-    std::string input = "ır";
-    std::cout << "upper:" << utf8_to_upper(input) << std::endl;
-    return 0;
-}
diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h
index a52703d10c8..d463ef96e16 100644
--- a/src/Functions/LowerUpperImpl.h
+++ b/src/Functions/LowerUpperImpl.h
@@ -1,6 +1,7 @@
 #pragma once
 #include
+
 namespace DB
 {
diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h
index 5da085f48e5..eedabca5b22 100644
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@@ -1,14 +1,15 @@
 #pragma once
-
-#include "config.h"
-
-#if USE_ICU
-
 #include
 #include
-#include
-#include
+#include
+#include
 #include
+#include
+
+#ifdef __SSE2__
+#include
+#endif
+

 namespace DB
 {
@@ -18,7 +19,71 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }

-template
+/// xor or do nothing
+template
+UInt8 xor_or_identity(const UInt8 c, const int mask)
+{
+    return c ^ mask;
+}
+
+template <>
+inline UInt8 xor_or_identity(const UInt8 c, const int)
+{
+    return c;
+}
+
+/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array
+template
+inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst)
+{
+    if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu))
+    {
+        /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ
+        *dst++ = xor_or_identity(*src++, 0x1);
+        *dst++ = xor_or_identity(*src++, 0x10);
+    }
+    else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu))
+    {
+        /// ѐёђѓєѕіїјљњћќѝўџ
+        *dst++ = xor_or_identity(*src++, 0x1);
+        *dst++ = xor_or_identity(*src++, 0x10);
+    }
+    else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu))
+    {
+        /// А-П
+        *dst++ = *src++;
+        *dst++ = xor_or_identity(*src++, 0x20);
+    }
+    else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu))
+    {
+        /// а-п
+        *dst++ = *src++;
+        *dst++ = xor_or_identity(*src++, 0x20);
+    }
+    else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu))
+    {
+        /// Р-Я
+        *dst++ = xor_or_identity(*src++, 0x1);
+        *dst++ = xor_or_identity(*src++, 0x20);
+    }
+    else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu))
+    {
+        /// р-я
+        *dst++ = xor_or_identity(*src++, 0x1);
+        *dst++ = xor_or_identity(*src++, 0x20);
+    }
+}
+
+
+/** If the string contains UTF-8 encoded text, convert it to the lower (upper) case.
+  * Note: It is assumed that after the character is converted to another case,
+  * the length of its multibyte sequence in UTF-8 does not change.
+  * Otherwise, the behavior is undefined.
+  */
+template
 struct LowerUpperUTF8Impl
 {
     static void vector(
@@ -38,46 +103,180 @@ struct LowerUpperUTF8Impl
             return;
         }

-        res_data.resize(data.size());
-        res_offsets.resize_exact(offsets.size());
-
-        String output;
-        size_t curr_offset = 0;
-        for (size_t i = 0; i < offsets.size(); ++i)
-        {
-            const auto * data_start = reinterpret_cast(&data[offsets[i - 1]]);
-            size_t size = offsets[i] - offsets[i - 1];
-
-            icu::UnicodeString input(data_start, static_cast(size), "UTF-8");
-            if constexpr (upper)
-                input.toUpper();
-            else
-                input.toLower();
-
-            output.clear();
-            input.toUTF8String(output);
-
-            /// For valid UTF-8 input strings, ICU sometimes produces output with extra '\0's at the end. Only the data before the first
-            /// '\0' is valid. It the input is not valid UTF-8, then the behavior of lower/upperUTF8 is undefined by definition. In this
-            /// case, the behavior is also reasonable.
-            const char * res_end = find_last_not_symbols_or_null<'\0'>(output.data(), output.data() + output.size());
-            size_t valid_size = res_end ? res_end - output.data() + 1 : 0;
-
-            res_data.resize(curr_offset + valid_size + 1);
-            memcpy(&res_data[curr_offset], output.data(), valid_size);
-            res_data[curr_offset + valid_size] = 0;
-
-            curr_offset += valid_size + 1;
-            res_offsets[i] = curr_offset;
-        }
+        res_data.resize_exact(data.size());
+        res_offsets.assign(offsets);
+        array(data.data(), data.data() + data.size(), offsets, res_data.data());
     }

     static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &, size_t)
     {
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Functions lowerUTF8 and upperUTF8 cannot work with FixedString argument");
     }
+
+    /** Converts a single code point starting at `src` to desired case, storing result starting at `dst`.
+     *  `src` and `dst` are incremented by corresponding sequence lengths. */
+    static bool toCase(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool partial)
+    {
+        if (src[0] <= ascii_upper_bound)
+        {
+            if (*src >= not_case_lower_bound && *src <= not_case_upper_bound)
+                *dst++ = *src++ ^ flip_case_mask;
+            else
+                *dst++ = *src++;
+        }
+        else if (src + 1 < src_end
+            && ((src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0xBFu)) || (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x9Fu))))
+        {
+            cyrillic_to_case(src, dst);
+        }
+        else if (src + 1 < src_end && src[0] == 0xC2u)
+        {
+            /// Punctuation U+0080 - U+00BF, UTF-8: C2 80 - C2 BF
+            *dst++ = *src++;
+            *dst++ = *src++;
+        }
+        else if (src + 2 < src_end && src[0] == 0xE2u)
+        {
+            /// Characters U+2000 - U+2FFF, UTF-8: E2 80 80 - E2 BF BF
+            *dst++ = *src++;
+            *dst++ = *src++;
+            *dst++ = *src++;
+        }
+        else
+        {
+            size_t src_sequence_length = UTF8::seqLength(*src);
+            /// In case partial buffer was passed (due to SSE optimization)
+            /// we cannot convert it with current src_end, but we may have more
+            /// bytes to convert and eventually got correct symbol.
+            if (partial && src_sequence_length > static_cast(src_end - src))
+                return false;
+
+            auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
+            if (src_code_point)
+            {
+                int dst_code_point = to_case(*src_code_point);
+                if (dst_code_point > 0)
+                {
+                    size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
+                    assert(dst_sequence_length <= 4);
+
+                    /// We don't support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8.
+                    /// As an example, this happens for ß and ẞ.
+                    if (dst_sequence_length == src_sequence_length)
+                    {
+                        src += dst_sequence_length;
+                        dst += dst_sequence_length;
+                        return true;
+                    }
+                }
+            }
+
+            *dst = *src;
+            ++dst;
+            ++src;
+        }
+
+        return true;
+    }
+
+private:
+    static constexpr auto ascii_upper_bound = '\x7f';
+    static constexpr auto flip_case_mask = 'A' ^ 'a';
+
+    static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
+    {
+        const auto * offset_it = offsets.begin();
+        const UInt8 * begin = src;
+
+#ifdef __SSE2__
+        static constexpr auto bytes_sse = sizeof(__m128i);
+
+        /// If we are before this position, we can still read at least bytes_sse.
+        const auto * src_end_sse = src_end - bytes_sse + 1;
+
+        /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f)
+        const auto v_zero = _mm_setzero_si128();
+        const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1);
+        const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1);
+        const auto v_flip_case_mask = _mm_set1_epi8(flip_case_mask);
+
+        while (src < src_end_sse)
+        {
+            const auto chars = _mm_loadu_si128(reinterpret_cast(src));
+
+            /// check for ASCII
+            const auto is_not_ascii = _mm_cmplt_epi8(chars, v_zero);
+            const auto mask_is_not_ascii = _mm_movemask_epi8(is_not_ascii);
+
+            /// ASCII
+            if (mask_is_not_ascii == 0)
+            {
+                const auto is_not_case
+                    = _mm_and_si128(_mm_cmpgt_epi8(chars, v_not_case_lower_bound), _mm_cmplt_epi8(chars, v_not_case_upper_bound));
+                const auto mask_is_not_case = _mm_movemask_epi8(is_not_case);
+
+                /// everything in correct case ASCII
+                if (mask_is_not_case == 0)
+                    _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), chars);
+                else
+                {
+                    /// ASCII in mixed case
+                    /// keep `flip_case_mask` only where necessary, zero out elsewhere
+                    const auto xor_mask = _mm_and_si128(v_flip_case_mask, is_not_case);
+
+                    /// flip case by applying calculated mask
+                    const auto cased_chars = _mm_xor_si128(chars, xor_mask);
+
+                    /// store result back to destination
+                    _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), cased_chars);
+                }
+
+                src += bytes_sse;
+                dst += bytes_sse;
+            }
+            else
+            {
+                /// UTF-8
+
+                /// Find the offset of the next string after src
+                size_t offset_from_begin = src - begin;
+                while (offset_from_begin >= *offset_it)
+                    ++offset_it;
+
+                /// Do not allow one row influence another (since row may have invalid sequence, and break the next)
+                const UInt8 * row_end = begin + *offset_it;
+                chassert(row_end >= src);
+                const UInt8 * expected_end = std::min(src + bytes_sse, row_end);
+
+                while (src < expected_end)
+                {
+                    if (!toCase(src, expected_end, dst, /* partial= */ true))
+                    {
+                        /// Fallback to handling byte by byte.
+                        src_end_sse = src;
+                        break;
+                    }
+                }
+            }
+        }
+
+        /// Find the offset of the next string after src
+        size_t offset_from_begin = src - begin;
+        while (offset_it != offsets.end() && offset_from_begin >= *offset_it)
+            ++offset_it;
+#endif
+
+        /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another)
+        while (src < src_end)
+        {
+            const UInt8 * row_end = begin + *offset_it;
+            chassert(row_end >= src);
+
+            while (src < row_end)
+                toCase(src, row_end, dst, /* partial= */ false);
+            ++offset_it;
+        }
+    }
 };

 }
-
-#endif
diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp
index 004586dce26..282d846094e 100644
--- a/src/Functions/initcapUTF8.cpp
+++ b/src/Functions/initcapUTF8.cpp
@@ -1,8 +1,9 @@
 #include
 #include
+#include
 #include
 #include
-#include
+

 namespace DB
 {
diff --git a/src/Functions/lowerUTF8.cpp b/src/Functions/lowerUTF8.cpp
index e2f7cb84730..7adb0069121 100644
--- a/src/Functions/lowerUTF8.cpp
+++ b/src/Functions/lowerUTF8.cpp
@@ -1,10 +1,9 @@
-#include "config.h"
-
-#if USE_ICU
-
-#include
+#include
 #include
 #include
+#include
+#include
+

 namespace DB
 {
@@ -16,25 +15,13 @@ struct NameLowerUTF8
     static constexpr auto name = "lowerUTF8";
 };

-using FunctionLowerUTF8 = FunctionStringToString, NameLowerUTF8>;
+using FunctionLowerUTF8 = FunctionStringToString>, NameLowerUTF8>;
 }

 REGISTER_FUNCTION(LowerUTF8)
 {
-    FunctionDocumentation::Description description
-        = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)";
-    FunctionDocumentation::Syntax syntax = "lowerUTF8(input)";
-    FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}};
-    FunctionDocumentation::ReturnedValue returned_value = "A String data type value";
-    FunctionDocumentation::Examples examples = {
-        {"first", "SELECT lowerUTF8('München') as Lowerutf8;", "münchen"},
-    };
-    FunctionDocumentation::Categories categories = {"String"};
-
-    factory.registerFunction({description, syntax, arguments, returned_value, examples, categories});
+    factory.registerFunction();
 }

 }
-
-#endif
diff --git a/src/Functions/upperUTF8.cpp b/src/Functions/upperUTF8.cpp
index ef26430331f..659e67f0ef3 100644
--- a/src/Functions/upperUTF8.cpp
+++ b/src/Functions/upperUTF8.cpp
@@ -1,10 +1,8 @@
-#include "config.h"
-
-#if USE_ICU
-
-#include
 #include
 #include
+#include
+#include
+

 namespace DB
 {
@@ -16,25 +14,13 @@ struct NameUpperUTF8
     static constexpr auto name = "upperUTF8";
 };

-using FunctionUpperUTF8 = FunctionStringToString, NameUpperUTF8>;
+using FunctionUpperUTF8 = FunctionStringToString>, NameUpperUTF8>;
 }

 REGISTER_FUNCTION(UpperUTF8)
 {
-    FunctionDocumentation::Description description
-        = R"(Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.)";
-    FunctionDocumentation::Syntax syntax = "upperUTF8(input)";
-    FunctionDocumentation::Arguments arguments = {{"input", "Input with String type"}};
-    FunctionDocumentation::ReturnedValue returned_value = "A String data type value";
-    FunctionDocumentation::Examples examples = {
-        {"first", "SELECT upperUTF8('München') as Upperutf8;", "MÜNCHEN"},
-    };
-    FunctionDocumentation::Categories categories = {"String"};
-
-    factory.registerFunction({description, syntax, arguments, returned_value, examples, categories});
+    factory.registerFunction();
 }

 }
-
-#endif
diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.reference b/tests/queries/0_stateless/00170_lower_upper_utf8.reference
index 3c644f22b9b..f202cb75513 100644
--- a/tests/queries/0_stateless/00170_lower_upper_utf8.reference
+++ b/tests/queries/0_stateless/00170_lower_upper_utf8.reference
@@ -22,7 +22,3 @@
 1
 1
 1
-1
-1
-1
-1
diff --git a/tests/queries/0_stateless/00170_lower_upper_utf8.sql b/tests/queries/0_stateless/00170_lower_upper_utf8.sql
index 85b6c5c6095..4caba2033ff 100644
--- a/tests/queries/0_stateless/00170_lower_upper_utf8.sql
+++ b/tests/queries/0_stateless/00170_lower_upper_utf8.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 select lower('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str;
 select lowerUTF8('aaaaaaaaaaaaaaa012345789,.!aaaa' as str) = str;
 select lower('AaAaAaAaAaAaAaA012345789,.!aAaA') = 'aaaaaaaaaaaaaaa012345789,.!aaaa';
@@ -30,11 +27,3 @@ select sum(lower(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaАБВ
 select sum(upper(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n;
 select sum(lowerUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('aaaaабвгaaaaaaaa')) = count() from system.one array join range(16384) as n;
 select sum(upperUTF8(materialize('aaaaАБВГAAAAaaAA')) = materialize('AAAAАБВГAAAAAAAA')) = count() from system.one array join range(16384) as n;
-
--- Turkish language
-select upperUTF8('ır') = 'IR';
-select lowerUTF8('ır') = 'ır';
-
--- German language
-select upper('öäüß') = 'öäüß';
-select lower('ÖÄÜẞ') = 'ÖÄÜẞ';
diff --git a/tests/queries/0_stateless/00233_position_function_family.sql b/tests/queries/0_stateless/00233_position_function_family.sql
index d6668cb7ba4..dd7394bc39a 100644
--- a/tests/queries/0_stateless/00233_position_function_family.sql
+++ b/tests/queries/0_stateless/00233_position_function_family.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 SET send_logs_level = 'fatal';

 select 1 = position('', '');
diff --git a/tests/queries/0_stateless/00761_lower_utf8_bug.sql b/tests/queries/0_stateless/00761_lower_utf8_bug.sql
index a0ab55edc15..de20b894331 100644
--- a/tests/queries/0_stateless/00761_lower_utf8_bug.sql
+++ b/tests/queries/0_stateless/00761_lower_utf8_bug.sql
@@ -1,4 +1 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 SELECT lowerUTF8('\xF0') = lowerUTF8('\xF0');
diff --git a/tests/queries/0_stateless/01278_random_string_utf8.sql b/tests/queries/0_stateless/01278_random_string_utf8.sql
index 290d6a0c759..da2dc48c3e1 100644
--- a/tests/queries/0_stateless/01278_random_string_utf8.sql
+++ b/tests/queries/0_stateless/01278_random_string_utf8.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 SELECT randomStringUTF8('string'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
 SELECT lengthUTF8(randomStringUTF8(100));
 SELECT toTypeName(randomStringUTF8(10));
diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.reference b/tests/queries/0_stateless/01431_utf8_ubsan.reference
index dc785e57851..c98c950d535 100644
--- a/tests/queries/0_stateless/01431_utf8_ubsan.reference
+++ b/tests/queries/0_stateless/01431_utf8_ubsan.reference
@@ -1,2 +1,2 @@
-EFBFBD
-EFBFBD
+FF
+FF
diff --git a/tests/queries/0_stateless/01431_utf8_ubsan.sql b/tests/queries/0_stateless/01431_utf8_ubsan.sql
index 3a28e023805..d6a299225b1 100644
--- a/tests/queries/0_stateless/01431_utf8_ubsan.sql
+++ b/tests/queries/0_stateless/01431_utf8_ubsan.sql
@@ -1,5 +1,2 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 SELECT hex(lowerUTF8('\xFF'));
 SELECT hex(upperUTF8('\xFF'));
diff --git a/tests/queries/0_stateless/01590_countSubstrings.sql b/tests/queries/0_stateless/01590_countSubstrings.sql
index 5ec4f412d7f..b38cbb7d188 100644
--- a/tests/queries/0_stateless/01590_countSubstrings.sql
+++ b/tests/queries/0_stateless/01590_countSubstrings.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 --
 -- countSubstrings
 --
diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference
index deabef61a88..a3bac432482 100644
--- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference
+++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.reference
@@ -5,9 +5,9 @@ insert into utf8_overlap values ('\xe2'), ('Foo⚊BarBazBam'), ('\xe2'), ('Foo
 -- MONOGRAM FOR YANG
 with lowerUTF8(str) as l_, upperUTF8(str) as u_, '0x' || hex(str) as h_
 select length(str), if(l_ == '\xe2', h_, l_), if(u_ == '\xe2', h_, u_) from utf8_overlap format CSV;
-1,"�","�"
+1,"0xE2","0xE2"
 15,"foo⚊barbazbam","FOO⚊BARBAZBAM"
-1,"�","�"
+1,"0xE2","0xE2"
 15,"foo⚊barbazbam","FOO⚊BARBAZBAM"
 -- NOTE: regression test for introduced bug
 -- https://github.com/ClickHouse/ClickHouse/issues/42756
diff --git a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql
index d175e0659d0..8ca0a3f5f75 100644
--- a/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql
+++ b/tests/queries/0_stateless/02071_lower_upper_utf8_row_overlaps.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 drop table if exists utf8_overlap;
 create table utf8_overlap (str String) engine=Memory();
diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index 0980e25b70f..c39f1fb1ce9 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@@ -416,6 +416,7 @@ logTrace
 lowCardinalityIndices
 lowCardinalityKeys
 lower
+lowerUTF8
 makeDate
 makeDate32
 makeDateTime
@@ -896,6 +897,7 @@ tupleToNameValuePairs
 unbin
 unhex
 upper
+upperUTF8
 uptime
 validateNestedArraySizes
 version
diff --git a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql
index b169cfd0ab9..80e3c0a9ece 100644
--- a/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql
+++ b/tests/queries/0_stateless/02514_if_with_lazy_low_cardinality.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 create table if not exists t (`arr.key` Array(LowCardinality(String)), `arr.value` Array(LowCardinality(String))) engine = Memory;
 insert into t (`arr.key`, `arr.value`) values (['a'], ['b']);
 select if(true, if(lowerUTF8(arr.key) = 'a', 1, 2), 3) as x from t left array join arr;
diff --git a/tests/queries/0_stateless/02807_lower_utf8_msan.sql b/tests/queries/0_stateless/02807_lower_utf8_msan.sql
index 95f224577f7..e9eb18bf615 100644
--- a/tests/queries/0_stateless/02807_lower_utf8_msan.sql
+++ b/tests/queries/0_stateless/02807_lower_utf8_msan.sql
@@ -1,5 +1,2 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 SELECT lowerUTF8(arrayJoin(['©--------------------------------------', '©--------------------'])) ORDER BY 1;
 SELECT upperUTF8(materialize('aaaaАБВГaaaaaaaaaaaaАБВГAAAAaaAA')) FROM numbers(2);
diff --git a/tests/queries/0_stateless/03015_peder1001.sql b/tests/queries/0_stateless/03015_peder1001.sql
index df8e4db1536..810503207f2 100644
--- a/tests/queries/0_stateless/03015_peder1001.sql
+++ b/tests/queries/0_stateless/03015_peder1001.sql
@@ -1,6 +1,3 @@
---- Tags: no-fasttest
---- no-fasttest: upper/lowerUTF8 use ICU
-
 DROP TABLE IF EXISTS test_data;

 CREATE TABLE test_data

From 427016a450cad536e8cbaf4de04d07313456aa4b Mon Sep 17 00:00:00 2001
From: Max Kainov
Date: Tue, 6 Aug 2024 21:39:41 +0200
Subject: [PATCH 142/180] CI: Functional tests to store artifacts on timeout

---
 docker/test/fasttest/run.sh       | 18 +---------
 docker/test/sqllogic/Dockerfile   |  1 -
 docker/test/sqllogic/run.sh       |  2 +-
 docker/test/sqltest/Dockerfile    |  1 -
 docker/test/stateful/run.sh       | 25 +-------------
 docker/test/stateless/Dockerfile  |  1 -
 docker/test/stateless/run.sh      | 21 ++----------
 docker/test/stateless/utils.lib   | 16 ---------
 tests/ci/ci.py                    | 30 ++++++++--------
 tests/ci/ci_definitions.py        |  3 +-
 tests/ci/functional_test_check.py | 37 +++++++++++++++-----
 tests/ci/report.py                | 11 +++---
 tests/ci/tee_popen.py             | 57 +++++++++++++++++++++++++++----
 13 files changed, 107 insertions(+), 116 deletions(-)

diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh
index 394d31addb1..9920326b11c 100755
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@@ -256,22 +256,6 @@ function configure
     rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
 }

-function timeout_with_logging() {
-    local exit_code=0
-
-    timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
-
-    echo "Checking if it is a timeout. The code 124 will indicate a timeout."
-    if [[ "${exit_code}" -eq "124" ]]
-    then
-        echo "The command 'timeout ${*}' has been killed by timeout."
-    else
-        echo "No, it isn't a timeout."
-    fi
-
-    return $exit_code
-}
-
 function run_tests
 {
     clickhouse-server --version
@@ -340,7 +324,7 @@ case "$stage" in
     configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
     ;&
 "run_tests")
-    timeout_with_logging 35m bash -c run_tests ||:
+    run_tests ||:
     /process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
         --out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
         --out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile
index 1425e12cd84..6397526388e 100644
--- a/docker/test/sqllogic/Dockerfile
+++ b/docker/test/sqllogic/Dockerfile
@@ -35,7 +35,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \

 ENV TZ=Europe/Amsterdam
-ENV MAX_RUN_TIME=9000
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh
index ccba344035e..32368980f9b 100755
--- a/docker/test/sqllogic/run.sh
+++ b/docker/test/sqllogic/run.sh
@@ -94,7 +94,7 @@ function run_tests()

 export -f run_tests

-timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2
+run_tests

 #/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile
index 71d915b0c7a..b805bb03c2b 100644
--- a/docker/test/sqltest/Dockerfile
+++ b/docker/test/sqltest/Dockerfile
@@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
 RUN git clone ${sqltest_repo}

 ENV TZ=UTC
-ENV MAX_RUN_TIME=900
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 COPY run.sh /
diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh
index 3a4f0d97993..c072eeb0fa8 100755
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@@ -4,9 +4,6 @@ source /setup_export_logs.sh

 set -e -x

-MAX_RUN_TIME=${MAX_RUN_TIME:-3600}
-MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME))
-
 # Choose random timezone for this test run
 TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
 echo "Choosen random timezone $TZ"
@@ -123,9 +120,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]

         clickhouse-client --query "DROP TABLE datasets.hits_v1"
         clickhouse-client --query "DROP TABLE datasets.visits_v1"
-
-        MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000))  # min(MAX_RUN_TIME, 2.5 hours)
-        MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000))  # set to 2.5 hours if 0 (unlimited)
     else
         clickhouse-client --query "CREATE DATABASE test"
         clickhouse-client --query "SHOW TABLES FROM test"
@@ -257,24 +251,7 @@ function run_tests()

 export -f run_tests

-function timeout_with_logging() {
-    local exit_code=0
-
-    timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
-
-    echo "Checking if it is a timeout. The code 124 will indicate a timeout."
-    if [[ "${exit_code}" -eq "124" ]]
-    then
-        echo "The command 'timeout ${*}' has been killed by timeout."
-    else
-        echo "No, it isn't a timeout."
-    fi
-
-    return $exit_code
-}
-
-TIMEOUT=$((MAX_RUN_TIME - 700))
-timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
+run_tests ||:

 echo "Files in current directory"
 ls -la ./
diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile
index d8eb072328f..b0c4914a4e8 100644
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@@ -65,7 +65,6 @@ ENV TZ=Europe/Amsterdam
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

 ENV NUM_TRIES=1
-ENV MAX_RUN_TIME=0

 # Unrelated to vars in setup_minio.sh, but should be the same there
 # to have the same binaries for local running scenario
diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh
index c70cbe1fe45..ad0cd321cc5 100755
--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@@ -12,9 +12,6 @@ dmesg --clear
 # fail on errors, verbose and export all env variables
 set -e -x -a

-MAX_RUN_TIME=${MAX_RUN_TIME:-9000}
-MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME))
-
 USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
 USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
@@ -308,8 +305,6 @@ function run_tests()
     try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"

-    TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
-    START_TIME=${SECONDS}
     set +e

     TEST_ARGS=(
@@ -324,32 +319,22 @@ function run_tests()
         --test-runs "$NUM_TRIES"
         "${ADDITIONAL_OPTIONS[@]}"
     )
-    timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
+    clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
     | ts '%Y-%m-%d %H:%M:%S' \
     | tee -a test_output/test_result.txt
     set -e
-    DURATION=$((SECONDS - START_TIME))
-
-    echo "Elapsed ${DURATION} seconds."
-    if [[ $DURATION -ge $TIMEOUT ]]
-    then
-        echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds."
-    fi
 }

 export -f run_tests

-
-# This should be enough to setup job and collect artifacts
-TIMEOUT=$((MAX_RUN_TIME - 700))
 if [ "$NUM_TRIES" -gt "1" ]; then
     # We don't run tests with Ordinary database in PRs, only in master.
     # So run new/changed tests with Ordinary at least once in flaky check.
-    timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
+    NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
         | sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
 fi

-timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
+run_tests ||:

 echo "Files in current directory"
 ls -la ./
diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib
index cb257536c36..31cd67254b4 100644
--- a/docker/test/stateless/utils.lib
+++ b/docker/test/stateless/utils.lib
@@ -40,22 +40,6 @@ function fn_exists() {
     declare -F "$1" > /dev/null;
 }

-function timeout_with_logging() {
-    local exit_code=0
-
-    timeout -s TERM --preserve-status "${@}" || exit_code="${?}"
-
-    echo "Checking if it is a timeout. The code 124 will indicate a timeout."
-    if [[ "${exit_code}" -eq "124" ]]
-    then
-        echo "The command 'timeout ${*}' has been killed by timeout."
-    else
-        echo "No, it isn't a timeout."
-    fi
-
-    return $exit_code
-}
-
 function collect_core_dumps()
 {
     find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
diff --git a/tests/ci/ci.py b/tests/ci/ci.py
index 49b597333dc..1208d8642ad 100644
--- a/tests/ci/ci.py
+++ b/tests/ci/ci.py
@@ -50,7 +50,6 @@ from github_helper import GitHub
 from pr_info import PRInfo
 from report import (
     ERROR,
-    FAILURE,
     PENDING,
     SUCCESS,
     BuildResult,
@@ -62,11 +61,11 @@ from report import (
     FAIL,
 )
 from s3_helper import S3Helper
-from stopwatch import Stopwatch
 from tee_popen import TeePopen
 from ci_cache import CiCache
 from ci_settings import CiSettings
 from ci_buddy import CIBuddy
+from stopwatch import Stopwatch
 from version_helper import get_version_from_repo

 # pylint: disable=too-many-lines
@@ -370,8 +369,8 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
         # skip_status = SUCCESS already there
         GH.print_in_group("Commit Status Data", job_status)

-    # create pre report
-    jr = JobReport.create_pre_report(status=skip_status, job_skipped=to_be_skipped)
+    # create dummy report
+    jr = JobReport.create_dummy(status=skip_status, job_skipped=to_be_skipped)
     jr.dump()

     if not to_be_skipped:
@@ -990,19 +989,20 @@ def _run_test(job_name: str, run_command: str) -> int:
     stopwatch = Stopwatch()
     job_log = Path(TEMP_PATH) / "job_log.txt"
     with TeePopen(run_command, job_log, env, timeout) as process:
+        print(f"Job process started, pid [{process.process.pid}]")
         retcode = process.wait()
         if retcode != 0:
             print(f"Run action failed for: [{job_name}] with exit code [{retcode}]")
-        if timeout and process.timeout_exceeded:
-            print(f"Timeout {timeout} exceeded, dumping the job report")
-            JobReport(
-                status=FAILURE,
-                description=f"Timeout {timeout} exceeded",
-                test_results=[TestResult.create_check_timeout_expired(timeout)],
-                start_time=stopwatch.start_time_str,
-                duration=stopwatch.duration_seconds,
-                additional_files=[job_log],
-            ).dump()
+        if process.timeout_exceeded:
+            print(f"Job timed out: [{job_name}] exit code [{retcode}]")
+            assert JobReport.exist(), "JobReport real or dummy must be present"
+            jr = JobReport.load()
+            if jr.dummy:
+                print(
+                    f"ERROR: Run action failed with timeout and did not generate JobReport - update dummy report with execution time"
+                )
+                jr.test_results = [TestResult.create_check_timeout_expired()]
+                jr.duration = stopwatch.duration_seconds

     print(f"Run action done for: [{job_name}]")
     return retcode
@@ -1205,7 +1205,7 @@ def main() -> int:
                 job_report
             ), "BUG. There must be job report either real report, or pre-report if job was killed"
             error_description = ""
-            if not job_report.pre_report:
+            if not job_report.dummy:
                 # it's a real job report
                 ch_helper = ClickHouseHelper()
                 check_url = ""
diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py
index 1bed9db06f2..1d1c39f913d 100644
--- a/tests/ci/ci_definitions.py
+++ b/tests/ci/ci_definitions.py
@@ -332,7 +332,7 @@ class JobConfig:
     # will be triggered for the job if omitted in CI workflow yml
     run_command: str = ""
     # job timeout, seconds
-    timeout: Optional[int] = None
+    timeout: int = 7200
     # sets number of batches for a multi-batch job
     num_batches: int = 1
     # label that enables job in CI, if set digest isn't used
@@ -421,7 +421,6 @@ class CommonJobConfigs:
         ),
         run_command='functional_test_check.py "$CHECK_NAME"',
         runner_type=Runners.FUNC_TESTER,
-        timeout=9000,
     )
     STATEFUL_TEST = JobConfig(
         job_name_keyword="stateful",
diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py
index b7391eff01b..d08f98fa05f 100644
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@@ -5,10 +5,11 @@ import csv
 import logging
 import os
 import re
+import signal
 import subprocess
 import sys
 from pathlib import Path
-from typing import List, Tuple
+from typing import List, Tuple, Optional

 from build_download_helper import download_all_deb_packages
 from clickhouse_helper import CiLogsCredentials
@@ -25,11 +26,12 @@ from report import (
     TestResults,
     read_test_results,
     FAILURE,
+    TestResult,
 )
 from stopwatch import Stopwatch
 from tee_popen import TeePopen
 from ci_config import CI
-from ci_utils import Utils
+from ci_utils import Utils, Shell

 NO_CHANGES_MSG = "Nothing to run"

@@ -113,10 +115,6 @@ def get_run_command(

     if flaky_check:
         envs.append("-e NUM_TRIES=50")
-        envs.append("-e MAX_RUN_TIME=2800")
-    else:
-        max_run_time = os.getenv("MAX_RUN_TIME", "0")
-        envs.append(f"-e MAX_RUN_TIME={max_run_time}")

     envs += [f"-e {e}" for e in additional_envs]

@@ -128,7 +126,7 @@ def get_run_command(
     )

     return (
-        f"docker run --volume={builds_path}:/package_folder "
+        f"docker run --rm --name func-tester --volume={builds_path}:/package_folder "
         # For dmesg and sysctl
         "--privileged "
         f"{ci_logs_args}"
@@ -198,7 +196,7 @@ def process_results(
         state, description = status[0][0], status[0][1]
         if ret_code != 0:
             state = ERROR
-            description = f"Job failed, exit code: {ret_code}. 
" + description try: results_path = result_directory / "test_results.tsv" @@ -240,7 +238,19 @@ def parse_args(): return parser.parse_args() +test_process = None # type: Optional[TeePopen] +timeout_expired = False + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + Shell.check(f"docker exec func-tester pkill -f clickhouse-test", verbose=True) + + def main(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO) for handler in logging.root.handlers: # pylint: disable=protected-access @@ -328,11 +338,13 @@ def main(): logging.info("Going to run func tests: %s", run_command) with TeePopen(run_command, run_log_path) as process: + global test_process + test_process = process retcode = process.wait() if retcode == 0: logging.info("Run successfully") else: - logging.info("Run failed") + logging.info("Run failed, exit code %s", retcode) try: subprocess.check_call( @@ -348,6 +360,13 @@ def main(): state, description, test_results, additional_logs = process_results( retcode, result_path, server_log_path ) + if timeout_expired: + description = "Timeout expired" + state = FAILURE + test_results.insert( + 0, TestResult.create_check_timeout_expired(stopwatch.duration_seconds) + ) + else: print( "This is validate bugfix or flaky check run, but no changes test to run - skip with success" diff --git a/tests/ci/report.py b/tests/ci/report.py index 6779a6dae96..c2632719aef 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -249,6 +249,7 @@ JOB_REPORT_FILE = Path(GITHUB_WORKSPACE) / "job_report.json" JOB_STARTED_TEST_NAME = "STARTED" JOB_FINISHED_TEST_NAME = "COMPLETED" +JOB_TIMEOUT_TEST_NAME = "Job Timeout Expired" @dataclass @@ -277,8 +278,8 @@ class TestResult: self.log_files.append(log_path) @staticmethod - def create_check_timeout_expired(timeout: float) -> "TestResult": - return TestResult("Check timeout expired", "FAIL", timeout) + def create_check_timeout_expired(duration: Optional[float] = None) -> "TestResult": + return TestResult(JOB_TIMEOUT_TEST_NAME, "FAIL", time=duration) TestResults = List[TestResult] @@ -303,7 +304,7 @@ class JobReport: # indicates that this is not real job report but report for the job that was skipped by rerun check job_skipped: bool = False # indicates that report generated by CI script in order to check later if job was killed before real report is generated - pre_report: bool = False + dummy: bool = False exit_code: int = -1 @staticmethod @@ -311,7 +312,7 @@ class JobReport: return datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") @classmethod - def create_pre_report(cls, status: str, job_skipped: bool) -> "JobReport": + def create_dummy(cls, status: str, job_skipped: bool) -> "JobReport": return JobReport( status=status, description="", @@ -320,7 +321,7 @@ class JobReport: duration=0.0, additional_files=[], job_skipped=job_skipped, - pre_report=True, + dummy=True, ) def update_duration(self): diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 13db50df53f..ad3e62dab9c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -2,6 +2,8 @@ import logging import os +import signal +import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -30,20 +32,35 @@ class TeePopen: self._process = None # type: Optional[Popen] self.timeout = timeout self.timeout_exceeded = False + self.terminated_by_sigterm = False + self.terminated_by_sigkill = False + self.pid = 0 def _check_timeout(self) -> None: if 
self.timeout is None: return sleep(self.timeout) + logging.warning( + "Timeout exceeded. Send SIGTERM to process %s, timeout %s", + self.process.pid, + self.timeout, + ) + self.send_signal(signal.SIGTERM) + time_wait = 0 + self.terminated_by_sigterm = True self.timeout_exceeded = True + while self.process.poll() is None and time_wait < 100: + print("wait...") + wait = 5 + sleep(wait) + time_wait += wait while self.process.poll() is None: - logging.warning( - "Killing process %s, timeout %s exceeded", - self.process.pid, - self.timeout, + logging.error( + "Process is still running. Send SIGKILL", ) - os.killpg(self.process.pid, 9) - sleep(10) + self.send_signal(signal.SIGKILL) + self.terminated_by_sigkill = True + sleep(5) def __enter__(self) -> "TeePopen": self.process = Popen( @@ -57,6 +74,9 @@ class TeePopen: bufsize=1, errors="backslashreplace", ) + sleep(1) + self.pid = self._get_child_pid() + print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -77,6 +97,22 @@ class TeePopen: self.log_file.close() + def _get_child_pid(self): + # linux only + ps_command = f"ps --ppid {self.process.pid} -o pid=" + res = "NA" + try: + result = subprocess.run( + ps_command, shell=True, capture_output=True, text=True + ) + res = result.stdout.strip() + pid = int(res) + return pid + except Exception as e: + print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") + print(f"ERROR: getting Python subprocess PID: {e}") + return self.process.pid + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -85,6 +121,15 @@ class TeePopen: return self.process.wait() + def poll(self): + return self.process.poll() + + def send_signal(self, signal_num): + if self.pid: + os.kill(self.pid, signal_num) + else: + print("ERROR: no process to send signal") + @property def process(self) -> Popen: if self._process is not None: From 8e35b082b2e3315655110bdce4238217dfe85914 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Sat, 10 Aug 2024 10:01:16 +0200 Subject: [PATCH 143/180] teepopen fix --- tests/ci/tee_popen.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index ad3e62dab9c..53b0a0f6c2c 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -3,7 +3,6 @@ import logging import os import signal -import subprocess import sys from io import TextIOWrapper from pathlib import Path @@ -34,7 +33,6 @@ class TeePopen: self.timeout_exceeded = False self.terminated_by_sigterm = False self.terminated_by_sigkill = False - self.pid = 0 def _check_timeout(self) -> None: if self.timeout is None: @@ -75,8 +73,7 @@ class TeePopen: errors="backslashreplace", ) sleep(1) - self.pid = self._get_child_pid() - print(f"Subprocess started, pid [{self.process.pid}], child pid [{self.pid}]") + print(f"Subprocess started, pid [{self.process.pid}]") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit @@ -97,22 +94,6 @@ class TeePopen: self.log_file.close() - def _get_child_pid(self): - # linux only - ps_command = f"ps --ppid {self.process.pid} -o pid=" - res = "NA" - try: - result = subprocess.run( - ps_command, shell=True, capture_output=True, text=True - ) - res = result.stdout.strip() - pid = int(res) - return pid - except Exception 
as e: - print(f"Failed to get child's pid, command [{ps_command}], result [{res}]") - print(f"ERROR: getting Python subprocess PID: {e}") - return self.process.pid - def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: @@ -125,10 +106,7 @@ class TeePopen: return self.process.poll() def send_signal(self, signal_num): - if self.pid: - os.kill(self.pid, signal_num) - else: - print("ERROR: no process to send signal") + os.killpg(self.process.pid, signal_num) @property def process(self) -> Popen: From 66fa5a154a8895f481a598616df93f7cb83e42cd Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 12 Aug 2024 02:34:22 +0200 Subject: [PATCH 144/180] tune timeouts, batches --- tests/ci/ci_config.py | 7 ++++--- tests/ci/ci_definitions.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 173c6c9c931..99f4ed38475 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -316,6 +316,7 @@ class CI: JobNames.STATEFUL_TEST_PARALLEL_REPL_TSAN: CommonJobConfigs.STATEFUL_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], random_bucket="parrepl_with_sanitizer", + timeout=3600, ), JobNames.STATELESS_TEST_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=2 @@ -346,7 +347,7 @@ class CI: required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=2 + required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 ), JobNames.STATELESS_TEST_AZURE_ASAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], num_batches=3, release_only=True @@ -401,14 +402,14 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=6 + required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=6, + num_batches=3, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index 1d1c39f913d..13c222b10b9 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -465,6 +465,7 @@ class CommonJobConfigs: ), run_command="upgrade_check.py", runner_type=Runners.STRESS_TESTER, + timeout=3600, ) INTEGRATION_TEST = JobConfig( job_name_keyword="integration", From 1deeca40dbbbc14373e51d830b851b54b82e5efa Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 15 Aug 2024 13:11:10 +0200 Subject: [PATCH 145/180] Handling timeout in integration tests --- tests/ci/ci.py | 13 ++++++++- tests/ci/ci_config.py | 3 ++- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 40 +++++++++++++++++++++++++++- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 1208d8642ad..a9ae078b449 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1003,6 +1003,7 @@ def _run_test(job_name: str, run_command: str) -> int: ) 
jr.test_results = [TestResult.create_check_timeout_expired()] jr.duration = stopwatch.duration_seconds + jr.additional_files += [job_log] print(f"Run action done for: [{job_name}]") return retcode @@ -1329,10 +1330,20 @@ def main() -> int: if CI.is_test_job(args.job_name): gh = GitHub(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) + check_url = "" + if job_report.test_results or job_report.additional_files: + check_url = upload_result_helper.upload_results( + s3, + pr_info.number, + pr_info.sha, + job_report.test_results, + job_report.additional_files, + job_report.check_name or _get_ext_check_name(args.job_name), + ) post_commit_status( commit, ERROR, - "", + check_url, "Error: " + error_description, _get_ext_check_name(args.job_name), pr_info, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 99f4ed38475..b5e424c2b3f 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -402,7 +402,8 @@ class CI: required_builds=[BuildNames.PACKAGE_ASAN], release_only=True, num_batches=4 ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_ASAN], num_batches=4 + required_builds=[BuildNames.PACKAGE_ASAN], + num_batches=3, ), JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 6245f0490fc..7232ca375a1 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils +from ci_utils import Utils, Shell def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f5dbef4f6db..d3cd3d16de1 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -9,6 +9,7 @@ import random import re import shlex import shutil +import signal import string import subprocess import sys @@ -16,11 +17,13 @@ import time import zlib # for crc32 from collections import defaultdict from itertools import chain -from typing import Any, Dict +from typing import Any, Dict, Optional from env_helper import IS_CI from integration_test_images import IMAGES from tee_popen import TeePopen +from report import JOB_TIMEOUT_TEST_NAME +from stopwatch import Stopwatch MAX_RETRY = 1 NUM_WORKERS = 5 @@ -621,6 +624,9 @@ class ClickhouseIntegrationTestsRunner: test_data_dirs = {} for i in range(num_tries): + if timeout_expired: + print("Timeout expired - break test group execution") + break logging.info("Running test group %s for the %s retry", test_group, i) clear_ip_tables_and_restart_daemons() @@ -657,6 +663,8 @@ class ClickhouseIntegrationTestsRunner: logging.info("Executing cmd: %s", cmd) # ignore retcode, since it meaningful due to pipe to tee with subprocess.Popen(cmd, shell=True, stderr=log, stdout=log) as proc: + global runner_subprocess + runner_subprocess = proc proc.wait() extra_logs_names = [log_basename] @@ -780,6 +788,9 @@ class ClickhouseIntegrationTestsRunner: logs = [] tries_num = 1 if should_fail else FLAKY_TRIES_COUNT for i in range(tries_num): + if timeout_expired: + print("Timeout expired - break flaky check execution") + break final_retry += 1 logging.info("Running tests for the %s time", i) counters, tests_times, log_paths = self.try_run_test_group( @@ -839,6 
+850,7 @@ class ClickhouseIntegrationTestsRunner: return result_state, status_text, test_result, logs def run_impl(self, repo_path, build_path): + stopwatch = Stopwatch() if self.flaky_check or self.bugfix_validate_check: return self.run_flaky_check( repo_path, build_path, should_fail=self.bugfix_validate_check @@ -921,6 +933,9 @@ class ClickhouseIntegrationTestsRunner: random.shuffle(items_to_run) for group, tests in items_to_run: + if timeout_expired: + print("Timeout expired - break tests execution") + break logging.info("Running test group %s containing %s tests", group, len(tests)) group_counters, group_test_times, log_paths = self.try_run_test_group( repo_path, group, tests, MAX_RETRY, NUM_WORKERS, 0 @@ -981,6 +996,17 @@ class ClickhouseIntegrationTestsRunner: status_text = "Timeout, " + status_text result_state = "failure" + if timeout_expired: + logging.error( + "Job killed by external timeout signal - setting status to failure!" + ) + status_text = "Job timeout expired, " + status_text + result_state = "failure" + # add mock test case to make timeout visible in job report and in ci db + test_result.insert( + 0, (JOB_TIMEOUT_TEST_NAME, "FAIL", f"{stopwatch.duration_seconds}", "") + ) + if not counters or sum(len(counter) for counter in counters.values()) == 0: status_text = "No tests found for some reason! It's a bug" result_state = "failure" @@ -1001,6 +1027,7 @@ def write_results(results_file, status_file, results, status): def run(): + signal.signal(signal.SIGTERM, handle_sigterm) logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") repo_path = os.environ.get("CLICKHOUSE_TESTS_REPO_PATH") @@ -1035,5 +1062,16 @@ def run(): logging.info("Result written") +timeout_expired = False +runner_subprocess = None # type:Optional[subprocess.Popen] + + +def handle_sigterm(signum, _frame): + print(f"WARNING: Received signal {signum}") + global timeout_expired + timeout_expired = True + runner_subprocess.send_signal(signal.SIGTERM) + + if __name__ == "__main__": run() From dde7ee29fc594f87bb35880bede845c4d4f29423 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 16 Aug 2024 10:22:12 +0200 Subject: [PATCH 146/180] sort tests in report by status --- tests/ci/ci_config.py | 8 ++++---- tests/ci/integration_test_check.py | 2 +- tests/ci/integration_tests_runner.py | 3 ++- tests/ci/report.py | 19 +++++++++++++++---- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index b5e424c2b3f..8ce0b9fde5a 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -344,7 +344,7 @@ class CI: runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.STATELESS_TEST_OLD_ANALYZER_S3_REPLICATED_RELEASE: CommonJobConfigs.STATELESS_TEST.with_properties( - required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=4 + required_builds=[BuildNames.PACKAGE_RELEASE], num_batches=2 ), JobNames.STATELESS_TEST_S3_DEBUG: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], num_batches=1 @@ -354,7 +354,7 @@ class CI: ), JobNames.STATELESS_TEST_S3_TSAN: CommonJobConfigs.STATELESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], - num_batches=4, + num_batches=3, ), JobNames.STRESS_TEST_DEBUG: CommonJobConfigs.STRESS_TEST.with_properties( required_builds=[BuildNames.PACKAGE_DEBUG], @@ -403,14 +403,14 @@ class CI: ), JobNames.INTEGRATION_TEST_ASAN_OLD_ANALYZER: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_ASAN], - num_batches=3, + num_batches=6, ), 
JobNames.INTEGRATION_TEST_TSAN: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_TSAN], num_batches=6 ), JobNames.INTEGRATION_TEST_ARM: CommonJobConfigs.INTEGRATION_TEST.with_properties( required_builds=[BuildNames.PACKAGE_AARCH64], - num_batches=3, + num_batches=6, runner_type=Runners.FUNC_TESTER_ARM, ), JobNames.INTEGRATION_TEST: CommonJobConfigs.INTEGRATION_TEST.with_properties( diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 7232ca375a1..6245f0490fc 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -29,7 +29,7 @@ from stopwatch import Stopwatch import integration_tests_runner as runner from ci_config import CI -from ci_utils import Utils, Shell +from ci_utils import Utils def get_json_params_dict( diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index d3cd3d16de1..c3b71b85022 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -1070,7 +1070,8 @@ def handle_sigterm(signum, _frame): print(f"WARNING: Received signal {signum}") global timeout_expired timeout_expired = True - runner_subprocess.send_signal(signal.SIGTERM) + if runner_subprocess: + runner_subprocess.send_signal(signal.SIGTERM) if __name__ == "__main__": diff --git a/tests/ci/report.py b/tests/ci/report.py index c2632719aef..a1b25b994c7 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -742,10 +742,21 @@ def create_test_html_report( has_test_time = any(tr.time is not None for tr in test_results) has_log_urls = False - # Display entires with logs at the top (they correspond to failed tests) - test_results.sort( - key=lambda result: result.raw_logs is None and result.log_files is None - ) + def sort_key(status): + if "fail" in status.lower(): + return 0 + elif "error" in status.lower(): + return 1 + elif "not" in status.lower(): + return 2 + elif "ok" in status.lower(): + return 10 + elif "success" in status.lower(): + return 9 + else: + return 5 + + test_results.sort(key=lambda result: sort_key(result.status)) for test_result in test_results: colspan = 0 From 90330077e5595c000cec82c8a0819db339296d33 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sun, 18 Aug 2024 17:56:44 +0000 Subject: [PATCH 147/180] fix test --- .../queries/0_stateless/03221_refreshable_matview_progress.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql index ecb385c9bfa..98e1c48478d 100644 --- a/tests/queries/0_stateless/03221_refreshable_matview_progress.sql +++ b/tests/queries/0_stateless/03221_refreshable_matview_progress.sql @@ -1,4 +1,4 @@ --- Tags: no-ordinary-database +-- Tags: no-replicated-database, no-ordinary-database set allow_experimental_refreshable_materialized_view=1; From 683b84e6b66edbfbba12a3c56aad9aefd717bde2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 19:37:52 +0100 Subject: [PATCH 148/180] fix --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 4 ++++ src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index cc321cd5a4d..9927d369104 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -235,6 +235,10 @@ void 
MergeTreeReadPool::fillPerThreadInfo(size_t threads, size_t sum_marks) const auto part_idx = current_parts.back().part_idx; const auto min_marks_per_task = per_part_infos[part_idx]->min_marks_per_task; + if (min_marks_per_task == 0) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); + /// Do not get too few rows from part. if (marks_in_part >= min_marks_per_task && need_marks < min_marks_per_task) need_marks = min_marks_per_task; diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 9d3c38822e1..1cc13102794 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -87,10 +87,6 @@ static size_t calculateMinMarksPerTask( } } - if (min_marks_per_task == 0) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Chosen number of marks to read is zero (likely because of weird interference of settings)"); - LOG_TEST(&Poco::Logger::get("MergeTreeReadPoolBase"), "Will use min_marks_per_task={}", min_marks_per_task); return min_marks_per_task; } From 3883627aad0e23105ec9e1f985039cd27dcf227d Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 18 Aug 2024 20:51:40 +0100 Subject: [PATCH 149/180] fix --- src/Storages/MergeTree/MergeTreeReadPool.cpp | 1 + src/Storages/MergeTree/MergeTreeReadPoolBase.cpp | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 9927d369104..23c314e48f5 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes { extern const int CANNOT_SCHEDULE_TASK; extern const int LOGICAL_ERROR; +extern const int BAD_ARGUMENTS; } MergeTreeReadPool::MergeTreeReadPool( diff --git a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp index 1cc13102794..95a10454f9e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPoolBase.cpp @@ -13,7 +13,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int BAD_ARGUMENTS; } MergeTreeReadPoolBase::MergeTreeReadPoolBase( From a258b4fb3dcdb8fa2484132aa72e9cece618d488 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:18:31 +0200 Subject: [PATCH 150/180] Fix race condition in MergeTree restarting thread --- .../ReplicatedMergeTreeRestartingThread.cpp | 15 ++++++++++++++- .../ReplicatedMergeTreeRestartingThread.h | 11 ++--------- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++++------ 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 05fd6f6915b..d3ccda904b6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -49,6 +48,20 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage task = storage.getContext()->getSchedulePool().createTask(log_name, [this]{ run(); }); } +void ReplicatedMergeTreeRestartingThread::start(bool schedule) +{ + LOG_TRACE(log, "Starting the restating thread, schedule: {}", schedule); + if (schedule) + task->activateAndSchedule(); + 
else + task->activate(); +} + +void ReplicatedMergeTreeRestartingThread::wakeup() +{ + task->schedule(); +} + void ReplicatedMergeTreeRestartingThread::run() { if (need_stop) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 01071d80e8b..d719505ae5e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -24,16 +24,9 @@ class ReplicatedMergeTreeRestartingThread public: explicit ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - void start(bool schedule = true) - { - LOG_TRACE(log, "Starting restating thread, schedule: {}", schedule); - if (schedule) - task->activateAndSchedule(); - else - task->activate(); - } + void start(bool schedule); - void wakeup() { task->schedule(); } + void wakeup(); void shutdown(bool part_of_full_shutdown); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 068ff1387b3..ff8e362aa36 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5194,17 +5194,16 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) startBeingLeader(); - /// Activate replica in a separate thread if we are not calling from attach thread - restarting_thread.start(/*schedule=*/!from_attach_thread); - if (from_attach_thread) { LOG_TRACE(log, "Trying to startup table from right now"); - /// Try activating replica in current thread. + /// Try activating replica in the current thread. restarting_thread.run(); + restarting_thread.start(false); } else { + restarting_thread.start(true); /// Wait while restarting_thread finishing initialization. /// NOTE It does not mean that replication is actually started after receiving this event. /// It only means that an attempt to startup replication was made. @@ -5225,7 +5224,7 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) session_expired_callback_handler = EventNotifier::instance().subscribe(Coordination::Error::ZSESSIONEXPIRED, [this]() { LOG_TEST(log, "Received event for expired session. 
Waking up restarting thread"); - restarting_thread.start(); + restarting_thread.start(true); }); startBackgroundMovesIfNeeded(); @@ -5294,7 +5293,6 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown() LOG_TRACE(log, "The attach thread is shutdown"); } - restarting_thread.shutdown(/* part_of_full_shutdown */true); /// Explicitly set the event, because the restarting thread will not set it again startup_event.set(); From 3e5d070b8fb47600979a1a7cc672edc3a7327004 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:40:42 +0200 Subject: [PATCH 151/180] Fix tests --- .../00804_test_deflate_qpl_codec_compression.reference | 2 +- .../00804_test_zstd_qat_codec_compression.reference | 2 +- .../03227_print_pretty_tuples_create_query.reference | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference index a2178f5eda7..a6e03404f2b 100644 --- a/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_deflate_qpl_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(UInt16, UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(DEFLATE_QPL),\n `data` String CODEC(DEFLATE_QPL),\n `ddd` Date CODEC(DEFLATE_QPL),\n `ddd32` Date32 CODEC(DEFLATE_QPL),\n `somenum` Float64 CODEC(DEFLATE_QPL),\n `somestr` FixedString(3) CODEC(DEFLATE_QPL),\n `othernum` Int64 CODEC(DEFLATE_QPL),\n `somearray` Array(UInt8) CODEC(DEFLATE_QPL),\n `somemap` Map(String, UInt32) CODEC(DEFLATE_QPL),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(DEFLATE_QPL)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 
2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference index 31a4360469f..ff70403ce7a 100644 --- a/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference +++ b/tests/queries/0_stateless/00804_test_zstd_qat_codec_compression.reference @@ -1,4 +1,4 @@ -CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(UInt16, UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.compression_codec\n(\n `id` UInt64 CODEC(ZSTD_QAT(1)),\n `data` String CODEC(ZSTD_QAT(1)),\n `ddd` Date CODEC(ZSTD_QAT(1)),\n `ddd32` Date32 CODEC(ZSTD_QAT(1)),\n `somenum` Float64 CODEC(ZSTD_QAT(1)),\n `somestr` FixedString(3) CODEC(ZSTD_QAT(1)),\n `othernum` Int64 CODEC(ZSTD_QAT(1)),\n `somearray` Array(UInt8) CODEC(ZSTD_QAT(1)),\n `somemap` Map(String, UInt32) CODEC(ZSTD_QAT(1)),\n `sometuple` Tuple(\n UInt16,\n UInt64) CODEC(ZSTD_QAT(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 1 hello 2018-12-14 2018-12-14 1.1 aaa 5 [1,2,3] {'k1':1,'k2':2} (1,2) 2 world 2018-12-15 2018-12-15 2.2 bbb 6 [4,5,6] {'k3':3,'k4':4} (3,4) 3 ! 2018-12-16 2018-12-16 3.3 ccc 7 [7,8,9] {'k5':5,'k6':6} (5,6) diff --git a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference index c65dc32a224..afaaaaa6119 100644 --- a/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference +++ b/tests/queries/0_stateless/03227_print_pretty_tuples_create_query.reference @@ -1,6 +1,6 @@ SHOW CREATE TABLE: -CREATE TABLE test.test +CREATE TABLE default.test ( `x` Tuple( a String, @@ -13,7 +13,7 @@ CREATE TABLE test.test ENGINE = MergeTree ORDER BY tuple() SETTINGS index_granularity = 8192 -CREATE TABLE test.test +CREATE TABLE default.test ( `x` Tuple(a String, b Array(Tuple(c Tuple(e String), d String))), `y` String From 8f2c20806a7b757beecb99e52a8d5a1dcab8df07 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:45:13 +0200 Subject: [PATCH 152/180] Fix test `01079_bad_alters_zookeeper_long` --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- tests/clickhouse-test | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 068ff1387b3..66dac0dfe31 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6342,7 +6342,7 @@ void StorageReplicatedMergeTree::alter( "Metadata on replica is not up to date with common metadata in Zookeeper. " "It means that this replica still not applied some of previous alters." " Probably too many alters executing concurrently (highly not recommended). " - "You can retry this error"); + "You can retry the query"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
if (query_context->getZooKeeperMetadataTransaction()) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index a3d7e0e922d..1203ad3730a 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -59,6 +59,7 @@ MESSAGES_TO_RETRY = [ "is already started to be removing by another replica right now", # This is from LSan, and it indicates its own internal problem: "Unable to get registers from thread", + "You can retry", ] MAX_RETRIES = 3 From 207ef87782eca80410ed5747d971be7ddce65e6c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 18 Aug 2024 22:55:38 +0200 Subject: [PATCH 153/180] Fix tests --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 3 ++- .../0_stateless/01825_type_json_ghdata_insert_select.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index fbd7d897fb8..b2f20d825dd 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-s3-storage +# ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 711194e71a1..17398d9a0c1 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-s3-storage +# ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 5ed3c29d4a1e1992671af7168c0a1b01757d97bb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 01:18:12 +0200 Subject: [PATCH 154/180] Update tests --- tests/queries/0_stateless/01825_new_type_json_ghdata.sh | 2 +- .../queries/0_stateless/01825_type_json_ghdata_insert_select.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh index b2f20d825dd..33940caec29 100755 --- a/tests/queries/0_stateless/01825_new_type_json_ghdata.sh +++ b/tests/queries/0_stateless/01825_new_type_json_ghdata.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-s3-storage, long # ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) diff --git a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh index 17398d9a0c1..fc503b345d9 100755 --- a/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh +++ b/tests/queries/0_stateless/01825_type_json_ghdata_insert_select.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-s3-storage +# Tags: no-fasttest, no-s3-storage, long # ^ no-s3-storage: too memory hungry CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) From 2a8c9b8518175d81632cf7ca48c10522b737e6b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 19 Aug 2024 07:59:14 +0200 Subject: [PATCH 155/180] Fix tests --- .../0_stateless/01825_new_type_json_ghdata_insert_select.sh | 3 ++- tests/queries/0_stateless/01825_type_json_ghdata.sh | 3 ++- 2 
files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh
index 2afec5ba7fe..568ba2bd185 100755
--- a/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh
+++ b/tests/queries/0_stateless/01825_new_type_json_ghdata_insert_select.sh
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest, long
+# Tags: no-fasttest, no-s3-storage, long
+# ^ no-s3-storage: it is memory-hungry

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
diff --git a/tests/queries/0_stateless/01825_type_json_ghdata.sh b/tests/queries/0_stateless/01825_type_json_ghdata.sh
index 2686e2c8eb1..7e952de6c08 100755
--- a/tests/queries/0_stateless/01825_type_json_ghdata.sh
+++ b/tests/queries/0_stateless/01825_type_json_ghdata.sh
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-s3-storage, long
+# ^ no-s3-storage: it is memory-hungry

 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From e623ad041f4937b0e7ed22f3159acfee6c0147b3 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Thu, 25 Jul 2024 16:44:17 +0200
Subject: [PATCH 156/180] Make C-z ignorance configurable (ignore_shell_suspend) in clickhouse-client

C-z is extremely useful for some users (like myself), so provide a way
to configure it in the client and stop ignoring it in clickhouse-disks
(I hope this is OK since it is not that well-known a utility and it
does not have its own configuration, while a CLI option is useless
since one would have to remember to pass it).

Honestly, I've never seen any interactive client that forbids C-z, so
ignoring it by default looks strange to me.

Signed-off-by: Azat Khuzhin
---
 programs/client/clickhouse-client.xml   | 3 +++
 programs/disks/DisksApp.cpp             | 1 +
 programs/disks/DisksApp.h               | 2 +-
 programs/disks/DisksClient.cpp          | 1 -
 programs/disks/DisksClient.h            | 3 +--
 programs/keeper-client/KeeperClient.cpp | 1 +
 src/Client/ClientBase.cpp               | 1 +
 src/Client/ReplxxLineReader.cpp         | 4 +++-
 src/Client/ReplxxLineReader.h           | 1 +
 9 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml
index 9ce7d1cb223..6eb8976a6ef 100644
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@@ -53,6 +53,9 @@
     -->
+
+
+
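
For reference, the new clickhouse-client.xml option introduced by this
patch would look roughly like the sketch below. Only the setting name,
ignore_shell_suspend, is taken from the commit subject; the comment
wording and the default value are assumptions:

    <!-- Assumption: ignoring Ctrl+Z (SIGTSTP) remains the default
         behavior, and setting this to false lets the client be
         suspended from the shell. -->
    <ignore_shell_suspend>true</ignore_shell_suspend>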