From f6d50f6fd339e6849e62c6be8fd03801e6e6a6c3 Mon Sep 17 00:00:00 2001
From: DF5HSE
Date: Mon, 1 Nov 2021 19:06:23 +0300
Subject: [PATCH 001/215] Add list of hosts reading

---
 programs/client/Client.cpp | 5 +++--
 programs/client/Client.h   | 1 +
 src/Client/ClientBase.cpp  | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index a5e4517824d..e92d3f41683 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -481,6 +481,7 @@ catch (...)
 
 void Client::connect()
 {
+    config().setString("host", hosts[0]);
     connection_parameters = ConnectionParameters(config());
 
     if (is_interactive)
@@ -966,7 +967,7 @@ void Client::addOptions(OptionsDescription & options_description)
     /// Main commandline options related to client functionality and all parameters from Settings.
     options_description.main_description->add_options()
         ("config,c", po::value<std::string>(), "config-file path (another shorthand)")
-        ("host,h", po::value<std::string>()->default_value("localhost"), "server host")
+        ("host,h", po::value<std::vector<std::string>>()->multitoken()->default_value({"localhost"}, "localhost"), "list of server hosts")
         ("port", po::value<int>()->default_value(9000), "server port")
         ("secure,s", "Use TLS connection")
         ("user,u", po::value<std::string>()->default_value("default"), "user")
@@ -1075,7 +1076,7 @@ void Client::processOptions(const OptionsDescription & options_description,
     if (options.count("config"))
         config().setString("config-file", options["config"].as<std::string>());
     if (options.count("host") && !options["host"].defaulted())
-        config().setString("host", options["host"].as<std::string>());
+        hosts = options["host"].as<std::vector<std::string>>();
     if (options.count("interleave-queries-file"))
         interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
     if (options.count("port") && !options["port"].defaulted())
diff --git a/programs/client/Client.h b/programs/client/Client.h
index 2def74ef3fc..80d2fa7a277 100644
--- a/programs/client/Client.h
+++ b/programs/client/Client.h
@@ -29,6 +29,7 @@ protected:
         const std::vector<Arguments> & external_tables_arguments) override;
     void processConfig() override;
 
+    std::vector<String> hosts{};
 private:
     void printChangedSettings() const;
     std::vector<String> loadWarningMessages();
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index eb00ee349ee..54108bad478 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1869,7 +1869,7 @@ void ClientBase::init(int argc, char ** argv)
 
     /// Output of help message.
     if (options.count("help")
-        || (options.count("host") && options["host"].as<std::string>() == "elp")) /// If user writes -help instead of --help.
+        || (options.count("host") && options["host"].as<std::vector<std::string>>()[0] == "elp")) /// If user writes -help instead of --help.
{ printHelpMessage(options_description); exit(0); From b46af3e0c4624a863ad7bb228da6c44ccb5d11ab Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 8 Nov 2021 18:53:05 +0300 Subject: [PATCH 002/215] Add list of ':[]' reading in --host param --- programs/client/Client.cpp | 218 +++++++++++++++++++++---------------- programs/client/Client.h | 5 +- src/Client/ClientBase.cpp | 2 +- src/Client/ClientBase.h | 18 +++ 4 files changed, 147 insertions(+), 96 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index e92d3f41683..1b56c15f680 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1,15 +1,20 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include "Client.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "Core/Protocol.h" #include @@ -20,33 +25,32 @@ #include #include -#include #include #include #include +#include #include #include -#include -#include #include +#include +#include #include #include -#include -#include +#include #include #include #include -#include #include #include +#include #include "TestTags.h" #ifndef __clang__ -#pragma GCC optimize("-fno-var-tracking-assignments") +# pragma GCC optimize("-fno-var-tracking-assignments") #endif namespace fs = std::filesystem; @@ -69,9 +73,18 @@ void Client::processError(const String & query) const { if (server_exception) { +<<<<<<< HEAD fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); +======= + bool print_stack_trace = config().getBool("stacktrace", false); + fmt::print( + stderr, + "Received exception from server (version {}):\n{}\n", + server_version, + getExceptionMessage(*server_exception, print_stack_trace, true)); +>>>>>>> Add list of ':[]' reading in --host param if (is_interactive) { fmt::print(stderr, "\n"); @@ -138,21 +151,18 @@ bool Client::executeMultiQuery(const String & all_queries_text) while (true) { - auto stage = analyzeMultiQueryText(this_query_begin, this_query_end, all_queries_end, - query_to_execute, parsed_query, all_queries_text, current_exception); + auto stage = analyzeMultiQueryText( + this_query_begin, this_query_end, all_queries_end, query_to_execute, parsed_query, all_queries_text, current_exception); switch (stage) { case MultiQueryProcessingStage::QUERIES_END: - case MultiQueryProcessingStage::PARSING_FAILED: - { + case MultiQueryProcessingStage::PARSING_FAILED: { return true; } - case MultiQueryProcessingStage::CONTINUE_PARSING: - { + case MultiQueryProcessingStage::CONTINUE_PARSING: { continue; } - case MultiQueryProcessingStage::PARSING_EXCEPTION: - { + case MultiQueryProcessingStage::PARSING_EXCEPTION: { this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end); // Try to find test hint for syntax error. 
We don't know where @@ -179,8 +189,7 @@ bool Client::executeMultiQuery(const String & all_queries_text) continue; } - case MultiQueryProcessingStage::EXECUTE_QUERY: - { + case MultiQueryProcessingStage::EXECUTE_QUERY: { full_query = all_queries_text.substr(this_query_begin - all_queries_text.data(), this_query_end - this_query_begin); if (query_fuzzer_runs) { @@ -220,14 +229,21 @@ bool Client::executeMultiQuery(const String & all_queries_text) if (!server_exception) { error_matches_hint = false; - fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n", - test_hint.serverError(), full_query); + fmt::print( + stderr, + "Expected server error code '{}' but got no server error (query: {}).\n", + test_hint.serverError(), + full_query); } else if (server_exception->code() != test_hint.serverError()) { error_matches_hint = false; - fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n", - test_hint.serverError(), server_exception->code(), full_query); + fmt::print( + stderr, + "Expected server error code: {} but got: {} (query: {}).\n", + test_hint.serverError(), + server_exception->code(), + full_query); } } if (test_hint.clientError()) @@ -235,14 +251,21 @@ bool Client::executeMultiQuery(const String & all_queries_text) if (!client_exception) { error_matches_hint = false; - fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n", - test_hint.clientError(), full_query); + fmt::print( + stderr, + "Expected client error code '{}' but got no client error (query: {}).\n", + test_hint.clientError(), + full_query); } else if (client_exception->code() != test_hint.clientError()) { error_matches_hint = false; - fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n", - test_hint.clientError(), client_exception->code(), full_query); + fmt::print( + stderr, + "Expected client error code '{}' but got '{}' (query: {}).\n", + test_hint.clientError(), + client_exception->code(), + full_query); } } if (!test_hint.clientError() && !test_hint.serverError()) @@ -257,14 +280,20 @@ bool Client::executeMultiQuery(const String & all_queries_text) { if (test_hint.clientError()) { - fmt::print(stderr, "The query succeeded but the client error '{}' was expected (query: {}).\n", - test_hint.clientError(), full_query); + fmt::print( + stderr, + "The query succeeded but the client error '{}' was expected (query: {}).\n", + test_hint.clientError(), + full_query); error_matches_hint = false; } if (test_hint.serverError()) { - fmt::print(stderr, "The query succeeded but the server error '{}' was expected (query: {}).\n", - test_hint.serverError(), full_query); + fmt::print( + stderr, + "The query succeeded but the server error '{}' was expected (query: {}).\n", + test_hint.serverError(), + full_query); error_matches_hint = false; } } @@ -354,8 +383,8 @@ std::vector Client::loadWarningMessages() continue; default: - throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", - packet.type, connection->getDescription()); + throw Exception( + ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); } } } @@ -481,15 +510,13 @@ catch (...) void Client::connect() { - config().setString("host", hosts[0]); connection_parameters = ConnectionParameters(config()); if (is_interactive) std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? 
"database " + connection_parameters.default_database + " at " - : "") - << connection_parameters.host << ":" << connection_parameters.port - << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " : "") + << connection_parameters.host << ":" << connection_parameters.port + << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; String server_name; UInt64 server_version_major = 0; @@ -516,18 +543,20 @@ void Client::connect() && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) { std::cerr << std::endl - << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl - << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl - << "and deleting this file will reset the password." << std::endl - << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl - << std::endl; + << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl + << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" + << std::endl + << "and deleting this file will reset the password." << std::endl + << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl + << std::endl; } throw; } server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); - load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)); + load_suggestions + = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)); if (server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); server_display_name.empty()) server_display_name = config().getString("host", "localhost"); @@ -543,14 +572,14 @@ void Client::connect() if (client_version_tuple < server_version_tuple) { std::cout << "ClickHouse client version is older than ClickHouse server. " - << "It may lack support for new features." << std::endl - << std::endl; + << "It may lack support for new features." << std::endl + << std::endl; } else if (client_version_tuple > server_version_tuple) { std::cout << "ClickHouse server version is older than ClickHouse client. " - << "It may indicate that the server is out of date and can be upgraded." << std::endl - << std::endl; + << "It may indicate that the server is out of date and can be upgraded." << std::endl + << std::endl; } } @@ -566,16 +595,16 @@ void Client::connect() catch (...) { std::cerr << "Warning: could not switch to server time zone: " << time_zone - << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl - << "Proceeding with local time zone." << std::endl - << std::endl; + << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl + << "Proceeding with local time zone." << std::endl + << std::endl; } } else { std::cerr << "Warning: could not determine server time zone. " - << "Proceeding with local time zone." << std::endl - << std::endl; + << "Proceeding with local time zone." 
<< std::endl + << std::endl; } } @@ -677,8 +706,7 @@ bool Client::processWithFuzzing(const String & full_query) } catch (const Exception & e) { - if (e.code() != ErrorCodes::SYNTAX_ERROR && - e.code() != ErrorCodes::TOO_DEEP_RECURSION) + if (e.code() != ErrorCodes::SYNTAX_ERROR && e.code() != ErrorCodes::TOO_DEEP_RECURSION) throw; } @@ -708,10 +736,7 @@ bool Client::processWithFuzzing(const String & full_query) // - SET -- The time to fuzz the settings has not yet come // (see comments in Client/QueryFuzzer.cpp) size_t this_query_runs = query_fuzzer_runs; - if (orig_ast->as() || - orig_ast->as() || - orig_ast->as() || - orig_ast->as()) + if (orig_ast->as() || orig_ast->as() || orig_ast->as() || orig_ast->as()) { this_query_runs = 1; } @@ -768,7 +793,9 @@ bool Client::processWithFuzzing(const String & full_query) fmt::print( stderr, - "Found error: IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly."); + "Found error: IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its " + "cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent " + "node doesn't implement clone() correctly."); exit(1); } @@ -823,9 +850,7 @@ bool Client::processWithFuzzing(const String & full_query) catch (...) { // Just report it, we'll terminate below. - fmt::print(stderr, - "Error while reconnecting to the server: {}\n", - getCurrentExceptionMessage(true)); + fmt::print(stderr, "Error while reconnecting to the server: {}\n", getCurrentExceptionMessage(true)); // The reconnection might fail, but we'll still be connected // in the sense of `connection->isConnected() = true`, @@ -890,8 +915,7 @@ bool Client::processWithFuzzing(const String & full_query) } catch (Exception & e) { - if (e.code() != ErrorCodes::SYNTAX_ERROR && - e.code() != ErrorCodes::TOO_DEEP_RECURSION) + if (e.code() != ErrorCodes::SYNTAX_ERROR && e.code() != ErrorCodes::TOO_DEEP_RECURSION) throw; } @@ -899,8 +923,7 @@ bool Client::processWithFuzzing(const String & full_query) { const auto text_2 = ast_2->formatForErrorMessage(); const auto * tmp_pos = text_2.c_str(); - const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), - false /* allow_multi_statements */); + const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), false /* allow_multi_statements */); const auto text_3 = ast_3->formatForErrorMessage(); if (text_3 != text_2) { @@ -908,9 +931,12 @@ bool Client::processWithFuzzing(const String & full_query) printChangedSettings(); - fmt::print(stderr, - "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", - text_3, text_2); + fmt::print( + stderr, + "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, " + "expected:\n'{}'\n", + text_3, + text_2); fmt::print(stderr, "In more detail:\n"); fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); @@ -965,12 +991,13 @@ void Client::printHelpMessage(const OptionsDescription & options_description) void Client::addOptions(OptionsDescription & options_description) { /// Main commandline options related to client functionality and all parameters from Settings. 
-    options_description.main_description->add_options()
-        ("config,c", po::value<std::string>(), "config-file path (another shorthand)")
-        ("host,h", po::value<std::vector<std::string>>()->multitoken()->default_value({"localhost"}, "localhost"), "list of server hosts")
-        ("port", po::value<int>()->default_value(9000), "server port")
-        ("secure,s", "Use TLS connection")
-        ("user,u", po::value<std::string>()->default_value("default"), "user")
+    options_description.main_description->add_options()("config,c", po::value<std::string>(), "config-file path (another shorthand)")(
+        "host,h",
+        po::value<std::vector<HostPort>>()->multitoken()->default_value({{"localhost"}}, "localhost"),
+        "list of server hosts with optionally assigned port to connect. Every argument looks like '<host>[:<port>]', for example "
+        "'localhost:port'. If port isn't assigned, connection is made by port from '--port' param")(
+        "port", po::value<int>()->default_value(9000), "server default port")("secure,s", "Use TLS connection")(
+        "user,u", po::value<std::string>()->default_value("default"), "user")
     /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
       * implicit_value is used to avoid this exception (to allow user to type just "--password")
       * Since currently boost provides no way to check if a value has been set implicitly for an option,
@@ -1017,9 +1044,10 @@ void Client::addOptions(OptionsDescription & options_description)
 }
 
 
-void Client::processOptions(const OptionsDescription & options_description,
-                            const CommandLineOptions & options,
-                            const std::vector<Arguments> & external_tables_arguments)
+void Client::processOptions(
+    const OptionsDescription & options_description,
+    const CommandLineOptions & options,
+    const std::vector<Arguments> & external_tables_arguments)
 {
     namespace po = boost::program_options;
 
@@ -1027,8 +1055,8 @@ void Client::processOptions(const OptionsDescription & options_description,
     for (size_t i = 0; i < external_tables_arguments.size(); ++i)
     {
         /// Parse commandline options related to external tables.
-        po::parsed_options parsed_tables = po::command_line_parser(external_tables_arguments[i]).options(
-            options_description.external_description.value()).run();
+        po::parsed_options parsed_tables
+            = po::command_line_parser(external_tables_arguments[i]).options(options_description.external_description.value()).run();
         po::variables_map external_options;
         po::store(parsed_tables, external_options);
 
@@ -1076,7 +1104,12 @@ void Client::processOptions(const OptionsDescription & options_description,
     if (options.count("config"))
         config().setString("config-file", options["config"].as<std::string>());
     if (options.count("host") && !options["host"].defaulted())
-        hosts = options["host"].as<std::vector<std::string>>();
+    {
+        hosts_ports = options["host"].as<std::vector<HostPort>>();
+        config().setString("host", hosts_ports[0].host);
+        if (hosts_ports[0].port.has_value())
+            config().setInt("port", hosts_ports[0].port.value());
+    }
     if (options.count("interleave-queries-file"))
         interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
     if (options.count("port") && !options["port"].defaulted())
@@ -1167,7 +1200,8 @@ void Client::processConfig()
     if (global_context->getSettingsRef().max_insert_block_size.changed)
         insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
     else
-        insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size);
+        insert_format_max_block_size
+            = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size);
 
     ClientInfo & client_info = global_context->getClientInfo();
     client_info.setInitialQuery();
diff --git a/programs/client/Client.h b/programs/client/Client.h
index 80d2fa7a277..ceebb64896d 100644
--- a/programs/client/Client.h
+++ b/programs/client/Client.h
@@ -1,11 +1,11 @@
 #pragma once
 
 #include
-
+#include
+#include
 namespace DB
 {
-
 class Client : public ClientBase
 {
 public:
@@ -29,7 +29,6 @@ protected:
         const std::vector<Arguments> & external_tables_arguments) override;
     void processConfig() override;
 
-    std::vector<String> hosts{};
 private:
     void printChangedSettings() const;
     std::vector<String> loadWarningMessages();
diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 54108bad478..13695448be2 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1869,7 +1869,7 @@ void ClientBase::init(int argc, char ** argv)
 
     /// Output of help message.
     if (options.count("help")
-        || (options.count("host") && options["host"].as<std::vector<std::string>>()[0] == "elp")) /// If user writes -help instead of --help.
+        || (options.count("host") && options["host"].as<std::vector<HostPort>>()[0].host == "elp")) /// If user writes -help instead of --help.
{ printHelpMessage(options_description); exit(0); diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 1926df5afea..9e3e03d4820 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -235,6 +235,24 @@ protected: } profile_events; QueryProcessingStage::Enum query_processing_stage; + + struct HostPort + { + String host; + std::optional port{}; + friend std::istream & operator>>(std::istream & in, HostPort & hostPort) { + String + host_with_port, + delimiter = ":"; + in >> host_with_port; + size_t delimiter_pos = host_with_port.find(delimiter); + hostPort.host = host_with_port.substr(0, delimiter_pos); + if (delimiter_pos < host_with_port.length()) + hostPort.port = std::stoi(host_with_port.substr(delimiter_pos + 1, host_with_port.length())); + return in; + } + }; + std::vector hosts_ports{}; }; } From 0d7e8536146e0618c728772dc787f428a0a76648 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 8 Nov 2021 19:22:17 +0300 Subject: [PATCH 003/215] Delete extra includes --- programs/client/Client.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/programs/client/Client.h b/programs/client/Client.h index ceebb64896d..99560d6d950 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include namespace DB { From a4a46be6977544b1435a26e5c3dc5dfdd068de44 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 8 Nov 2021 19:34:05 +0300 Subject: [PATCH 004/215] Revert changes in Client.h --- programs/client/Client.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/programs/client/Client.h b/programs/client/Client.h index 99560d6d950..2def74ef3fc 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -2,8 +2,10 @@ #include + namespace DB { + class Client : public ClientBase { public: From aa328fa4d85978692db75dbedb2fcf3e11bb0143 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 8 Nov 2021 21:42:07 +0300 Subject: [PATCH 005/215] Fix formatting --- programs/client/Client.cpp | 157 +++++++++++++++++-------------------- 1 file changed, 71 insertions(+), 86 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1b56c15f680..998d70c12f8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1,20 +1,20 @@ -#include "Client.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include -#include #include +#include #include +#include +#include +#include "Client.h" #include "Core/Protocol.h" #include @@ -25,16 +25,17 @@ #include #include +#include #include #include #include -#include #include #include -#include -#include #include +#include +#include +#include #include #include @@ -44,13 +45,13 @@ #include +#include #include #include -#include #include "TestTags.h" #ifndef __clang__ -# pragma GCC optimize("-fno-var-tracking-assignments") +#pragma GCC optimize("-fno-var-tracking-assignments") #endif namespace fs = std::filesystem; @@ -73,18 +74,15 @@ void Client::processError(const String & query) const { if (server_exception) { -<<<<<<< HEAD fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); -======= bool print_stack_trace = config().getBool("stacktrace", false); fmt::print( stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, 
true)); ->>>>>>> Add list of ':[]' reading in --host param if (is_interactive) { fmt::print(stderr, "\n"); @@ -151,18 +149,21 @@ bool Client::executeMultiQuery(const String & all_queries_text) while (true) { - auto stage = analyzeMultiQueryText( - this_query_begin, this_query_end, all_queries_end, query_to_execute, parsed_query, all_queries_text, current_exception); + auto stage = analyzeMultiQueryText(this_query_begin, this_query_end, all_queries_end, + query_to_execute, parsed_query, all_queries_text, current_exception); switch (stage) { case MultiQueryProcessingStage::QUERIES_END: - case MultiQueryProcessingStage::PARSING_FAILED: { + case MultiQueryProcessingStage::PARSING_FAILED: + { return true; } - case MultiQueryProcessingStage::CONTINUE_PARSING: { + case MultiQueryProcessingStage::CONTINUE_PARSING: + { continue; } - case MultiQueryProcessingStage::PARSING_EXCEPTION: { + case MultiQueryProcessingStage::PARSING_EXCEPTION: + { this_query_end = find_first_symbols<'\n'>(this_query_end, all_queries_end); // Try to find test hint for syntax error. We don't know where @@ -189,7 +190,8 @@ bool Client::executeMultiQuery(const String & all_queries_text) continue; } - case MultiQueryProcessingStage::EXECUTE_QUERY: { + case MultiQueryProcessingStage::EXECUTE_QUERY: + { full_query = all_queries_text.substr(this_query_begin - all_queries_text.data(), this_query_end - this_query_begin); if (query_fuzzer_runs) { @@ -229,21 +231,14 @@ bool Client::executeMultiQuery(const String & all_queries_text) if (!server_exception) { error_matches_hint = false; - fmt::print( - stderr, - "Expected server error code '{}' but got no server error (query: {}).\n", - test_hint.serverError(), - full_query); + fmt::print(stderr, "Expected server error code '{}' but got no server error (query: {}).\n", + test_hint.serverError(), full_query); } else if (server_exception->code() != test_hint.serverError()) { error_matches_hint = false; - fmt::print( - stderr, - "Expected server error code: {} but got: {} (query: {}).\n", - test_hint.serverError(), - server_exception->code(), - full_query); + fmt::print(stderr, "Expected server error code: {} but got: {} (query: {}).\n", + test_hint.serverError(), server_exception->code(), full_query); } } if (test_hint.clientError()) @@ -251,21 +246,14 @@ bool Client::executeMultiQuery(const String & all_queries_text) if (!client_exception) { error_matches_hint = false; - fmt::print( - stderr, - "Expected client error code '{}' but got no client error (query: {}).\n", - test_hint.clientError(), - full_query); + fmt::print(stderr, "Expected client error code '{}' but got no client error (query: {}).\n", + test_hint.clientError(), full_query); } else if (client_exception->code() != test_hint.clientError()) { error_matches_hint = false; - fmt::print( - stderr, - "Expected client error code '{}' but got '{}' (query: {}).\n", - test_hint.clientError(), - client_exception->code(), - full_query); + fmt::print(stderr, "Expected client error code '{}' but got '{}' (query: {}).\n", + test_hint.clientError(), client_exception->code(), full_query); } } if (!test_hint.clientError() && !test_hint.serverError()) @@ -280,20 +268,14 @@ bool Client::executeMultiQuery(const String & all_queries_text) { if (test_hint.clientError()) { - fmt::print( - stderr, - "The query succeeded but the client error '{}' was expected (query: {}).\n", - test_hint.clientError(), - full_query); + fmt::print(stderr, "The query succeeded but the client error '{}' was expected (query: {}).\n", + test_hint.clientError(), 
full_query); error_matches_hint = false; } if (test_hint.serverError()) { - fmt::print( - stderr, - "The query succeeded but the server error '{}' was expected (query: {}).\n", - test_hint.serverError(), - full_query); + fmt::print(stderr, "The query succeeded but the server error '{}' was expected (query: {}).\n", + test_hint.serverError(), full_query); error_matches_hint = false; } } @@ -383,8 +365,8 @@ std::vector Client::loadWarningMessages() continue; default: - throw Exception( - ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", packet.type, connection->getDescription()); + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", + packet.type, connection->getDescription()); } } } @@ -514,7 +496,8 @@ void Client::connect() if (is_interactive) std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " : "") + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " + : "") << connection_parameters.host << ":" << connection_parameters.port << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; @@ -544,8 +527,7 @@ void Client::connect() { std::cerr << std::endl << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl - << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" - << std::endl + << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl << "and deleting this file will reset the password." << std::endl << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl << std::endl; @@ -555,8 +537,7 @@ void Client::connect() } server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); - load_suggestions - = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)); + load_suggestions = is_interactive && (server_revision >= Suggest::MIN_SERVER_REVISION && !config().getBool("disable_suggestion", false)); if (server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); server_display_name.empty()) server_display_name = config().getString("host", "localhost"); @@ -706,7 +687,8 @@ bool Client::processWithFuzzing(const String & full_query) } catch (const Exception & e) { - if (e.code() != ErrorCodes::SYNTAX_ERROR && e.code() != ErrorCodes::TOO_DEEP_RECURSION) + if (e.code() != ErrorCodes::SYNTAX_ERROR && + e.code() != ErrorCodes::TOO_DEEP_RECURSION) throw; } @@ -736,7 +718,10 @@ bool Client::processWithFuzzing(const String & full_query) // - SET -- The time to fuzz the settings has not yet come // (see comments in Client/QueryFuzzer.cpp) size_t this_query_runs = query_fuzzer_runs; - if (orig_ast->as() || orig_ast->as() || orig_ast->as() || orig_ast->as()) + if (orig_ast->as() || + orig_ast->as() || + orig_ast->as() || + orig_ast->as()) { this_query_runs = 1; } @@ -793,9 +778,7 @@ bool Client::processWithFuzzing(const String & full_query) fmt::print( stderr, - "Found error: IAST::clone() is broken for some AST node. This is a bug. 
The original AST ('dump before fuzz') and its " - "cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent " - "node doesn't implement clone() correctly."); + "Found error: IAST::clone() is broken for some AST node. This is a bug. The original AST ('dump before fuzz') and its cloned copy ('dump of cloned AST') refer to the same nodes, which must never happen. This means that their parent node doesn't implement clone() correctly."); exit(1); } @@ -850,7 +833,9 @@ bool Client::processWithFuzzing(const String & full_query) catch (...) { // Just report it, we'll terminate below. - fmt::print(stderr, "Error while reconnecting to the server: {}\n", getCurrentExceptionMessage(true)); + fmt::print(stderr, + "Error while reconnecting to the server: {}\n", + getCurrentExceptionMessage(true)); // The reconnection might fail, but we'll still be connected // in the sense of `connection->isConnected() = true`, @@ -915,7 +900,8 @@ bool Client::processWithFuzzing(const String & full_query) } catch (Exception & e) { - if (e.code() != ErrorCodes::SYNTAX_ERROR && e.code() != ErrorCodes::TOO_DEEP_RECURSION) + if (e.code() != ErrorCodes::SYNTAX_ERROR && + e.code() != ErrorCodes::TOO_DEEP_RECURSION) throw; } @@ -923,7 +909,8 @@ bool Client::processWithFuzzing(const String & full_query) { const auto text_2 = ast_2->formatForErrorMessage(); const auto * tmp_pos = text_2.c_str(); - const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), false /* allow_multi_statements */); + const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), + false /* allow_multi_statements */); const auto text_3 = ast_3->formatForErrorMessage(); if (text_3 != text_2) { @@ -931,12 +918,9 @@ bool Client::processWithFuzzing(const String & full_query) printChangedSettings(); - fmt::print( - stderr, - "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, " - "expected:\n'{}'\n", - text_3, - text_2); + fmt::print(stderr, + "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", + text_3, text_2); fmt::print(stderr, "In more detail:\n"); fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); @@ -991,13 +975,14 @@ void Client::printHelpMessage(const OptionsDescription & options_description) void Client::addOptions(OptionsDescription & options_description) { /// Main commandline options related to client functionality and all parameters from Settings. - options_description.main_description->add_options()("config,c", po::value(), "config-file path (another shorthand)")( - "host,h", - po::value>()->multitoken()->default_value({{"localhost"}}, "localhost"), - "list of server hosts with optionally assigned port to connect. Every argument looks like '[:] for example" - "'localhost:port'. If port isn't assigned, connection is made by port from '--port' param")( - "port", po::value()->default_value(9000), "server default port")("secure,s", "Use TLS connection")( - "user,u", po::value()->default_value("default"), "user") + options_description.main_description->add_options() + ("config,c", po::value(), "config-file path (another shorthand)") + ("host,h", po::value>()->multitoken()->default_value({{"localhost"}}, "localhost"), + "list of server hosts with optionally assigned port to connect. Every argument looks like '[:] for example" + "'localhost:port'. 
If port isn't assigned, connection is made by port from '--port' param") + ("port", po::value()->default_value(9000), "server port") + ("secure,s", "Use TLS connection") + ("user,u", po::value()->default_value("default"), "user") /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown. * implicit_value is used to avoid this exception (to allow user to type just "--password") * Since currently boost provides no way to check if a value has been set implicitly for an option, From 76769ac1338618340e2b0743bd22e3ea534fed08 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 8 Nov 2021 21:50:06 +0300 Subject: [PATCH 006/215] Fix formatting --- programs/client/Client.cpp | 46 +++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 998d70c12f8..f425ba2c159 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -366,7 +366,7 @@ std::vector Client::loadWarningMessages() default: throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from server {}", - packet.type, connection->getDescription()); + packet.type, connection->getDescription()); } } } @@ -496,10 +496,10 @@ void Client::connect() if (is_interactive) std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " : "") - << connection_parameters.host << ":" << connection_parameters.port - << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; + << connection_parameters.host << ":" << connection_parameters.port + << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; String server_name; UInt64 server_version_major = 0; @@ -526,11 +526,11 @@ void Client::connect() && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) { std::cerr << std::endl - << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl - << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl - << "and deleting this file will reset the password." << std::endl - << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl - << std::endl; + << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl + << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl + << "and deleting this file will reset the password." << std::endl + << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl + << std::endl; } throw; @@ -553,14 +553,14 @@ void Client::connect() if (client_version_tuple < server_version_tuple) { std::cout << "ClickHouse client version is older than ClickHouse server. " - << "It may lack support for new features." << std::endl - << std::endl; + << "It may lack support for new features." << std::endl + << std::endl; } else if (client_version_tuple > server_version_tuple) { std::cout << "ClickHouse server version is older than ClickHouse client. " - << "It may indicate that the server is out of date and can be upgraded." 
<< std::endl - << std::endl; + << "It may indicate that the server is out of date and can be upgraded." << std::endl + << std::endl; } } @@ -576,16 +576,16 @@ void Client::connect() catch (...) { std::cerr << "Warning: could not switch to server time zone: " << time_zone - << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl - << "Proceeding with local time zone." << std::endl - << std::endl; + << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl + << "Proceeding with local time zone." << std::endl + << std::endl; } } else { std::cerr << "Warning: could not determine server time zone. " - << "Proceeding with local time zone." << std::endl - << std::endl; + << "Proceeding with local time zone." << std::endl + << std::endl; } } @@ -834,8 +834,8 @@ bool Client::processWithFuzzing(const String & full_query) { // Just report it, we'll terminate below. fmt::print(stderr, - "Error while reconnecting to the server: {}\n", - getCurrentExceptionMessage(true)); + "Error while reconnecting to the server: {}\n", + getCurrentExceptionMessage(true)); // The reconnection might fail, but we'll still be connected // in the sense of `connection->isConnected() = true`, @@ -910,7 +910,7 @@ bool Client::processWithFuzzing(const String & full_query) const auto text_2 = ast_2->formatForErrorMessage(); const auto * tmp_pos = text_2.c_str(); const auto ast_3 = parseQuery(tmp_pos, tmp_pos + text_2.size(), - false /* allow_multi_statements */); + false /* allow_multi_statements */); const auto text_3 = ast_3->formatForErrorMessage(); if (text_3 != text_2) { @@ -919,8 +919,8 @@ bool Client::processWithFuzzing(const String & full_query) printChangedSettings(); fmt::print(stderr, - "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", - text_3, text_2); + "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", + text_3, text_2); fmt::print(stderr, "In more detail:\n"); fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); From c126afa93b62c353627b988e0be04b922e8efd9a Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 8 Nov 2021 21:52:45 +0300 Subject: [PATCH 007/215] Fix formatting --- programs/client/Client.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index f425ba2c159..6285dd77b83 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -497,7 +497,7 @@ void Client::connect() if (is_interactive) std::cout << "Connecting to " << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " - : "") + : "") << connection_parameters.host << ":" << connection_parameters.port << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." 
<< std::endl; @@ -919,8 +919,8 @@ bool Client::processWithFuzzing(const String & full_query) printChangedSettings(); fmt::print(stderr, - "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", - text_3, text_2); + "Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n", + text_3, text_2); fmt::print(stderr, "In more detail:\n"); fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree()); fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute); From fb2a45009a549400ec065bf7806a49dec4e77075 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Tue, 9 Nov 2021 00:43:34 +0300 Subject: [PATCH 008/215] Fix formatting --- programs/client/Client.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 6285dd77b83..d630f2106cb 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1029,10 +1029,9 @@ void Client::addOptions(OptionsDescription & options_description) } -void Client::processOptions( - const OptionsDescription & options_description, - const CommandLineOptions & options, - const std::vector & external_tables_arguments) +void Client::processOptions(const OptionsDescription & options_description, + const CommandLineOptions & options, + const std::vector & external_tables_arguments) { namespace po = boost::program_options; @@ -1040,8 +1039,8 @@ void Client::processOptions( for (size_t i = 0; i < external_tables_arguments.size(); ++i) { /// Parse commandline options related to external tables. - po::parsed_options parsed_tables - = po::command_line_parser(external_tables_arguments[i]).options(options_description.external_description.value()).run(); + po::parsed_options parsed_tables = po::command_line_parser(external_tables_arguments[i]).options( + options_description.external_description.value()).run(); po::variables_map external_options; po::store(parsed_tables, external_options); @@ -1185,8 +1184,7 @@ void Client::processConfig() if (global_context->getSettingsRef().max_insert_block_size.changed) insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size; else - insert_format_max_block_size - = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); + insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size); ClientInfo & client_info = global_context->getClientInfo(); client_info.setInitialQuery(); From ac1a78f4d9a4382ac52a01ed0efc65b8f08f02e7 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 24 Jan 2022 11:00:56 +0800 Subject: [PATCH 009/215] fix substr local metadata differ zookeeper metadata --- src/Common/IFactoryWithAliases.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index f7da302a942..7f5b53a80fa 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -120,8 +120,12 @@ public: const String & getCanonicalNameIfAny(const String & name) const { auto it = case_insensitive_name_mapping.find(Poco::toLower(name)); - if (it != case_insensitive_name_mapping.end()) - return it->second; + if (it != case_insensitive_name_mapping.end()) { + if (it->first != name) + { + return it->second; + } + } return name; } From 
58dd1a2d5c029d2cea4eb0d5544ca0f1063789b3 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Sat, 8 Jan 2022 20:21:39 +0800 Subject: [PATCH 010/215] add function addressToLineWithInlines --- .../sampling-query-profiler.md | 2 +- .../operations/system-tables/stack_trace.md | 2 +- docs/en/operations/system-tables/trace_log.md | 2 +- .../sql-reference/functions/introspection.md | 105 ++++++++++ docs/en/sql-reference/statements/grant.md | 2 + src/Access/Common/AccessType.h | 1 + src/Functions/addressToLineWithInlines.cpp | 192 ++++++++++++++++++ .../registerFunctionsIntrospection.cpp | 2 + .../01271_show_privileges.reference | 1 + .../02117_show_create_table_system.reference | 4 +- .../02161_addressToLineWithInlines.reference | 4 + .../02161_addressToLineWithInlines.sh | 109 ++++++++++ 12 files changed, 421 insertions(+), 5 deletions(-) create mode 100644 src/Functions/addressToLineWithInlines.cpp create mode 100644 tests/queries/0_stateless/02161_addressToLineWithInlines.reference create mode 100755 tests/queries/0_stateless/02161_addressToLineWithInlines.sh diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index 9244592d515..72cfa59b8b2 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -27,7 +27,7 @@ To analyze the `trace_log` system table: For security reasons, introspection functions are disabled by default. -- Use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. +- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui/#clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope). diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index eb1824a6f66..e2135e4beb6 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -2,7 +2,7 @@ Contains stack traces of all server threads. Allows developers to introspect the server state. -To analyze stack frames, use the `addressToLine`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md). +To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md). Columns: diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 4902b09004d..ab08ef7415c 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -4,7 +4,7 @@ Contains stack traces collected by the sampling query profiler. 
ClickHouse creates this table when the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) server configuration section is set. Also the [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) and [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) settings should be set.
 
-To analyze logs, use the `addressToLine`, `addressToSymbol` and `demangle` introspection functions.
+To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` introspection functions.
 
 Columns:
 
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md
index 21b570c65d4..595d3c4a16a 100644
--- a/docs/en/sql-reference/functions/introspection.md
+++ b/docs/en/sql-reference/functions/introspection.md
@@ -113,6 +113,111 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
 /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
 ```
 
+## addressToLineWithInlines {#addresstolinewithinlines}
+
+Similar to `addressToLine`, but returns an Array with all inline functions.
+
+If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
+
+**Syntax**
+
+``` sql
+addressToLineWithInlines(address_of_binary_instruction)
+```
+
+**Arguments**
+
+- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of an instruction in a running process.
+
+**Returned value**
+
+- An array whose first element is the source code filename and the line number in this file, delimited by a colon. Starting from the second element, the source code filename, line number and function name of each inline function are listed.
+
+- An array with a single element, the name of the binary, if the function couldn't find the debug information.
+
+- An empty array, if the address is not valid.
+
+Type: [Array(String)](../../sql-reference/data-types/array.md).
+
+**Example**
+
+Enabling introspection functions:
+
+``` sql
+SET allow_introspection_functions=1;
+```
+
+Applying the function to an address:
+
+```sql
+SELECT addressToLineWithInlines(531055181::UInt64);
+```
+
+``` text
+┌─addressToLineWithInlines(CAST('531055181', 'UInt64'))────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │
+└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+Applying the function to the whole stack trace:
+
+``` sql
+SELECT
+    ta, addressToLineWithInlines(arrayJoin(trace) as ta)
+FROM system.trace_log
+WHERE
+    query_id = '5e173544-2020-45de-b645-5deebe2aae54';
+```
+
+The [arrayJoin](../../sql-reference/functions/array-functions.md#array-functions-join) function will split the array into rows.
+ +``` text +┌────────ta─┬─addressToLineWithInlines(arrayJoin(trace))───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ 365497529 │ ['./build_normal_debug/./contrib/libcxx/include/string_view:252'] │ +│ 365593602 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:191'] │ +│ 365593866 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365592528 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365591003 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:477'] │ +│ 365590479 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:442'] │ +│ 365590600 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:457'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365607098 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365590571 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:451'] │ +│ 365598941 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:0'] │ +│ 365597289 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:807'] │ +│ 365599840 │ ['./build_normal_debug/./src/Common/Dwarf.cpp:1118'] │ +│ 531058145 │ ['./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:152'] │ +│ 531055181 │ ['./src/Functions/addressToLineWithInlines.cpp:98','./build_normal_debug/./src/Functions/addressToLineWithInlines.cpp:176:DB::(anonymous namespace)::FunctionAddressToLineWithInlines::implCached(unsigned long) const'] │ +│ 422333613 │ ['./build_normal_debug/./src/Functions/IFunctionAdaptors.h:21'] │ +│ 586866022 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:216'] │ +│ 586869053 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:264'] │ +│ 586873237 │ ['./build_normal_debug/./src/Functions/IFunction.cpp:334'] │ +│ 597901620 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:601'] │ +│ 597898534 │ ['./build_normal_debug/./src/Interpreters/ExpressionActions.cpp:718'] │ +│ 630442912 │ ['./build_normal_debug/./src/Processors/Transforms/ExpressionTransform.cpp:23'] │ +│ 546354050 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.h:38'] │ +│ 626026993 │ ['./build_normal_debug/./src/Processors/ISimpleTransform.cpp:89'] │ +│ 626294022 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:45'] │ +│ 626293730 │ ['./build_normal_debug/./src/Processors/Executors/ExecutionThreadContext.cpp:63'] │ +│ 626169525 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:213'] │ +│ 626170308 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:178'] │ +│ 626166348 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:329'] │ +│ 626163461 │ ['./build_normal_debug/./src/Processors/Executors/PipelineExecutor.cpp:84'] │ +│ 626323536 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:85'] │ +│ 626323277 │ ['./build_normal_debug/./src/Processors/Executors/PullingAsyncPipelineExecutor.cpp:112'] │ +│ 626323133 │ ['./build_normal_debug/./contrib/libcxx/include/type_traits:3682'] │ +│ 626323041 │ 
['./build_normal_debug/./contrib/libcxx/include/tuple:1415'] │ +└───────────┴──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ + +``` + + ## addressToSymbol {#addresstosymbol} Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files. diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 2b1262f7d3c..1b2b63ba0e7 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -172,6 +172,7 @@ Hierarchy of privileges: - `SYSTEM FLUSH LOGS` - [INTROSPECTION](#grant-introspection) - `addressToLine` + - `addressToLineWithInlines` - `addressToSymbol` - `demangle` - [SOURCES](#grant-sources) @@ -430,6 +431,7 @@ Allows using [introspection](../../operations/optimizing-performance/sampling-qu - `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS` - `addressToLine`. Level: `GLOBAL` + - `addressToLineWithInlines`. Level: `GLOBAL` - `addressToSymbol`. Level: `GLOBAL` - `demangle`. Level: `GLOBAL` diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 4472e975878..e7c70eba575 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -166,6 +166,7 @@ enum class AccessType M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\ \ M(addressToLine, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLine() */\ + M(addressToLineWithInlines, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToLineWithInlines() */\ M(addressToSymbol, "", GLOBAL, INTROSPECTION) /* allows to execute function addressToSymbol() */\ M(demangle, "", GLOBAL, INTROSPECTION) /* allows to execute function demangle() */\ M(INTROSPECTION, "INTROSPECTION FUNCTIONS", GROUP, ALL) /* allows to execute functions addressToLine(), addressToSymbol(), demangle()*/\ diff --git a/src/Functions/addressToLineWithInlines.cpp b/src/Functions/addressToLineWithInlines.cpp new file mode 100644 index 00000000000..4a3027e399f --- /dev/null +++ b/src/Functions/addressToLineWithInlines.cpp @@ -0,0 +1,192 @@ +#if defined(__ELF__) && !defined(__FreeBSD__) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +namespace +{ + + +class FunctionAddressToLineWithInlines : public IFunction +{ +public: + static constexpr auto name = "addressToLineWithInlines"; + static FunctionPtr create(ContextPtr context) + { + context->checkAccess(AccessType::addressToLineWithInlines); + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override + { + return 1; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1) + throw Exception("Function " + getName() + " needs exactly one 
argument; passed " + + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto & type = arguments[0].type; + + if (!WhichDataType(type.get()).isUInt64()) + throw Exception("The only argument for function " + getName() + " must be UInt64. Found " + + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(std::make_shared()); + } + + bool useDefaultImplementationForConstants() const override + { + return true; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnPtr & column = arguments[0].column; + const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); + + if (!column_concrete) + throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & data = column_concrete->getData(); + auto result_column = ColumnArray::create(ColumnString::create()); + + ColumnString & result_strings = typeid_cast(result_column->getData()); + ColumnArray::Offsets & result_offsets = result_column->getOffsets(); + + ColumnArray::Offset current_offset = 0; + + for (size_t i = 0; i < input_rows_count; ++i) + { + StringRefs res = implCached(data[i]); + for (auto & r : res) + result_strings.insertData(r.data, r.size); + current_offset += res.size(); + result_offsets.push_back(current_offset); + } + + return result_column; + } + +private: + struct Cache + { + std::mutex mutex; + Arena arena; + using Map = HashMap; + Map map; + std::unordered_map dwarfs; + }; + + mutable Cache cache; + + inline ALWAYS_INLINE void appendLocation2Result(StringRefs & result, Dwarf::LocationInfo & location, Dwarf::SymbolizedFrame * frame) const + { + const char * arena_begin = nullptr; + WriteBufferFromArena out(cache.arena, arena_begin); + + writeString(location.file.toString(), out); + writeChar(':', out); + writeIntText(location.line, out); + + if (frame) + { + writeChar(':', out); + int status = 0; + writeString(demangle(frame->name, status), out); + } + + result.emplace_back(out.complete()); + } + + StringRefs impl(uintptr_t addr) const + { + auto symbol_index_ptr = SymbolIndex::instance(); + const SymbolIndex & symbol_index = *symbol_index_ptr; + + if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) + { + auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; + if (!std::filesystem::exists(object->name)) + return {}; + + Dwarf::LocationInfo location; + std::vector inline_frames; + if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FULL_WITH_INLINE, inline_frames)) + { + StringRefs ret; + appendLocation2Result(ret, location, nullptr); + for (auto & inline_frame : inline_frames) + appendLocation2Result(ret, inline_frame.location, &inline_frame); + return ret; + } + else + { + return {object->name}; + } + } + else + return {}; + } + + /// ALWAYS_INLINE is also a self-containing testcase used in 0_stateless/02161_addressToLineWithInlines. + /// If changed here, change 02161 together. 
+ inline ALWAYS_INLINE StringRefs implCached(uintptr_t addr) const + { + Cache::Map::LookupResult it; + bool inserted; + std::lock_guard lock(cache.mutex); + cache.map.emplace(addr, it, inserted); + if (inserted) + it->getMapped() = impl(addr); + return it->getMapped(); + } +}; + +} + +void registerFunctionAddressToLineWithInlines(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + +#endif diff --git a/src/Functions/registerFunctionsIntrospection.cpp b/src/Functions/registerFunctionsIntrospection.cpp index fe76c96d62d..76a92847d8e 100644 --- a/src/Functions/registerFunctionsIntrospection.cpp +++ b/src/Functions/registerFunctionsIntrospection.cpp @@ -6,6 +6,7 @@ class FunctionFactory; #if defined(OS_LINUX) void registerFunctionAddressToSymbol(FunctionFactory & factory); void registerFunctionAddressToLine(FunctionFactory & factory); +void registerFunctionAddressToLineWithInlines(FunctionFactory & factory); #endif void registerFunctionDemangle(FunctionFactory & factory); @@ -17,6 +18,7 @@ void registerFunctionsIntrospection(FunctionFactory & factory) #if defined(OS_LINUX) registerFunctionAddressToSymbol(factory); registerFunctionAddressToLine(factory); + registerFunctionAddressToLineWithInlines(factory); #endif registerFunctionDemangle(factory); registerFunctionTrap(factory); diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index cc237a40a3f..ca7c6312130 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -118,6 +118,7 @@ SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER',' SYSTEM [] \N ALL dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL addressToLine [] GLOBAL INTROSPECTION +addressToLineWithInlines [] GLOBAL INTROSPECTION addressToSymbol [] GLOBAL INTROSPECTION demangle [] GLOBAL INTROSPECTION INTROSPECTION ['INTROSPECTION FUNCTIONS'] \N ALL diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 234804f1078..1b41e613f5c 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -20,7 +20,7 @@ CREATE TABLE system.errors\n(\n `name` String,\n `code` Int32,\n `value CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `description` String\n)\nENGINE = SystemEvents()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, 
\'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'MYSQL\' = 
-128, \'POSTGRES\' = -127, \'SQLITE\' = -126, \'ODBC\' = -125, \'JDBC\' = -124, \'HDFS\' = -123, \'S3\' = -122, \'SOURCES\' = -121, \'ALL\' = -120, \'NONE\' = -119, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, 
\'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' @@ -35,7 +35,7 @@ CREATE TABLE system.one\n(\n `dummy` UInt8\n)\nENGINE = SystemOne()\nCOMMENT CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `table` String,\n `task_name` String,\n `task_uuid` UUID,\n `create_time` DateTime,\n `part_name` String,\n `part_uuid` UUID,\n `to_shard` String,\n `dst_part_name` String,\n `update_time` DateTime,\n `state` String,\n `rollback` UInt8,\n `num_tries` UInt32,\n `last_exception` String\n)\nENGINE = SystemShardMoves()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` 
UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n `subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.privileges\n(\n `privilege` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS 
CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW 
USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.privileges\n(\n `privilege` Enum8(\'MYSQL\' = -128, \'POSTGRES\' = -127, \'SQLITE\' = -126, \'ODBC\' = -125, \'JDBC\' = -124, \'HDFS\' = -123, \'S3\' = -122, \'SOURCES\' = -121, \'ALL\' = -120, \'NONE\' = -119, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' 
= 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum8(\'MYSQL\' = -128, \'POSTGRES\' = -127, \'SQLITE\' = -126, \'ODBC\' = -125, \'JDBC\' = -124, \'HDFS\' = -123, \'S3\' = -122, \'SOURCES\' = -121, \'ALL\' = -120, \'NONE\' = -119, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE 
PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` 
UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.reference b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference new file mode 100644 index 00000000000..48108d5596c --- /dev/null +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference @@ -0,0 +1,4 @@ +CHECK: privilege +Code: 446. +CHECK: basic call +Success diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sh b/tests/queries/0_stateless/02161_addressToLineWithInlines.sh new file mode 100755 index 00000000000..0faad6c8e4f --- /dev/null +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sh @@ -0,0 +1,109 @@ +#! 
/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +# tags are copied from 00974_query_profiler.sql + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +export CLICKHOUSE_DATABASE=system +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +MAX_FAILED_COUNT=10 +MAX_RETRY_COUNT=10 +log_comment="02161_testcase_$(date +'%s')" + +check_exist_sql="SELECT count(), query_id FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id IN ( + SELECT query_id FROM query_log WHERE log_comment = '${log_comment}' ORDER BY event_time DESC LIMIT 1 +) GROUP BY query_id" + +declare exist_string_result +declare -A exist_result=([count]=0 [query_id]="") + +function update_log_comment() { + log_comment="02161_testcase_$(date +'%s')" +} + + +function flush_log() { + ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SYSTEM FLUSH LOGS' +} + +function get_trace_count() { + flush_log + ${CLICKHOUSE_CLIENT} -q 'SELECT count() from system.trace_log'; +} + +function make_trace() { + ${CLICKHOUSE_CLIENT} --query_profiler_cpu_time_period_ns=1000000 --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log SETTINGS log_comment='${log_comment}'" +} + +function check_exist() { + exist_string_result=$(${CLICKHOUSE_CLIENT} --log_queries=0 -q "${check_exist_sql}") + exist_result[count]="$(echo "$exist_string_result" | cut -f 1)" + exist_result[query_id]="$(echo "$exist_string_result" | cut -f 2)" +} + +function get4fail() { + ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" + ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLine(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" +} + +function final_check_inlines() { + final_check_sql="WITH + address_list AS + ( + SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' + ) +SELECT max(length(addressToLineWithInlines(address))) > 1 FROM address_list;" + result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" + [[ "$result" == "1" ]] +} + +function final_check() { + final_check_sql="WITH + address_list AS + ( + SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' + ) +SELECT max(length(addressToLineWithInlines(address))) >= 1 FROM address_list;" + result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" + [[ "$result" == "1" ]] +} + +echo "CHECK: privilege" +${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SELECT addressToLineWithInlines(1);' | grep -oF 'Code: 446.' || echo 'FAIL' + +echo "CHECK: basic call" + +# won't check inline because there is no debug symbol in some test env. 
+# e.g: https://s3.amazonaws.com/clickhouse-test-reports/33467/2081b43c9ee59615b2fd31c77390744b10eef61e/stateless_tests__release__wide_parts_enabled__actions_.html + +flush_log +result="" +for ((i=0;i /dev/null + flush_log + sleep 1 + check_exist + done + if final_check "${exist_result[query_id]}";then + result="Success" + break + fi + update_log_comment +done + +if final_check "${exist_result[query_id]}"; then + result="Success" +else + echo "query_id: ${exist_result[query_id]}, count: ${exist_result[count]}" + get4fail "${exist_result[query_id]}" +fi +echo "$result" From c1036f0b8ee3ca73eee5e2c335011d180a3aa3ae Mon Sep 17 00:00:00 2001 From: save-my-heart Date: Tue, 25 Jan 2022 22:43:44 +0800 Subject: [PATCH 011/215] ensure signal_pipe_buf_size is <= PIPE_BUF --- base/daemon/BaseDaemon.cpp | 47 ++++++++++++++++++-------------------- src/Common/StackTrace.cpp | 4 ++-- src/Common/StackTrace.h | 2 +- 3 files changed, 25 insertions(+), 28 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index f3026d7c87a..6ac552e1284 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -79,18 +79,14 @@ static void call_default_signal_handler(int sig) raise(sig); } -static constexpr size_t max_query_id_size = 127; - static const size_t signal_pipe_buf_size = sizeof(int) + sizeof(siginfo_t) - + sizeof(ucontext_t) + + sizeof(mcontext_t) + sizeof(StackTrace) + sizeof(UInt32) - + max_query_id_size + 1 /// query_id + varint encoded length + sizeof(void*); - using signal_function = void(int, siginfo_t*, void*); static void writeSignalIDtoSignalPipe(int sig) @@ -132,15 +128,11 @@ static void signalHandler(int sig, siginfo_t * info, void * context) const ucontext_t signal_context = *reinterpret_cast(context); const StackTrace stack_trace(signal_context); - StringRef query_id = DB::CurrentThread::getQueryId(); /// This is signal safe. - query_id.size = std::min(query_id.size, max_query_id_size); - DB::writeBinary(sig, out); DB::writePODBinary(*info, out); - DB::writePODBinary(signal_context, out); + DB::writePODBinary(signal_context.uc_mcontext, out); DB::writePODBinary(stack_trace, out); DB::writeBinary(UInt32(getThreadId()), out); - DB::writeStringBinary(query_id, out); DB::writePODBinary(DB::current_thread, out); out.next(); @@ -184,6 +176,8 @@ public: void run() override { + static_assert(PIPE_BUF >= 512); + static_assert(signal_pipe_buf_size <= PIPE_BUF, "Only write of PIPE_BUF to pipe is atomic and the minimal known PIPE_BUF across supported platforms is 512"); char buf[signal_pipe_buf_size]; DB::ReadBufferFromFileDescriptor in(signal_pipe.fds_rw[0], signal_pipe_buf_size, buf); @@ -227,26 +221,24 @@ public: else { siginfo_t info{}; - ucontext_t context{}; + mcontext_t mcontext{}; StackTrace stack_trace(NoCapture{}); UInt32 thread_num{}; - std::string query_id; DB::ThreadStatus * thread_ptr{}; if (sig != SanitizerTrap) { DB::readPODBinary(info, in); - DB::readPODBinary(context, in); + DB::readPODBinary(mcontext, in); } DB::readPODBinary(stack_trace, in); DB::readBinary(thread_num, in); - DB::readBinary(query_id, in); DB::readPODBinary(thread_ptr, in); /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. 
- std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach(); + std::thread([=, this] { onFault(sig, info, mcontext, stack_trace, thread_num, thread_ptr); }).detach(); } } } @@ -279,18 +271,27 @@ private: void onFault( int sig, const siginfo_t & info, - const ucontext_t & context, + const mcontext_t & mcontext, const StackTrace & stack_trace, UInt32 thread_num, - const std::string & query_id, DB::ThreadStatus * thread_ptr) const { DB::ThreadStatus thread_status; + String query_id; + String query; + /// Send logs from this thread to client if possible. /// It will allow client to see failure messages directly. if (thread_ptr) { + query_id = thread_ptr->getQueryId().toString(); + + if (auto thread_group = thread_ptr->getThreadGroup()) + { + query = thread_group->query; + } + if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace); } @@ -305,15 +306,15 @@ private: } else { - LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})", + LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) (query: {}) Received signal {} ({})", VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, - thread_num, query_id, strsignal(sig), sig); + thread_num, query_id, query, strsignal(sig), sig); } String error_message; if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, context); + error_message = signalToErrorMessage(sig, info, mcontext); else error_message = "Sanitizer trap."; @@ -389,20 +390,16 @@ static void sanitizerDeathCallback() const StackTrace stack_trace; - StringRef query_id = DB::CurrentThread::getQueryId(); - query_id.size = std::min(query_id.size, max_query_id_size); - int sig = SignalListener::SanitizerTrap; DB::writeBinary(sig, out); DB::writePODBinary(stack_trace, out); DB::writeBinary(UInt32(getThreadId()), out); - DB::writeStringBinary(query_id, out); DB::writePODBinary(DB::current_thread, out); out.next(); /// The time that is usually enough for separate thread to print info into log. 
- sleepForSeconds(10); + sleepForSeconds(20); } #endif diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 18e2e223744..35d590d7508 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -19,7 +19,7 @@ # include #endif -std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused]] const ucontext_t & context) +std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused]] const mcontext_t & mcontext) { std::stringstream error; // STYLE_CHECK_ALLOW_STD_STRING_STREAM error.exceptions(std::ios::failbit); @@ -34,7 +34,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused error << "Address: " << info.si_addr; #if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__) && !defined(__powerpc__) - auto err_mask = context.uc_mcontext.gregs[REG_ERR]; + auto err_mask = mcontext.gregs[REG_ERR]; if ((err_mask & 0x02)) error << " Access: write."; else diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 06a17e73091..697574900d5 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -75,4 +75,4 @@ protected: FramePointers frame_pointers{}; }; -std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context); +std::string signalToErrorMessage(int sig, const siginfo_t & info, const mcontext_t & mcontext); From 4d7073ceeda8c8dc9eb10269ccbfe24c809e8442 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 00:20:19 +0800 Subject: [PATCH 012/215] 1. generic addressToLine and addressToLineWithInlines, 2. improve addressToLineWithInlines document --- .../sql-reference/functions/introspection.md | 2 +- src/Functions/addressToLine.cpp | 120 ++-------------- src/Functions/addressToLine.h | 134 ++++++++++++++++++ src/Functions/addressToLineWithInlines.cpp | 118 ++------------- 4 files changed, 159 insertions(+), 215 deletions(-) create mode 100644 src/Functions/addressToLine.h diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 595d3c4a16a..1be68c6bdd4 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -115,7 +115,7 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so ## addressToLineWithInlines {#addresstolinewithinlines} -Similar to `addressToLine`, but it will return an Array with all inline functions. +Similar to `addressToLine`, but it will return an Array with all inline functions, and will be much slower as a price. If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. 
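For reference, a minimal end-to-end sketch of how the new function and its privilege fit together. This is an illustrative example, not part of the patch: it assumes a server with debug symbols available (the `clickhouse-common-static-dbg` package) and at least one profiled query already recorded in `system.trace_log`; `test_user` is a placeholder grantee, and the final query mirrors the call used by the 02161 test above.

``` sql
-- Introspection functions are disabled by default.
SET allow_introspection_functions = 1;

-- The new GLOBAL-level privilege introduced by this patch (placeholder grantee).
GRANT addressToLineWithInlines ON *.* TO test_user;

-- Symbolize sampled stack addresses; each address expands to an Array(String)
-- holding the direct source location plus one entry per inlined frame.
SELECT addressToLineWithInlines(arrayJoin(trace)) AS lines
FROM system.trace_log
LIMIT 20;
```

Without both the setting and the privilege the call is rejected — the test reference above expects `Code: 446.` in exactly that case.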
diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index c3e48913e97..47390f53147 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -1,38 +1,23 @@ #if defined(__ELF__) && !defined(__FreeBSD__) #include -#include -#include -#include #include -#include #include -#include #include #include #include #include -#include -#include -#include -#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { -class FunctionAddressToLine : public IFunction +class FunctionAddressToLine: public FunctionAddressToLineBase { public: static constexpr auto name = "addressToLine"; @@ -41,113 +26,32 @@ public: context->checkAccess(AccessType::addressToLine); return std::make_shared(); } - - String getName() const override +protected: + DataTypePtr getDataType() const override { - return name; - } - - size_t getNumberOfArguments() const override - { - return 1; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.size() != 1) - throw Exception("Function " + getName() + " needs exactly one argument; passed " - + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto & type = arguments[0].type; - - if (!WhichDataType(type.get()).isUInt64()) - throw Exception("The only argument for function " + getName() + " must be UInt64. Found " - + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(); } - - bool useDefaultImplementationForConstants() const override + ColumnPtr getResultColumn(const typename ColumnVector::Container & data, size_t input_rows_count) const override { - return true; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const ColumnPtr & column = arguments[0].column; - const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); - - if (!column_concrete) - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - - const typename ColumnVector::Container & data = column_concrete->getData(); auto result_column = ColumnString::create(); - for (size_t i = 0; i < input_rows_count; ++i) { StringRef res_str = implCached(data[i]); result_column->insertData(res_str.data, res_str.size); } - return result_column; } -private: - struct Cache + void setResult(StringRef & result, const Dwarf::LocationInfo & location, const std::vector &) const override { - std::mutex mutex; - Arena arena; - using Map = HashMap; - Map map; - std::unordered_map dwarfs; - }; + const char * arena_begin = nullptr; + WriteBufferFromArena out(cache.arena, arena_begin); - mutable Cache cache; + writeString(location.file.toString(), out); + writeChar(':', out); + writeIntText(location.line, out); - StringRef impl(uintptr_t addr) const - { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; - - if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) - { - auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; - if (!std::filesystem::exists(object->name)) - return {}; - - Dwarf::LocationInfo location; - 
std::vector frames; // NOTE: not used in FAST mode. - if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FAST, frames)) - { - const char * arena_begin = nullptr; - WriteBufferFromArena out(cache.arena, arena_begin); - - writeString(location.file.toString(), out); - writeChar(':', out); - writeIntText(location.line, out); - - return out.complete(); - } - else - { - return object->name; - } - } - else - return {}; - } - - StringRef implCached(uintptr_t addr) const - { - Cache::Map::LookupResult it; - bool inserted; - std::lock_guard lock(cache.mutex); - cache.map.emplace(addr, it, inserted); - if (inserted) - it->getMapped() = impl(addr); - return it->getMapped(); + result = out.complete(); } }; diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h new file mode 100644 index 00000000000..c2130da56a5 --- /dev/null +++ b/src/Functions/addressToLine.h @@ -0,0 +1,134 @@ +#if defined(__ELF__) && !defined(__FreeBSD__) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +template +class FunctionAddressToLineBase : public IFunction +{ +public: + static constexpr auto name = "addressToLineBase"; + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1) + throw Exception( + "Function " + getName() + " needs exactly one argument; passed " + toString(arguments.size()) + ".", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto & type = arguments[0].type; + + if (!WhichDataType(type.get()).isUInt64()) + throw Exception( + "The only argument for function " + getName() + " must be UInt64. 
Found " + type->getName() + " instead.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return getDataType(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnPtr & column = arguments[0].column; + const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); + + if (!column_concrete) + throw Exception( + "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + + const typename ColumnVector::Container & data = column_concrete->getData(); + return getResultColumn(data, input_rows_count); + } + +protected: + virtual DataTypePtr getDataType() const = 0; + virtual ColumnPtr getResultColumn(const typename ColumnVector::Container & data, size_t input_rows_count) const = 0; + virtual void + setResult(ResultT & result, const Dwarf::LocationInfo & location, const std::vector & frames) const = 0; + + struct Cache + { + std::mutex mutex; + Arena arena; + using Map = HashMap; + Map map; + std::unordered_map dwarfs; + }; + + mutable Cache cache; + + ResultT impl(uintptr_t addr) const + { + auto symbol_index_ptr = SymbolIndex::instance(); + const SymbolIndex & symbol_index = *symbol_index_ptr; + + if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) + { + auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; + if (!std::filesystem::exists(object->name)) + return {}; + + Dwarf::LocationInfo location; + std::vector frames; // NOTE: not used in FAST mode. + ResultT result; + if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, locationInfoMode, frames)) + { + setResult(result, location, frames); + return result; + } + else + return {object->name}; + } + else + return {}; + } + + ResultT implCached(uintptr_t addr) const + { + typename Cache::Map::LookupResult it; + bool inserted; + std::lock_guard lock(cache.mutex); + cache.map.emplace(addr, it, inserted); + if (inserted) + it->getMapped() = impl(addr); + return it->getMapped(); + } +}; + +} + +#endif diff --git a/src/Functions/addressToLineWithInlines.cpp b/src/Functions/addressToLineWithInlines.cpp index 4a3027e399f..e17fe2a33cc 100644 --- a/src/Functions/addressToLineWithInlines.cpp +++ b/src/Functions/addressToLineWithInlines.cpp @@ -1,42 +1,26 @@ #if defined(__ELF__) && !defined(__FreeBSD__) #include -#include -#include -#include #include #include -#include #include #include -#include #include #include #include #include -#include -#include -#include -#include +#include #include namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - namespace { - -class FunctionAddressToLineWithInlines : public IFunction +class FunctionAddressToLineWithInlines: public FunctionAddressToLineBase { public: static constexpr auto name = "addressToLineWithInlines"; @@ -46,48 +30,15 @@ public: return std::make_shared(); } - String getName() const override +protected: + DataTypePtr getDataType() const override { - return name; - } - - size_t getNumberOfArguments() const override - { - return 1; - } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - if (arguments.size() != 
1) - throw Exception("Function " + getName() + " needs exactly one argument; passed " - + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto & type = arguments[0].type; - - if (!WhichDataType(type.get()).isUInt64()) - throw Exception("The only argument for function " + getName() + " must be UInt64. Found " - + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); return std::make_shared(std::make_shared()); } - bool useDefaultImplementationForConstants() const override + ColumnPtr getResultColumn(const typename ColumnVector::Container & data, size_t input_rows_count) const override { - return true; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override - { - const ColumnPtr & column = arguments[0].column; - const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); - - if (!column_concrete) - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); - - const typename ColumnVector::Container & data = column_concrete->getData(); auto result_column = ColumnArray::create(ColumnString::create()); - ColumnString & result_strings = typeid_cast(result_column->getData()); ColumnArray::Offsets & result_offsets = result_column->getOffsets(); @@ -105,19 +56,16 @@ public: return result_column; } -private: - struct Cache + void setResult(StringRefs & result, const Dwarf::LocationInfo & location, const std::vector & inline_frames) const override { - std::mutex mutex; - Arena arena; - using Map = HashMap; - Map map; - std::unordered_map dwarfs; - }; - mutable Cache cache; + appendLocationToResult(result, location, nullptr); + for (const auto & inline_frame : inline_frames) + appendLocationToResult(result, inline_frame.location, &inline_frame); + } +private: - inline ALWAYS_INLINE void appendLocation2Result(StringRefs & result, Dwarf::LocationInfo & location, Dwarf::SymbolizedFrame * frame) const + inline ALWAYS_INLINE void appendLocationToResult(StringRefs & result, const Dwarf::LocationInfo & location, const Dwarf::SymbolizedFrame * frame) const { const char * arena_begin = nullptr; WriteBufferFromArena out(cache.arena, arena_begin); @@ -136,48 +84,6 @@ private: result.emplace_back(out.complete()); } - StringRefs impl(uintptr_t addr) const - { - auto symbol_index_ptr = SymbolIndex::instance(); - const SymbolIndex & symbol_index = *symbol_index_ptr; - - if (const auto * object = symbol_index.findObject(reinterpret_cast(addr))) - { - auto dwarf_it = cache.dwarfs.try_emplace(object->name, object->elf).first; - if (!std::filesystem::exists(object->name)) - return {}; - - Dwarf::LocationInfo location; - std::vector inline_frames; - if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FULL_WITH_INLINE, inline_frames)) - { - StringRefs ret; - appendLocation2Result(ret, location, nullptr); - for (auto & inline_frame : inline_frames) - appendLocation2Result(ret, inline_frame.location, &inline_frame); - return ret; - } - else - { - return {object->name}; - } - } - else - return {}; - } - - /// ALWAYS_INLINE is also a self-containing testcase used in 0_stateless/02161_addressToLineWithInlines. - /// If changed here, change 02161 together. 
- inline ALWAYS_INLINE StringRefs implCached(uintptr_t addr) const - { - Cache::Map::LookupResult it; - bool inserted; - std::lock_guard lock(cache.mutex); - cache.map.emplace(addr, it, inserted); - if (inserted) - it->getMapped() = impl(addr); - return it->getMapped(); - } }; } From 4c34d8f56349423033a84e8ef9ca545e931fa43c Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 00:30:27 +0800 Subject: [PATCH 013/215] fix function names --- src/Functions/addressToLine.cpp | 1 + src/Functions/addressToLine.h | 2 -- src/Functions/addressToLineWithInlines.cpp | 1 + 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/addressToLine.cpp b/src/Functions/addressToLine.cpp index 47390f53147..6c9eba160cf 100644 --- a/src/Functions/addressToLine.cpp +++ b/src/Functions/addressToLine.cpp @@ -21,6 +21,7 @@ class FunctionAddressToLine: public FunctionAddressToLineBasecheckAccess(AccessType::addressToLine); diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index c2130da56a5..8001c90d000 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -35,8 +35,6 @@ template class FunctionAddressToLineBase : public IFunction { public: - static constexpr auto name = "addressToLineBase"; - String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } diff --git a/src/Functions/addressToLineWithInlines.cpp b/src/Functions/addressToLineWithInlines.cpp index e17fe2a33cc..c3e62bd802e 100644 --- a/src/Functions/addressToLineWithInlines.cpp +++ b/src/Functions/addressToLineWithInlines.cpp @@ -24,6 +24,7 @@ class FunctionAddressToLineWithInlines: public FunctionAddressToLineBasecheckAccess(AccessType::addressToLineWithInlines); From c3e541376d2c78272e5c9c133eca066ddaac5c7c Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 00:47:43 +0800 Subject: [PATCH 014/215] fix style: add pragma once --- src/Functions/addressToLine.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index 8001c90d000..8216f114b2e 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -1,3 +1,4 @@ +#pragma once #if defined(__ELF__) && !defined(__FreeBSD__) #include From bdf4305bfb4b57f7e795f8f5314d97cc36e48cc4 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 01:33:44 +0800 Subject: [PATCH 015/215] simplify test --- .../02161_addressToLineWithInlines.reference | 6 +- .../02161_addressToLineWithInlines.sh | 109 ------------------ .../02161_addressToLineWithInlines.sql | 24 ++++ 3 files changed, 26 insertions(+), 113 deletions(-) delete mode 100755 tests/queries/0_stateless/02161_addressToLineWithInlines.sh create mode 100644 tests/queries/0_stateless/02161_addressToLineWithInlines.sql diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.reference b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference index 48108d5596c..10e2c7069b3 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.reference +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.reference @@ -1,4 +1,2 @@ -CHECK: privilege -Code: 446. -CHECK: basic call -Success +10000000000 +has inlines: 1 diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sh b/tests/queries/0_stateless/02161_addressToLineWithInlines.sh deleted file mode 100755 index 0faad6c8e4f..00000000000 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sh +++ /dev/null @@ -1,109 +0,0 @@ -#! 
/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug -# tags are copied from 00974_query_profiler.sql - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -export CLICKHOUSE_DATABASE=system -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -MAX_FAILED_COUNT=10 -MAX_RETRY_COUNT=10 -log_comment="02161_testcase_$(date +'%s')" - -check_exist_sql="SELECT count(), query_id FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id IN ( - SELECT query_id FROM query_log WHERE log_comment = '${log_comment}' ORDER BY event_time DESC LIMIT 1 -) GROUP BY query_id" - -declare exist_string_result -declare -A exist_result=([count]=0 [query_id]="") - -function update_log_comment() { - log_comment="02161_testcase_$(date +'%s')" -} - - -function flush_log() { - ${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SYSTEM FLUSH LOGS' -} - -function get_trace_count() { - flush_log - ${CLICKHOUSE_CLIENT} -q 'SELECT count() from system.trace_log'; -} - -function make_trace() { - ${CLICKHOUSE_CLIENT} --query_profiler_cpu_time_period_ns=1000000 --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log SETTINGS log_comment='${log_comment}'" -} - -function check_exist() { - exist_string_result=$(${CLICKHOUSE_CLIENT} --log_queries=0 -q "${check_exist_sql}") - exist_result[count]="$(echo "$exist_string_result" | cut -f 1)" - exist_result[query_id]="$(echo "$exist_string_result" | cut -f 2)" -} - -function get4fail() { - ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLineWithInlines(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" - ${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "SELECT addressToLine(arrayJoin(trace)) FROM system.trace_log WHERE trace_type IN ('CPU', 'Real') AND query_id='$1'" -} - -function final_check_inlines() { - final_check_sql="WITH - address_list AS - ( - SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' - ) -SELECT max(length(addressToLineWithInlines(address))) > 1 FROM address_list;" - result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" - [[ "$result" == "1" ]] -} - -function final_check() { - final_check_sql="WITH - address_list AS - ( - SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id='$1' - ) -SELECT max(length(addressToLineWithInlines(address))) >= 1 FROM address_list;" - result="$(${CLICKHOUSE_CLIENT} --allow_introspection_functions 1 -q "$final_check_sql")" - [[ "$result" == "1" ]] -} - -echo "CHECK: privilege" -${CLICKHOUSE_CURL} -sS "$CLICKHOUSE_URL" -d 'SELECT addressToLineWithInlines(1);' | grep -oF 'Code: 446.' || echo 'FAIL' - -echo "CHECK: basic call" - -# won't check inline because there is no debug symbol in some test env. 
-# e.g: https://s3.amazonaws.com/clickhouse-test-reports/33467/2081b43c9ee59615b2fd31c77390744b10eef61e/stateless_tests__release__wide_parts_enabled__actions_.html - -flush_log -result="" -for ((i=0;i /dev/null - flush_log - sleep 1 - check_exist - done - if final_check "${exist_result[query_id]}";then - result="Success" - break - fi - update_log_comment -done - -if final_check "${exist_result[query_id]}"; then - result="Success" -else - echo "query_id: ${exist_result[query_id]}, count: ${exist_result[count]}" - get4fail "${exist_result[query_id]}" -fi -echo "$result" diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql new file mode 100644 index 00000000000..9b7249ceff4 --- /dev/null +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -0,0 +1,24 @@ +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug + + +SELECT addressToLineWithInlines(1); -- { serverError 446 } + +SET allow_introspection_functions = 1; +SET query_profiler_real_time_period_ns = 0; +SET query_profiler_cpu_time_period_ns = 1000000; +SET log_queries = 1; +SELECT count() FROM numbers_mt(10000000000) SETTINGS log_comment='02161_test_case'; +SET log_queries = 0; +SET query_profiler_cpu_time_period_ns = 0; +SYSTEM FLUSH LOGS; + +WITH + address_list AS + ( + SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id = + ( + SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment='02161_test_case' ORDER BY event_time DESC LIMIT 1 + ) + ) +SELECT 'has inlines:', max(length(addressToLineWithInlines(address))) > 1 FROM address_list; + From 4c605c80f320d617465a4cfaf460e374af8bc7e2 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Wed, 26 Jan 2022 20:02:39 +0800 Subject: [PATCH 016/215] improve test, check whether there is no symbol --- .../0_stateless/02161_addressToLineWithInlines.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index 9b7249ceff4..29be9ae85f6 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -13,12 +13,13 @@ SET query_profiler_cpu_time_period_ns = 0; SYSTEM FLUSH LOGS; WITH - address_list AS + lineWithInlines AS ( - SELECT DISTINCT arrayJoin(trace) AS address FROM system.trace_log WHERE query_id = + SELECT DISTINCT addressToLineWithInlines(arrayJoin(trace)) AS lineWithInlines FROM system.trace_log WHERE query_id = ( SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment='02161_test_case' ORDER BY event_time DESC LIMIT 1 ) ) -SELECT 'has inlines:', max(length(addressToLineWithInlines(address))) > 1 FROM address_list; - +SELECT 'has inlines:', or(max(length(lineWithInlines)) > 1, not any(locate(lineWithInlines[1], ':') != 0)) FROM lineWithInlines SETTINGS short_circuit_function_evaluation='enable'; +-- `max(length(lineWithInlines)) > 1` check there is any inlines. +-- `not any(locate(lineWithInlines[1], ':') != 0)` check whether none could get a symbol. 
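What the simplified SQL test above asserts follows from the shape of addressToLineWithInlines' return value: it yields an Array(String) whose first element is the outermost source location ("<file>:<line>") and whose remaining elements are the inlined frames, falling back to a single element containing only the binary name when no source location can be resolved. Hence "has inlines" holds when some array is longer than one element; the check also passes when no first element contains a ':' at all, since a build without debug symbols cannot be expected to produce inline frames. A minimal manual probe of both introspection functions (a sketch only: it assumes system.trace_log is populated and the binary carries debug info):

SET allow_introspection_functions = 1;
SELECT
    addressToLine(addr)            AS direct_location, -- a single "<file>:<line>" string
    addressToLineWithInlines(addr) AS with_inlines     -- Array(String): the location plus any inlined frames
FROM (SELECT arrayJoin(trace) AS addr FROM system.trace_log LIMIT 1);
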
From 99c8736f00533c8879fe522e61073fcd63524ebd Mon Sep 17 00:00:00 2001 From: save-my-heart Date: Wed, 26 Jan 2022 22:18:17 +0800 Subject: [PATCH 017/215] fix build error on freebsd & aarch --- base/daemon/BaseDaemon.cpp | 16 ++++++++-------- src/Common/StackTrace.cpp | 4 ++-- src/Common/StackTrace.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 6ac552e1284..861a872a9f8 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -82,7 +82,7 @@ static void call_default_signal_handler(int sig) static const size_t signal_pipe_buf_size = sizeof(int) + sizeof(siginfo_t) - + sizeof(mcontext_t) + + sizeof(ucontext_t*) + sizeof(StackTrace) + sizeof(UInt32) + sizeof(void*); @@ -125,12 +125,12 @@ static void signalHandler(int sig, siginfo_t * info, void * context) char buf[signal_pipe_buf_size]; DB::WriteBufferFromFileDescriptorDiscardOnFailure out(signal_pipe.fds_rw[1], signal_pipe_buf_size, buf); - const ucontext_t signal_context = *reinterpret_cast(context); - const StackTrace stack_trace(signal_context); + const ucontext_t * signal_context = reinterpret_cast(context); + const StackTrace stack_trace(*signal_context); DB::writeBinary(sig, out); DB::writePODBinary(*info, out); - DB::writePODBinary(signal_context.uc_mcontext, out); + DB::writePODBinary(signal_context, out); DB::writePODBinary(stack_trace, out); DB::writeBinary(UInt32(getThreadId()), out); DB::writePODBinary(DB::current_thread, out); @@ -221,7 +221,7 @@ public: else { siginfo_t info{}; - mcontext_t mcontext{}; + ucontext_t * context{}; StackTrace stack_trace(NoCapture{}); UInt32 thread_num{}; DB::ThreadStatus * thread_ptr{}; @@ -238,7 +238,7 @@ public: /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. 
- std::thread([=, this] { onFault(sig, info, mcontext, stack_trace, thread_num, thread_ptr); }).detach(); + std::thread([=, this] { onFault(sig, info, *context, stack_trace, thread_num, thread_ptr); }).detach(); } } } @@ -271,7 +271,7 @@ private: void onFault( int sig, const siginfo_t & info, - const mcontext_t & mcontext, + const ucontext_t & context, const StackTrace & stack_trace, UInt32 thread_num, DB::ThreadStatus * thread_ptr) const @@ -314,7 +314,7 @@ private: String error_message; if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, mcontext); + error_message = signalToErrorMessage(sig, info, context); else error_message = "Sanitizer trap."; diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 35d590d7508..18e2e223744 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -19,7 +19,7 @@ # include #endif -std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused]] const mcontext_t & mcontext) +std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused]] const ucontext_t & context) { std::stringstream error; // STYLE_CHECK_ALLOW_STD_STRING_STREAM error.exceptions(std::ios::failbit); @@ -34,7 +34,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused error << "Address: " << info.si_addr; #if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__) && !defined(__powerpc__) - auto err_mask = mcontext.gregs[REG_ERR]; + auto err_mask = context.uc_mcontext.gregs[REG_ERR]; if ((err_mask & 0x02)) error << " Access: write."; else diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h index 697574900d5..06a17e73091 100644 --- a/src/Common/StackTrace.h +++ b/src/Common/StackTrace.h @@ -75,4 +75,4 @@ protected: FramePointers frame_pointers{}; }; -std::string signalToErrorMessage(int sig, const siginfo_t & info, const mcontext_t & mcontext); +std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context); From 2c37b572d41dda9faafe52e92243e9a5135d545d Mon Sep 17 00:00:00 2001 From: save-my-heart Date: Wed, 26 Jan 2022 22:21:18 +0800 Subject: [PATCH 018/215] fix --- base/daemon/BaseDaemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 861a872a9f8..71e842876be 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -229,7 +229,7 @@ public: if (sig != SanitizerTrap) { DB::readPODBinary(info, in); - DB::readPODBinary(mcontext, in); + DB::readPODBinary(context, in); } DB::readPODBinary(stack_trace, in); From 62ba4833df3ae38dd53f7670cf229521b0c831e4 Mon Sep 17 00:00:00 2001 From: save-my-heart Date: Wed, 26 Jan 2022 22:55:19 +0800 Subject: [PATCH 019/215] fix dereference null pointer --- base/daemon/BaseDaemon.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 71e842876be..bd3209280f7 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -238,7 +238,7 @@ public: /// This allows to receive more signals if failure happens inside onFault function. /// Example: segfault while symbolizing stack trace. 
- std::thread([=, this] { onFault(sig, info, *context, stack_trace, thread_num, thread_ptr); }).detach(); + std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, thread_ptr); }).detach(); } } } @@ -271,7 +271,7 @@ private: void onFault( int sig, const siginfo_t & info, - const ucontext_t & context, + ucontext_t * context, const StackTrace & stack_trace, UInt32 thread_num, DB::ThreadStatus * thread_ptr) const @@ -314,7 +314,7 @@ private: String error_message; if (sig != SanitizerTrap) - error_message = signalToErrorMessage(sig, info, context); + error_message = signalToErrorMessage(sig, info, *context); else error_message = "Sanitizer trap."; From cc3cbc65033b9052571ceaa825efd596875196a6 Mon Sep 17 00:00:00 2001 From: save-my-heart Date: Wed, 26 Jan 2022 23:05:07 +0800 Subject: [PATCH 020/215] remove whitespace to pass style check --- base/daemon/BaseDaemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index bd3209280f7..91bb007e78a 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -286,7 +286,7 @@ private: if (thread_ptr) { query_id = thread_ptr->getQueryId().toString(); - + if (auto thread_group = thread_ptr->getThreadGroup()) { query = thread_group->query; From 8f00b59a71069b43da76eb721a3243a660f59126 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 26 Jan 2022 11:25:11 -0400 Subject: [PATCH 021/215] make systemd to use EnvironmentFile --- debian/clickhouse-server.service | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service index bc19235cb3a..3bdec80632f 100644 --- a/debian/clickhouse-server.service +++ b/debian/clickhouse-server.service @@ -16,6 +16,7 @@ Restart=always RestartSec=30 RuntimeDirectory=clickhouse-server ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid +EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE From 4b5ab80e3bb8026dcb8ebacf4bad7372195f2dfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 17 Jan 2022 19:32:55 +0100 Subject: [PATCH 022/215] Better scalar cache handling - Fixes global CTE scalar cache. 
- Adds MVs back (views dependent on the source are cached locally and others globally --- src/Common/ProfileEvents.cpp | 4 ++ src/Interpreters/Context.cpp | 5 +- src/Interpreters/Context.h | 2 + .../ExecuteScalarSubqueriesVisitor.cpp | 33 +++++++++++-- .../ExecuteScalarSubqueriesVisitor.h | 1 + .../IInterpreterUnionOrSelectQuery.h | 3 ++ .../InterpreterSelectIntersectExceptQuery.cpp | 3 ++ src/Interpreters/InterpreterSelectQuery.cpp | 7 ++- .../InterpreterSelectWithUnionQuery.cpp | 1 + src/Interpreters/JoinedTables.cpp | 11 +++-- src/Interpreters/JoinedTables.h | 6 ++- src/Interpreters/TreeRewriter.cpp | 9 ++-- src/Interpreters/TreeRewriter.h | 1 + src/Parsers/ASTSelectQuery.cpp | 44 +++++------------ src/Storages/StorageDistributed.cpp | 3 +- .../02177_cte_scalar_cache.reference | 2 + .../0_stateless/02177_cte_scalar_cache.sql | 48 +++++++++++++++++++ 17 files changed, 133 insertions(+), 50 deletions(-) create mode 100644 tests/queries/0_stateless/02177_cte_scalar_cache.reference create mode 100644 tests/queries/0_stateless/02177_cte_scalar_cache.sql diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index ea6c782ebb4..f1d8355cc5b 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -280,6 +280,10 @@ M(ExternalDataSourceLocalCacheReadBytes, "Bytes read from local cache buffer in RemoteReadBufferCache")\ \ M(MainConfigLoads, "Number of times the main configuration was reloaded.") \ + \ + M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \ + M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \ + M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely") namespace ProfileEvents { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1c71ab2cd6f..0e1d40d16a1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -867,6 +867,9 @@ const Block & Context::getScalar(const String & name) const auto it = scalars.find(name); if (scalars.end() == it) { + it = local_scalars.find(name); + if (it != local_scalars.end()) + return it->second; // This should be a logical error, but it fails the sql_fuzz test too // often, so 'bad arguments' for now. throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS); @@ -962,7 +965,7 @@ bool Context::hasScalar(const String & name) const if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); - return scalars.count(name); + return scalars.count(name) || local_scalars.count(name); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 6b0a4671efb..abcc07a6229 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -217,6 +217,8 @@ private: /// Thus, used in HTTP interface. If not specified - then some globally default format is used. TemporaryTablesMapping external_tables_mapping; Scalars scalars; + /// Includes special scalars (_shard_num and _shard_count) but also scalars that aren't cacheable between queries / contexts + /// because they use storage views (like in MVs) Scalars local_scalars; /// Used in s3Cluster table function. 
With this callback, a worker node could ask an initiator diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 2117eec0063..03e8aec1c41 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -1,9 +1,9 @@ #include -#include #include -#include +#include #include +#include #include #include #include @@ -18,7 +18,14 @@ #include #include #include +#include +namespace ProfileEvents +{ +extern const Event ScalarSubqueriesGlobalCacheHit; +extern const Event ScalarSubqueriesLocalCacheHit; +extern const Event ScalarSubqueriesCacheMiss; +} namespace DB { @@ -77,22 +84,39 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto hash = subquery.getTreeHash(); auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); + bool is_local = false; Block scalar; if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str)) { scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); + } + else if (data.local_scalars.count(scalar_query_hash_str)) + { + scalar = data.local_scalars[scalar_query_hash_str]; + is_local = true; + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesLocalCacheHit); } else if (data.scalars.count(scalar_query_hash_str)) { scalar = data.scalars[scalar_query_hash_str]; + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); } else { + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesCacheMiss); auto subquery_context = Context::createCopy(data.getContext()); Settings subquery_settings = data.getContext()->getSettings(); subquery_settings.max_result_rows = 1; subquery_settings.extremes = false; subquery_context->setSettings(subquery_settings); + if (auto context = subquery_context->getQueryContext()) + { + for (const auto & it : data.scalars) + context->addScalar(it.first, it.second); + for (const auto & it : data.local_scalars) + context->addScalar(it.first, it.second); + } ASTPtr subquery_select = subquery.children.at(0); @@ -218,7 +242,10 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr ast = std::move(func); } - data.scalars[scalar_query_hash_str] = std::move(scalar); + if (is_local) + data.local_scalars[scalar_query_hash_str] = std::move(scalar); + else + data.scalars[scalar_query_hash_str] = std::move(scalar); } void ExecuteScalarSubqueriesMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index c230f346779..f42d3834c19 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -37,6 +37,7 @@ public: { size_t subquery_depth; Scalars & scalars; + Scalars & local_scalars; bool only_analyze; }; diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index db9cc086e35..1265a52d370 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -40,6 +40,8 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const override; + bool usesViewSource() { return uses_view_source; } + protected: ASTPtr query_ptr; ContextMutablePtr context; @@ -48,6 
+50,7 @@ protected: size_t max_streams = 1; bool settings_limit_offset_needed = false; bool settings_limit_offset_done = false; + bool uses_view_source = false; }; } diff --git a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp index 3bb78b57702..cad570ab420 100644 --- a/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp +++ b/src/Interpreters/InterpreterSelectIntersectExceptQuery.cpp @@ -68,7 +68,10 @@ InterpreterSelectIntersectExceptQuery::InterpreterSelectIntersectExceptQuery( nested_interpreters.resize(num_children); for (size_t i = 0; i < num_children; ++i) + { nested_interpreters[i] = buildCurrentChildInterpreter(children.at(i)); + uses_view_source |= nested_interpreters[i]->usesViewSource(); + } Blocks headers(num_children); for (size_t query_num = 0; query_num < num_children; ++query_num) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8e0f73f0b31..593fc8ccd35 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -313,7 +313,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( bool got_storage_from_query = false; if (!has_input && !storage) { - storage = joined_tables.getLeftTableStorage(); + std::tie(uses_view_source, storage) = joined_tables.getLeftTableStorage(); got_storage_from_query = true; } @@ -388,9 +388,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( query.setFinal(); /// Save scalar sub queries's results in the query context - /// But discard them if the Storage has been modified - /// In an ideal situation we would only discard the scalars affected by the storage change - if (!options.only_analyze && context->hasQueryContext() && !context->getViewSource()) + /// Note that we are only saving scalars and not local_scalars since the latter can't be safely shared across contexts + if (!options.only_analyze && context->hasQueryContext()) for (const auto & it : syntax_analyzer_result->getScalars()) context->getQueryContext()->addScalar(it.first, it.second); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index e4b3e62c358..e0fc15771f9 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -138,6 +138,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( nested_interpreters.emplace_back( buildCurrentChildInterpreter(ast->list_of_selects->children.at(query_num), require_full_header ? Names() : current_required_result_column_names)); + uses_view_source |= nested_interpreters.back()->usesViewSource(); } /// Determine structure of the result. 
diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 3aae3982758..743aba91571 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -186,13 +186,13 @@ std::unique_ptr JoinedTables::makeLeftTableSubq return std::make_unique(left_table_expression, context, select_options); } -StoragePtr JoinedTables::getLeftTableStorage() +JoinedTables::storage_is_view_source JoinedTables::getLeftTableStorage() { if (isLeftTableSubquery()) return {}; if (isLeftTableFunction()) - return context->getQueryContext()->executeTableFunction(left_table_expression); + return {false, context->getQueryContext()->executeTableFunction(left_table_expression)}; StorageID table_id = StorageID::createEmpty(); if (left_db_and_table) @@ -204,19 +204,20 @@ StoragePtr JoinedTables::getLeftTableStorage() table_id = StorageID("system", "one"); } - if (auto view_source = context->getViewSource()) + auto view_source = context->getViewSource(); + if (view_source) { const auto & storage_values = static_cast(*view_source); auto tmp_table_id = storage_values.getStorageID(); if (tmp_table_id.database_name == table_id.database_name && tmp_table_id.table_name == table_id.table_name) { /// Read from view source. - return context->getViewSource(); + return {true, view_source}; } } /// Read from table. Even without table expression (implicit SELECT ... FROM system.one). - return DatabaseCatalog::instance().getTable(table_id, context); + return {false, DatabaseCatalog::instance().getTable(table_id, context)}; } bool JoinedTables::resolveTables() diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 9d01c081e9f..0c2119f0016 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -7,6 +7,8 @@ #include #include +#include + namespace DB { @@ -22,11 +24,13 @@ using StorageMetadataPtr = std::shared_ptr; class JoinedTables { public: + using storage_is_view_source = std::pair; + JoinedTables(ContextPtr context, const ASTSelectQuery & select_query, bool include_all_columns_ = false); void reset(const ASTSelectQuery & select_query); - StoragePtr getLeftTableStorage(); + JoinedTables::storage_is_view_source getLeftTableStorage(); bool resolveTables(); /// Make fake tables_with_columns[0] in case we have predefined input in InterpreterSelectQuery diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 0285bdf333c..3f5f21cc9f1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -479,10 +479,11 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, } /// Replacing scalar subqueries with constant values. 
-void executeScalarSubqueries(ASTPtr & query, ContextPtr context, size_t subquery_depth, Scalars & scalars, bool only_analyze) +void executeScalarSubqueries( + ASTPtr & query, ContextPtr context, size_t subquery_depth, Scalars & scalars, Scalars & local_scalars, bool only_analyze) { LogAST log; - ExecuteScalarSubqueriesVisitor::Data visitor_data{WithContext{context}, subquery_depth, scalars, only_analyze}; + ExecuteScalarSubqueriesVisitor::Data visitor_data{WithContext{context}, subquery_depth, scalars, local_scalars, only_analyze}; ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } @@ -1112,7 +1113,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); /// Executing scalar subqueries - replacing them with constant values. - executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, select_options.only_analyze); + executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, result.local_scalars, select_options.only_analyze); if (settings.legacy_column_name_of_tuple_literal) markTupleLiteralsAsLegacy(query); @@ -1195,7 +1196,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases); /// Executing scalar subqueries. Column defaults could be a scalar subquery. - executeScalarSubqueries(query, getContext(), 0, result.scalars, !execute_scalar_subqueries); + executeScalarSubqueries(query, getContext(), 0, result.scalars, result.local_scalars, !execute_scalar_subqueries); if (settings.legacy_column_name_of_tuple_literal) markTupleLiteralsAsLegacy(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 52c62cc4cec..7becd3f94bc 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -72,6 +72,7 @@ struct TreeRewriterResult /// Results of scalar sub queries Scalars scalars; + Scalars local_scalars; explicit TreeRewriterResult( const NamesAndTypesList & source_columns_, diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 1c5a4310f1b..7b473812915 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -22,38 +22,20 @@ namespace ErrorCodes ASTPtr ASTSelectQuery::clone() const { auto res = std::make_shared(*this); + + /** NOTE Members must clone exactly in the same order in which they were inserted into `children` in ParserSelectQuery. + * This is important because the AST hash depends on the children order and this hash is used for multiple things, + * like the column identifiers in the case of subqueries in the IN statement or caching scalar queries (reused in CTEs so it's + * important for them to have the same hash). + * For distributed query processing, in case one of the servers is localhost and the other one is not, localhost query is executed + * within the process and is cloned, and the request is sent to the remote server in text form via TCP. + * And if the cloning order does not match the parsing order then different servers will get different identifiers. 
+ * + * Since the positions map uses we can copy it as is and ensure the new children array is created / pushed + * in the same order as the existing one */ res->children.clear(); - res->positions.clear(); - -#define CLONE(expr) res->setExpression(expr, getExpression(expr, true)) - - /** NOTE Members must clone exactly in the same order, - * in which they were inserted into `children` in ParserSelectQuery. - * This is important because of the children's names the identifier (getTreeHash) is compiled, - * which can be used for column identifiers in the case of subqueries in the IN statement. - * For distributed query processing, in case one of the servers is localhost and the other one is not, - * localhost query is executed within the process and is cloned, - * and the request is sent to the remote server in text form via TCP. - * And if the cloning order does not match the parsing order, - * then different servers will get different identifiers. - */ - CLONE(Expression::WITH); - CLONE(Expression::SELECT); - CLONE(Expression::TABLES); - CLONE(Expression::PREWHERE); - CLONE(Expression::WHERE); - CLONE(Expression::GROUP_BY); - CLONE(Expression::HAVING); - CLONE(Expression::WINDOW); - CLONE(Expression::ORDER_BY); - CLONE(Expression::LIMIT_BY_OFFSET); - CLONE(Expression::LIMIT_BY_LENGTH); - CLONE(Expression::LIMIT_BY); - CLONE(Expression::LIMIT_OFFSET); - CLONE(Expression::LIMIT_LENGTH); - CLONE(Expression::SETTINGS); - -#undef CLONE + for (auto & child : children) + res->children.push_back(child->clone()); return res; } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index bcb12cc86b0..6efb5f3daf9 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -721,7 +721,8 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer if (joined_tables.tablesCount() == 1) { - storage_src = std::dynamic_pointer_cast(joined_tables.getLeftTableStorage()); + auto [ignore, storage] = joined_tables.getLeftTableStorage(); + storage_src = std::dynamic_pointer_cast(storage); if (storage_src) { const auto select_with_union_query = std::make_shared(); diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache.reference b/tests/queries/0_stateless/02177_cte_scalar_cache.reference new file mode 100644 index 00000000000..88456b1e7ea --- /dev/null +++ b/tests/queries/0_stateless/02177_cte_scalar_cache.reference @@ -0,0 +1,2 @@ +02177_CTE_GLOBAL_ON 5 500 11 0 5 +02177_CTE_GLOBAL_OFF 1 100 5 0 1 diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache.sql b/tests/queries/0_stateless/02177_cte_scalar_cache.sql new file mode 100644 index 00000000000..39a1e0d965a --- /dev/null +++ b/tests/queries/0_stateless/02177_cte_scalar_cache.sql @@ -0,0 +1,48 @@ +WITH + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, + ( SELECT sleep(0.0001) FROM system.one ) as a5 +SELECT '02177_CTE_GLOBAL_ON', a5 +FORMAT Null +SETTINGS enable_global_with_statement = 1; + +WITH + ( SELECT sleep(0.0001) FROM system.one ) as a1, + ( SELECT sleep(0.0001) FROM system.one ) as a2, + ( SELECT sleep(0.0001) FROM system.one ) as a3, + ( SELECT sleep(0.0001) FROM system.one ) as a4, + ( SELECT sleep(0.0001) FROM system.one ) as a5 +SELECT '02177_CTE_GLOBAL_OFF', a5 + FORMAT Null +SETTINGS enable_global_with_statement = 0; + +SYSTEM FLUSH LOGS; +SELECT + '02177_CTE_GLOBAL_ON', + 
ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds, + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '%SELECT ''02177_CTE_GLOBAL_ON%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +SELECT + '02177_CTE_GLOBAL_OFF', + ProfileEvents['SleepFunctionCalls'] as sleep_calls, + ProfileEvents['SleepFunctionMicroseconds'] as sleep_microseconds, + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '%02177_CTE_GLOBAL_OFF%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; From 444a8e2519cc18ec52d98e7c0eabefba02f7e2a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 19 Jan 2022 18:26:37 +0100 Subject: [PATCH 023/215] Minimize changes, improve scalar subquery for MVs --- src/Interpreters/Context.cpp | 5 +- src/Interpreters/Context.h | 2 - .../ExecuteScalarSubqueriesVisitor.cpp | 86 +++++++++++++------ src/Interpreters/InterpreterSelectQuery.cpp | 17 +++- src/Interpreters/JoinedTables.cpp | 11 ++- src/Interpreters/JoinedTables.h | 6 +- src/Storages/StorageDistributed.cpp | 3 +- .../0_stateless/02177_cte_scalar_cache.sql | 4 +- 8 files changed, 87 insertions(+), 47 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0e1d40d16a1..1c71ab2cd6f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -867,9 +867,6 @@ const Block & Context::getScalar(const String & name) const auto it = scalars.find(name); if (scalars.end() == it) { - it = local_scalars.find(name); - if (it != local_scalars.end()) - return it->second; // This should be a logical error, but it fails the sql_fuzz test too // often, so 'bad arguments' for now. throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS); @@ -965,7 +962,7 @@ bool Context::hasScalar(const String & name) const if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); - return scalars.count(name) || local_scalars.count(name); + return scalars.count(name); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index abcc07a6229..6b0a4671efb 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -217,8 +217,6 @@ private: /// Thus, used in HTTP interface. If not specified - then some globally default format is used. TemporaryTablesMapping external_tables_mapping; Scalars scalars; - /// Includes special scalars (_shard_num and _shard_count) but also scalars that aren't cacheable between queries / contexts - /// because they use storage views (like in MVs) Scalars local_scalars; /// Used in s3Cluster table function. 
With this callback, a worker node could ask an initiator diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 03e8aec1c41..b7de041af9b 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -79,57 +79,95 @@ static bool worthConvertingToLiteral(const Block & scalar) return !useless_literal_types.count(scalar_type_name); } +static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqueriesMatcher::Data & data) +{ + auto subquery_context = Context::createCopy(data.getContext()); + Settings subquery_settings = data.getContext()->getSettings(); + subquery_settings.max_result_rows = 1; + subquery_settings.extremes = false; + subquery_context->setSettings(subquery_settings); + if (subquery_context->hasQueryContext()) + { + /// Save current cached scalars in the context before analyzing the query + /// This is specially helpful when analyzing CTE scalars + auto context = subquery_context->getQueryContext(); + for (const auto & it : data.scalars) + context->addScalar(it.first, it.second); + } + + ASTPtr subquery_select = subquery.children.at(0); + + auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true); + options.analyze(data.only_analyze); + + return std::make_unique(subquery_select, subquery_context, options); +} + void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr & ast, Data & data) { auto hash = subquery.getTreeHash(); auto scalar_query_hash_str = toString(hash.first) + "_" + toString(hash.second); + std::unique_ptr interpreter = nullptr; + bool hit = false; bool is_local = false; + Block scalar; - if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str)) - { - scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); - ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); - } - else if (data.local_scalars.count(scalar_query_hash_str)) + if (data.local_scalars.count(scalar_query_hash_str)) { + hit = true; scalar = data.local_scalars[scalar_query_hash_str]; is_local = true; ProfileEvents::increment(ProfileEvents::ScalarSubqueriesLocalCacheHit); } else if (data.scalars.count(scalar_query_hash_str)) { + hit = true; scalar = data.scalars[scalar_query_hash_str]; ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); } else { - ProfileEvents::increment(ProfileEvents::ScalarSubqueriesCacheMiss); - auto subquery_context = Context::createCopy(data.getContext()); - Settings subquery_settings = data.getContext()->getSettings(); - subquery_settings.max_result_rows = 1; - subquery_settings.extremes = false; - subquery_context->setSettings(subquery_settings); - if (auto context = subquery_context->getQueryContext()) + if (data.getContext()->hasQueryContext() && data.getContext()->getQueryContext()->hasScalar(scalar_query_hash_str)) { - for (const auto & it : data.scalars) - context->addScalar(it.first, it.second); - for (const auto & it : data.local_scalars) - context->addScalar(it.first, it.second); + if (!data.getContext()->getViewSource()) + { + /// We aren't using storage views so we can safely use the context cache + scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); + hit = true; + } + else + { + /// If we are under a context that uses views that means 
that the cache might contain values that reference + /// the original table and not the view, so in order to be able to check the global cache we need to first + /// make sure that the query doesn't use the view + /// Note in any case the scalar will end up cached in *data* so this won't be repeated inside this context + interpreter = getQueryInterpreter(subquery, data); + if (!interpreter->usesViewSource()) + { + scalar = data.getContext()->getQueryContext()->getScalar(scalar_query_hash_str); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesGlobalCacheHit); + hit = true; + } + } } + } - ASTPtr subquery_select = subquery.children.at(0); + if (!hit) + { + if (!interpreter) + interpreter = getQueryInterpreter(subquery, data); - auto options = SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1, true); - options.analyze(data.only_analyze); + ProfileEvents::increment(ProfileEvents::ScalarSubqueriesCacheMiss); + is_local = interpreter->usesViewSource(); - auto interpreter = InterpreterSelectWithUnionQuery(subquery_select, subquery_context, options); Block block; if (data.only_analyze) { /// If query is only analyzed, then constants are not correct. - block = interpreter.getSampleBlock(); + block = interpreter->getSampleBlock(); for (auto & column : block) { if (column.column->empty()) @@ -142,14 +180,14 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr } else { - auto io = interpreter.execute(); + auto io = interpreter->execute(); PullingAsyncPipelineExecutor executor(io.pipeline); while (block.rows() == 0 && executor.pull(block)); if (block.rows() == 0) { - auto types = interpreter.getSampleBlock().getDataTypes(); + auto types = interpreter->getSampleBlock().getDataTypes(); if (types.size() != 1) types = {std::make_shared(types)}; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 593fc8ccd35..b638658b5d7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -64,8 +64,9 @@ #include #include -#include #include +#include +#include #include #include @@ -313,7 +314,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( bool got_storage_from_query = false; if (!has_input && !storage) { - std::tie(uses_view_source, storage) = joined_tables.getLeftTableStorage(); + storage = joined_tables.getLeftTableStorage(); + uses_view_source |= storage && storage == context->getViewSource(); got_storage_from_query = true; } @@ -335,6 +337,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( joined_tables.reset(getSelectQuery()); joined_tables.resolveTables(); + if (auto view_source = context->getViewSource()) + { + const auto & storage_values = static_cast(*view_source); + auto tmp_table_id = storage_values.getStorageID(); + for (auto & t : joined_tables.tablesWithColumns()) + uses_view_source |= (t.table.database == tmp_table_id.database_name && t.table.table == tmp_table_id.table_name); + } if (storage && joined_tables.isLeftTableSubquery()) { @@ -350,7 +359,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( { interpreter_subquery = joined_tables.makeLeftTableSubquery(options.subquery()); if (interpreter_subquery) + { source_header = interpreter_subquery->getSampleBlock(); + uses_view_source |= interpreter_subquery->usesViewSource(); + } } joined_tables.rewriteDistributedInAndJoins(query_ptr); @@ -477,6 +489,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// If there is an aggregation in the outer query, WITH TOTALS is ignored 
in the subquery. if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); + uses_view_source = interpreter_subquery->usesViewSource(); } required_columns = syntax_analyzer_result->requiredSourceColumns(); diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 743aba91571..3aae3982758 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -186,13 +186,13 @@ std::unique_ptr JoinedTables::makeLeftTableSubq return std::make_unique(left_table_expression, context, select_options); } -JoinedTables::storage_is_view_source JoinedTables::getLeftTableStorage() +StoragePtr JoinedTables::getLeftTableStorage() { if (isLeftTableSubquery()) return {}; if (isLeftTableFunction()) - return {false, context->getQueryContext()->executeTableFunction(left_table_expression)}; + return context->getQueryContext()->executeTableFunction(left_table_expression); StorageID table_id = StorageID::createEmpty(); if (left_db_and_table) @@ -204,20 +204,19 @@ JoinedTables::storage_is_view_source JoinedTables::getLeftTableStorage() table_id = StorageID("system", "one"); } - auto view_source = context->getViewSource(); - if (view_source) + if (auto view_source = context->getViewSource()) { const auto & storage_values = static_cast(*view_source); auto tmp_table_id = storage_values.getStorageID(); if (tmp_table_id.database_name == table_id.database_name && tmp_table_id.table_name == table_id.table_name) { /// Read from view source. - return {true, view_source}; + return context->getViewSource(); } } /// Read from table. Even without table expression (implicit SELECT ... FROM system.one). - return {false, DatabaseCatalog::instance().getTable(table_id, context)}; + return DatabaseCatalog::instance().getTable(table_id, context); } bool JoinedTables::resolveTables() diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 0c2119f0016..9d01c081e9f 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -7,8 +7,6 @@ #include #include -#include - namespace DB { @@ -24,13 +22,11 @@ using StorageMetadataPtr = std::shared_ptr; class JoinedTables { public: - using storage_is_view_source = std::pair; - JoinedTables(ContextPtr context, const ASTSelectQuery & select_query, bool include_all_columns_ = false); void reset(const ASTSelectQuery & select_query); - JoinedTables::storage_is_view_source getLeftTableStorage(); + StoragePtr getLeftTableStorage(); bool resolveTables(); /// Make fake tables_with_columns[0] in case we have predefined input in InterpreterSelectQuery diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6efb5f3daf9..bcb12cc86b0 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -721,8 +721,7 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer if (joined_tables.tablesCount() == 1) { - auto [ignore, storage] = joined_tables.getLeftTableStorage(); - storage_src = std::dynamic_pointer_cast(storage); + storage_src = std::dynamic_pointer_cast(joined_tables.getLeftTableStorage()); if (storage_src) { const auto select_with_union_query = std::make_shared(); diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache.sql b/tests/queries/0_stateless/02177_cte_scalar_cache.sql index 39a1e0d965a..4b015cdd007 100644 --- a/tests/queries/0_stateless/02177_cte_scalar_cache.sql +++ b/tests/queries/0_stateless/02177_cte_scalar_cache.sql @@ -4,7 +4,7 @@ WITH ( SELECT sleep(0.0001) FROM 
system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_ON', a5 +SELECT '02177_CTE_GLOBAL_ON', a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 1; @@ -14,7 +14,7 @@ WITH ( SELECT sleep(0.0001) FROM system.one ) as a3, ( SELECT sleep(0.0001) FROM system.one ) as a4, ( SELECT sleep(0.0001) FROM system.one ) as a5 -SELECT '02177_CTE_GLOBAL_OFF', a5 +SELECT '02177_CTE_GLOBAL_OFF', a5 FROM system.numbers LIMIT 100 FORMAT Null SETTINGS enable_global_with_statement = 0; From 6b088656043c79e059166bad7d9e128594bb5e06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 20 Jan 2022 11:36:32 +0100 Subject: [PATCH 024/215] WIP tests --- .../02177_cte_scalar_cache_mv.reference | 101 ++++++++++++++++++ .../0_stateless/02177_cte_scalar_cache_mv.sql | 50 +++++++++ 2 files changed, 151 insertions(+) create mode 100644 tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference create mode 100644 tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference new file mode 100644 index 00000000000..f7a7e91d63a --- /dev/null +++ b/tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference @@ -0,0 +1,101 @@ +0 4 4 1 +1 4 4 1 +2 4 4 1 +3 4 4 1 +4 4 4 1 +5 9 9 1 +6 9 9 1 +7 9 9 1 +8 9 9 1 +9 9 9 1 +10 14 14 1 +11 14 14 1 +12 14 14 1 +13 14 14 1 +14 14 14 1 +15 19 19 1 +16 19 19 1 +17 19 19 1 +18 19 19 1 +19 19 19 1 +20 24 24 1 +21 24 24 1 +22 24 24 1 +23 24 24 1 +24 24 24 1 +25 29 29 1 +26 29 29 1 +27 29 29 1 +28 29 29 1 +29 29 29 1 +30 34 34 1 +31 34 34 1 +32 34 34 1 +33 34 34 1 +34 34 34 1 +35 39 39 1 +36 39 39 1 +37 39 39 1 +38 39 39 1 +39 39 39 1 +40 44 44 1 +41 44 44 1 +42 44 44 1 +43 44 44 1 +44 44 44 1 +45 49 49 1 +46 49 49 1 +47 49 49 1 +48 49 49 1 +49 49 49 1 +50 54 54 1 +51 54 54 1 +52 54 54 1 +53 54 54 1 +54 54 54 1 +55 59 59 1 +56 59 59 1 +57 59 59 1 +58 59 59 1 +59 59 59 1 +60 64 64 1 +61 64 64 1 +62 64 64 1 +63 64 64 1 +64 64 64 1 +65 69 69 1 +66 69 69 1 +67 69 69 1 +68 69 69 1 +69 69 69 1 +70 74 74 1 +71 74 74 1 +72 74 74 1 +73 74 74 1 +74 74 74 1 +75 79 79 1 +76 79 79 1 +77 79 79 1 +78 79 79 1 +79 79 79 1 +80 84 84 1 +81 84 84 1 +82 84 84 1 +83 84 84 1 +84 84 84 1 +85 89 89 1 +86 89 89 1 +87 89 89 1 +88 89 89 1 +89 89 89 1 +90 94 94 1 +91 94 94 1 +92 94 94 1 +93 94 94 1 +94 94 94 1 +95 99 99 1 +96 99 99 1 +97 99 99 1 +98 99 99 1 +99 99 99 1 +02177_MV 3 60 21 diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql b/tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql new file mode 100644 index 00000000000..979b6fbd3f0 --- /dev/null +++ b/tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql @@ -0,0 +1,50 @@ +-- TEST CACHE +CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 100; +CREATE TABLE t2 (i Int64, j Int64, k Int64, l Int64) ENGINE = Memory; + +CREATE MATERIALIZED VIEW mv1 TO t2 AS + WITH + (SELECT max(i) FROM t1) AS t1 + SELECT + t1 as i, + t1 as j, + t1 as k, + t1 as l + FROM t1 + LIMIT 5; + +INSERT INTO t1 +WITH + (SELECT max(i) FROM t1) AS t1 + SELECT + number as i, + t1 + t1 + t1 AS j -- Using global cache + FROM system.numbers + LIMIT 100 + SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5; + +-- INSERT INTO t1 SELECT number as i, 
(SELECT max(i) FROM t1) AS j FROM system.numbers LIMIT 10 OFFSET 100 SETTINGS min_insert_block_size_rows=5, max_insert_block_size=5, max_block_size=5; +-- SELECT max(i) FROM t1; + +SELECT i, j, k, l, count() FROM t2 GROUP BY i, j, k, l ORDER BY i, j, k, l; +SYSTEM FLUSH LOGS; +SELECT + '02177_MV', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE 'INSERT INTO t1\n%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +-- MV SOURCE SHOULD USE LOCAL CACHE +-- MV SOURCE DEEP IN THE CALL SHOULD USE LOCAL CACHE +-- CHECK PERF TEST (EXISTING FOR SCALAR AND MAYBE ADD ONE WITH MVS) From e77fc9e9be07bc07dc7c164dc04b3194819730c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 20 Jan 2022 13:35:22 +0100 Subject: [PATCH 025/215] Comment --- src/Interpreters/ExecuteScalarSubqueriesVisitor.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.h b/src/Interpreters/ExecuteScalarSubqueriesVisitor.h index f42d3834c19..d702404dab6 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.h +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.h @@ -19,11 +19,8 @@ struct ASTTableExpression; * * Features * - * A replacement occurs during query analysis, and not during the main runtime. - * This means that the progress indicator will not work during the execution of these requests, - * and also such queries can not be aborted. - * - * But the query result can be used for the index in the table. + * A replacement occurs during query analysis, and not during the main runtime, so + * the query result can be used for the index in the table. * * Scalar subqueries are executed on the request-initializer server. * The request is sent to remote servers with already substituted constants. 
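For orientation between these patches: the comment rewritten above and the tests that follow both concern the scalar subquery cache, where a scalar such as (SELECT max(i) FROM t) is evaluated once during analysis and then reused. A minimal sketch of how the cache can be observed, assuming a server where system.query_log is enabled (the table t0 is hypothetical and not part of these patches):

    -- Illustrative only: t0 is a hypothetical table, not part of this patch series.
    CREATE TABLE t0 (i Int64) ENGINE = Memory;
    INSERT INTO t0 SELECT number FROM system.numbers LIMIT 10;

    -- The scalar (SELECT max(i) FROM t0) should be computed once; the repeated
    -- uses of m within the same query are served from the global scalar cache.
    WITH (SELECT max(i) FROM t0) AS m
    SELECT m + m + m FROM system.numbers LIMIT 5 FORMAT Null;

    -- The cache activity is visible through ProfileEvents in the query log.
    SYSTEM FLUSH LOGS;
    SELECT ProfileEvents['ScalarSubqueriesGlobalCacheHit'] AS global_hits,
           ProfileEvents['ScalarSubqueriesCacheMiss'] AS misses
    FROM system.query_log
    WHERE current_database = currentDatabase()
        AND type = 'QueryFinish'
        AND query LIKE 'WITH%'
        AND event_date >= yesterday()
    ORDER BY event_time DESC
    LIMIT 1;

The counters read here, ScalarSubqueriesGlobalCacheHit and ScalarSubqueriesCacheMiss, are the same ProfileEvents that the tests in the following patches assert on.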
From beb99a344f007dbe5f9d796498a79f743387b570 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jan 2022 13:31:24 +0100 Subject: [PATCH 026/215] Tests --- ...rence => 02174_cte_scalar_cache.reference} | 0 ...r_cache.sql => 02174_cte_scalar_cache.sql} | 0 .../02174_cte_scalar_cache_mv.reference | 21 ++++ .../0_stateless/02174_cte_scalar_cache_mv.sql | 67 ++++++++++++ .../02177_cte_scalar_cache_mv.reference | 101 ------------------ .../0_stateless/02177_cte_scalar_cache_mv.sql | 50 --------- 6 files changed, 88 insertions(+), 151 deletions(-) rename tests/queries/0_stateless/{02177_cte_scalar_cache.reference => 02174_cte_scalar_cache.reference} (100%) rename tests/queries/0_stateless/{02177_cte_scalar_cache.sql => 02174_cte_scalar_cache.sql} (100%) create mode 100644 tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference create mode 100644 tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql delete mode 100644 tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference delete mode 100644 tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache.reference b/tests/queries/0_stateless/02174_cte_scalar_cache.reference similarity index 100% rename from tests/queries/0_stateless/02177_cte_scalar_cache.reference rename to tests/queries/0_stateless/02174_cte_scalar_cache.reference diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache.sql b/tests/queries/0_stateless/02174_cte_scalar_cache.sql similarity index 100% rename from tests/queries/0_stateless/02177_cte_scalar_cache.sql rename to tests/queries/0_stateless/02174_cte_scalar_cache.sql diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference new file mode 100644 index 00000000000..8a81b28a0b5 --- /dev/null +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference @@ -0,0 +1,21 @@ +4 4 4 4 5 +9 9 9 9 5 +14 14 14 14 5 +19 19 19 19 5 +24 24 24 24 5 +29 29 29 29 5 +34 34 34 34 5 +39 39 39 39 5 +44 44 44 44 5 +49 49 49 49 5 +54 54 54 54 5 +59 59 59 59 5 +64 64 64 64 5 +69 69 69 69 5 +74 74 74 74 5 +79 79 79 79 5 +84 84 84 84 5 +89 89 89 89 5 +94 94 94 94 5 +99 99 99 99 5 +02177_MV 7 80 22 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql new file mode 100644 index 00000000000..1ef1e27eecf --- /dev/null +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql @@ -0,0 +1,67 @@ +-- TEST CACHE +CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 100; +CREATE TABLE t2 (k Int64, l Int64, m Int64, n Int64) ENGINE = Memory; + +CREATE MATERIALIZED VIEW mv1 TO t2 AS + WITH + (SELECT max(i) FROM t1) AS t1 + SELECT + t1 as k, -- Using local cache x 4 + t1 as l, + t1 as m, + t1 as n + FROM t1 + LIMIT 5; + +-- FIRST INSERT +INSERT INTO t1 +WITH + (SELECT max(i) FROM t1) AS t1 +SELECT + number as i, + t1 + t1 + t1 AS j -- Using global cache +FROM system.numbers +LIMIT 100 +SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; + +SELECT k, l, m, n, count() +FROM t2 +GROUP BY k, l, m, n +ORDER BY k, l, m, n; + +SYSTEM FLUSH LOGS; +-- The main query should have a cache miss and 3 global hits +-- The MV is executed 20 times (100 / 5) and each run does 1 miss and 4 hits to the LOCAL cache +-- In addition to this, to prepare 
the MV, there is an extra preparation to get the list of columns via +-- InterpreterSelectQuery, which adds 1 miss and 4 global hits (since it uses the global cache) +-- So in total we have: +-- Main query: 1 miss, 3 global +-- Preparation: 1 miss, 4 global +-- Blocks (20): 20 miss, 0 global, 80 local hits + +-- TOTAL: 22 miss, 7 global, 80 local +SELECT + '02177_MV', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- FIRST INSERT\nINSERT INTO t1\n%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + + + + + +-- MV SOURCE SHOULD USE LOCAL CACHE +-- MV SOURCE DEEP IN THE CALL SHOULD USE LOCAL CACHE +-- CHECK PERF TEST (EXISTING FOR SCALAR AND MAYBE ADD ONE WITH MVS) diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference deleted file mode 100644 index f7a7e91d63a..00000000000 --- a/tests/queries/0_stateless/02177_cte_scalar_cache_mv.reference +++ /dev/null @@ -1,101 +0,0 @@ -0 4 4 1 -1 4 4 1 -2 4 4 1 -3 4 4 1 -4 4 4 1 -5 9 9 1 -6 9 9 1 -7 9 9 1 -8 9 9 1 -9 9 9 1 -10 14 14 1 -11 14 14 1 -12 14 14 1 -13 14 14 1 -14 14 14 1 -15 19 19 1 -16 19 19 1 -17 19 19 1 -18 19 19 1 -19 19 19 1 -20 24 24 1 -21 24 24 1 -22 24 24 1 -23 24 24 1 -24 24 24 1 -25 29 29 1 -26 29 29 1 -27 29 29 1 -28 29 29 1 -29 29 29 1 -30 34 34 1 -31 34 34 1 -32 34 34 1 -33 34 34 1 -34 34 34 1 -35 39 39 1 -36 39 39 1 -37 39 39 1 -38 39 39 1 -39 39 39 1 -40 44 44 1 -41 44 44 1 -42 44 44 1 -43 44 44 1 -44 44 44 1 -45 49 49 1 -46 49 49 1 -47 49 49 1 -48 49 49 1 -49 49 49 1 -50 54 54 1 -51 54 54 1 -52 54 54 1 -53 54 54 1 -54 54 54 1 -55 59 59 1 -56 59 59 1 -57 59 59 1 -58 59 59 1 -59 59 59 1 -60 64 64 1 -61 64 64 1 -62 64 64 1 -63 64 64 1 -64 64 64 1 -65 69 69 1 -66 69 69 1 -67 69 69 1 -68 69 69 1 -69 69 69 1 -70 74 74 1 -71 74 74 1 -72 74 74 1 -73 74 74 1 -74 74 74 1 -75 79 79 1 -76 79 79 1 -77 79 79 1 -78 79 79 1 -79 79 79 1 -80 84 84 1 -81 84 84 1 -82 84 84 1 -83 84 84 1 -84 84 84 1 -85 89 89 1 -86 89 89 1 -87 89 89 1 -88 89 89 1 -89 89 89 1 -90 94 94 1 -91 94 94 1 -92 94 94 1 -93 94 94 1 -94 94 94 1 -95 99 99 1 -96 99 99 1 -97 99 99 1 -98 99 99 1 -99 99 99 1 -02177_MV 3 60 21 diff --git a/tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql b/tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql deleted file mode 100644 index 979b6fbd3f0..00000000000 --- a/tests/queries/0_stateless/02177_cte_scalar_cache_mv.sql +++ /dev/null @@ -1,50 +0,0 @@ --- TEST CACHE -CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; -INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 100; -CREATE TABLE t2 (i Int64, j Int64, k Int64, l Int64) ENGINE = Memory; - -CREATE MATERIALIZED VIEW mv1 TO t2 AS - WITH - (SELECT max(i) FROM t1) AS t1 - SELECT - t1 as i, - t1 as j, - t1 as k, - t1 as l - FROM t1 - LIMIT 5; - -INSERT INTO t1 -WITH - (SELECT max(i) FROM t1) AS t1 - SELECT - number as i, - t1 + t1 + t1 AS j -- Using global cache - FROM system.numbers - LIMIT 100 - SETTINGS - min_insert_block_size_rows=5, - max_insert_block_size=5, - min_insert_block_size_rows_for_materialized_views=5, - max_block_size=5; - --- INSERT INTO t1 SELECT number as i, (SELECT max(i) FROM t1) AS j FROM system.numbers LIMIT 10 OFFSET 100 SETTINGS 
min_insert_block_size_rows=5, max_insert_block_size=5, max_block_size=5; --- SELECT max(i) FROM t1; - -SELECT i, j, k, l, count() FROM t2 GROUP BY i, j, k, l ORDER BY i, j, k, l; -SYSTEM FLUSH LOGS; -SELECT - '02177_MV', - ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, - ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, - ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss -FROM system.query_log -WHERE - current_database = currentDatabase() - AND type = 'QueryFinish' - AND query LIKE 'INSERT INTO t1\n%' - AND event_date >= yesterday() AND event_time > now() - interval 10 minute; - --- MV SOURCE SHOULD USE LOCAL CACHE --- MV SOURCE DEEP IN THE CALL SHOULD USE LOCAL CACHE --- CHECK PERF TEST (EXISTING FOR SCALAR AND MAYBE ADD ONE WITH MVS) From 580daa38c529c97c72b22eca7eedad8aac775765 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jan 2022 17:28:32 +0100 Subject: [PATCH 027/215] Verify that independent subqueries in MVs are cached and reused --- .../02174_cte_scalar_cache_mv.reference | 42 +++++++++++ .../0_stateless/02174_cte_scalar_cache_mv.sql | 72 ++++++++++++++++++- 2 files changed, 111 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference index 8a81b28a0b5..246706164df 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.reference @@ -19,3 +19,45 @@ 94 94 94 94 5 99 99 99 99 5 02177_MV 7 80 22 +10 +40 +70 +100 +130 +160 +190 +220 +250 +280 +310 +340 +370 +400 +430 +460 +490 +520 +550 +580 +02177_MV_2 0 0 21 +8 +18 +28 +38 +48 +58 +68 +78 +88 +98 +108 +118 +128 +138 +148 +158 +168 +178 +188 +198 +02177_MV_3 19 0 2 diff --git a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql index 1ef1e27eecf..4d4447c7f31 100644 --- a/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql +++ b/tests/queries/0_stateless/02174_cte_scalar_cache_mv.sql @@ -58,10 +58,76 @@ WHERE AND query LIKE '-- FIRST INSERT\nINSERT INTO t1\n%' AND event_date >= yesterday() AND event_time > now() - interval 10 minute; +DROP TABLE mv1; + +CREATE TABLE t3 (z Int64) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv2 TO t3 AS +SELECT + -- This includes an unnecessarily complex query to verify that the local cache is used (since it uses t1) + sum(i) + sum(j) + (SELECT * FROM (SELECT min(i) + min(j) FROM (SELECT * FROM system.one _a, t1 _b))) AS z +FROM t1; + +-- SECOND INSERT +INSERT INTO t1 +SELECT 0 as i, number as j from numbers(100) +SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; + +SELECT * FROM t3 ORDER BY z ASC; +SYSTEM FLUSH LOGS; +SELECT + '02177_MV_2', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- SECOND INSERT\nINSERT INTO t1%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +DROP TABLE mv2; +CREATE TABLE t4 (z Int64) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv3 TO t4 AS +SELECT + -- This includes an unnecessarily complex query but now it 
uses t2 so it can be cached + min(i) + min(j) + (SELECT * FROM (SELECT min(k) + min(l) FROM (SELECT * FROM system.one _a, t2 _b))) AS z +FROM t1; +-- THIRD INSERT +INSERT INTO t1 +SELECT number as i, number as j from numbers(100) + SETTINGS + min_insert_block_size_rows=5, + max_insert_block_size=5, + min_insert_block_size_rows_for_materialized_views=5, + max_block_size=5, + max_threads=1; +SYSTEM FLUSH LOGS; --- MV SOURCE SHOULD USE LOCAL CACHE --- MV SOURCE DEEP IN THE CALL SHOULD USE LOCAL CACHE --- CHECK PERF TEST (EXISTING FOR SCALAR AND MAYBE ADD ONE WITH MVS) +SELECT * FROM t4 ORDER BY z ASC; + +SELECT + '02177_MV_3', + ProfileEvents['ScalarSubqueriesGlobalCacheHit'] as scalar_cache_global_hit, + ProfileEvents['ScalarSubqueriesLocalCacheHit'] as scalar_cache_local_hit, + ProfileEvents['ScalarSubqueriesCacheMiss'] as scalar_cache_miss +FROM system.query_log +WHERE + current_database = currentDatabase() + AND type = 'QueryFinish' + AND query LIKE '-- THIRD INSERT\nINSERT INTO t1%' + AND event_date >= yesterday() AND event_time > now() - interval 10 minute; + +DROP TABLE mv3; +DROP TABLE t1; +DROP TABLE t2; +DROP TABLE t3; +DROP TABLE t4; From 78d2caee7e2aa62345424ba54e185acf0b9f8976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jan 2022 18:51:11 +0100 Subject: [PATCH 028/215] Can only pre-save scalars if they are cacheable --- src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index b7de041af9b..a9341807356 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -86,7 +86,7 @@ static auto getQueryInterpreter(const ASTSubquery & subquery, ExecuteScalarSubqu subquery_settings.max_result_rows = 1; subquery_settings.extremes = false; subquery_context->setSettings(subquery_settings); - if (subquery_context->hasQueryContext()) + if (!data.only_analyze && subquery_context->hasQueryContext()) { /// Save current cached scalars in the context before analyzing the query /// This is specially helpful when analyzing CTE scalars From 045c92e2b91c6160e0c061ef16b5779ac9b8bb8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 24 Jan 2022 20:21:42 +0100 Subject: [PATCH 029/215] Fix readability issues --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Parsers/ASTSelectQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index b638658b5d7..644e31b1701 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -341,7 +341,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { const auto & storage_values = static_cast(*view_source); auto tmp_table_id = storage_values.getStorageID(); - for (auto & t : joined_tables.tablesWithColumns()) + for (const auto & t : joined_tables.tablesWithColumns()) uses_view_source |= (t.table.database == tmp_table_id.database_name && t.table.table == tmp_table_id.table_name); } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 7b473812915..3f40167b1d1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -34,7 +34,7 @@ ASTPtr ASTSelectQuery::clone() const * Since the positions map uses we can copy it as is and ensure the new children array is 
created / pushed * in the same order as the existing one */ res->children.clear(); - for (auto & child : children) + for (const auto & child : children) res->children.push_back(child->clone()); return res; From eca0453564f479b4530552ca16521215fcc77bc2 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Thu, 27 Jan 2022 16:33:40 +0800 Subject: [PATCH 030/215] fix local metadata differ zk metadata --- src/Common/IFactoryWithAliases.h | 8 +-- src/Storages/KeyDescription.cpp | 16 +++++ src/Storages/KeyDescription.h | 3 + .../ReplicatedMergeTreeTableMetadata.cpp | 61 ++++++++++++------- .../ReplicatedMergeTreeTableMetadata.h | 4 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/TTLDescription.cpp | 16 +++++ src/Storages/TTLDescription.h | 3 + 8 files changed, 82 insertions(+), 31 deletions(-) diff --git a/src/Common/IFactoryWithAliases.h b/src/Common/IFactoryWithAliases.h index 7f5b53a80fa..f7da302a942 100644 --- a/src/Common/IFactoryWithAliases.h +++ b/src/Common/IFactoryWithAliases.h @@ -120,12 +120,8 @@ public: const String & getCanonicalNameIfAny(const String & name) const { auto it = case_insensitive_name_mapping.find(Poco::toLower(name)); - if (it != case_insensitive_name_mapping.end()) { - if (it->first != name) - { - return it->second; - } - } + if (it != case_insensitive_name_mapping.end()) + return it->second; return name; } diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 6a2f4bbb055..9db730ba578 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -8,6 +8,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -161,4 +164,17 @@ KeyDescription KeyDescription::buildEmptyKey() return result; } +KeyDescription KeyDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context) +{ + KeyDescription result; + if (str.empty()) + return result; + + ParserExpressionElement parser; + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + FunctionNameNormalizer().visit(ast.get()); + + return getKeyFromAST(ast, columns, context); +} + } diff --git a/src/Storages/KeyDescription.h b/src/Storages/KeyDescription.h index 81803c3e44b..527a36124aa 100644 --- a/src/Storages/KeyDescription.h +++ b/src/Storages/KeyDescription.h @@ -76,6 +76,9 @@ struct KeyDescription /// Substitute modulo with moduloLegacy. Used in KeyCondition to allow proper comparison with keys. 
static bool moduloToModuloLegacyRecursive(ASTPtr node_expr); + + /// Parse description from string + static KeyDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context); }; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 0637a6bb027..7dee7b8d0f8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -168,7 +168,7 @@ ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const S } -void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const +void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const { if (data_format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -203,9 +203,12 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. - if (primary_key != from_zk.primary_key) + String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast); + if (primary_key != parsed_zk_primary_key) throw Exception("Existing table metadata in ZooKeeper differs in primary key." - " Stored in ZooKeeper: " + from_zk.primary_key + ", local: " + primary_key, + " Stored in ZooKeeper: " + from_zk.primary_key + + ", parsed from ZooKeeper: " + parsed_zk_primary_key + + ", local: " + primary_key, ErrorCodes::METADATA_MISMATCH); if (data_format_version != from_zk.data_format_version) @@ -214,39 +217,53 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat ", local: " + DB::toString(data_format_version.toUnderType()), ErrorCodes::METADATA_MISMATCH); - if (partition_key != from_zk.partition_key) + String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).expression_list_ast); + if (partition_key != parsed_zk_partition_key) throw Exception( "Existing table metadata in ZooKeeper differs in partition key expression." - " Stored in ZooKeeper: " + from_zk.partition_key + ", local: " + partition_key, + " Stored in ZooKeeper: " + from_zk.partition_key + + ", parsed from ZooKeeper: " + parsed_zk_partition_key + + ", local: " + partition_key, ErrorCodes::METADATA_MISMATCH); - } void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const { - checkImmutableFieldsEquals(from_zk); + checkImmutableFieldsEquals(from_zk, columns, context); - if (sampling_expression != from_zk.sampling_expression) - throw Exception("Existing table metadata in ZooKeeper differs in sample expression." - " Stored in ZooKeeper: " + from_zk.sampling_expression + ", local: " + sampling_expression, - ErrorCodes::METADATA_MISMATCH); - - if (sorting_key != from_zk.sorting_key) + String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context).definition_ast); + if (sampling_expression != parsed_zk_sampling_expression) { throw Exception( - "Existing table metadata in ZooKeeper differs in sorting key expression." 
- " Stored in ZooKeeper: " + from_zk.sorting_key + ", local: " + sorting_key, + "Existing table metadata in ZooKeeper differs in sample expression." + " Stored in ZooKeeper: " + from_zk.sampling_expression + + ", parsed from ZooKeeper: " + parsed_zk_sampling_expression + + ", local: " + sampling_expression, ErrorCodes::METADATA_MISMATCH); } - if (ttl_table != from_zk.ttl_table) + String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context).definition_ast)); + if (sorting_key != parsed_zk_sorting_key) { throw Exception( - "Existing table metadata in ZooKeeper differs in TTL." - " Stored in ZooKeeper: " + from_zk.ttl_table + - ", local: " + ttl_table, - ErrorCodes::METADATA_MISMATCH); + "Existing table metadata in ZooKeeper differs in sorting key expression." + " Stored in ZooKeeper: " + from_zk.sorting_key + + ", parsed from ZooKeeper: " + parsed_zk_sorting_key + + ", local: " + sorting_key, + ErrorCodes::METADATA_MISMATCH); + } + + auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context); + String parsed_zk_ttl_table = formattedAST(TTLTableDescription::parse(from_zk.ttl_table, columns, context, parsed_primary_key).definition_ast); + if (ttl_table != parsed_zk_ttl_table) + { + throw Exception( + "Existing table metadata in ZooKeeper differs in TTL." + " Stored in ZooKeeper: " + from_zk.ttl_table + + ", parsed from ZooKeeper: " + parsed_zk_ttl_table + + ", local: " + ttl_table, + ErrorCodes::METADATA_MISMATCH); } String parsed_zk_skip_indices = IndicesDescription::parse(from_zk.skip_indices, columns, context).toString(); @@ -290,10 +307,10 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl } ReplicatedMergeTreeTableMetadata::Diff -ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const +ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const { - checkImmutableFieldsEquals(from_zk); + checkImmutableFieldsEquals(from_zk, columns, context); Diff diff; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 2f9a9d58834..6d510d20304 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -70,11 +70,11 @@ struct ReplicatedMergeTreeTableMetadata void checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; - Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk) const; + Diff checkAndFindDiff(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; private: - void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk) const; + void checkImmutableFieldsEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; bool index_granularity_bytes_found_in_zk = false; }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7743736724f..ce4af6b7af1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4478,7 +4478,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer auto alter_lock_holder = 
lockForAlter(getSettings()->lock_acquire_timeout_for_background_operations); LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); - auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry, getInMemoryMetadataPtr()->getColumns(), getContext()); setTableStructure(std::move(columns_from_entry), metadata_diff); metadata_version = entry.alter_version; diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index bd5cc9e2f9d..96048d9cd99 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -15,6 +15,9 @@ #include #include +#include +#include +#include namespace DB @@ -370,4 +373,17 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( return result; } +TTLTableDescription TTLTableDescription::parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key) +{ + TTLTableDescription result; + if (str.empty()) + return result; + + ParserTTLElement parser; + ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + FunctionNameNormalizer().visit(ast.get()); + + return getTTLForTableFromAST(ast, columns, context, primary_key); +} + } diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h index 6288098b3c5..17020392013 100644 --- a/src/Storages/TTLDescription.h +++ b/src/Storages/TTLDescription.h @@ -118,6 +118,9 @@ struct TTLTableDescription static TTLTableDescription getTTLForTableFromAST( const ASTPtr & definition_ast, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key); + + /// Parse description from string + static TTLTableDescription parse(const String & str, const ColumnsDescription & columns, ContextPtr context, const KeyDescription & primary_key); }; } From cb1524d4d9c772c11cfa3987972daeb4d0dee07e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 27 Jan 2022 12:47:52 +0100 Subject: [PATCH 031/215] Less flaky test_inconsistent_parts_if_drop_while_replica_not_active --- .../test.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_consistent_parts_after_clone_replica/test.py b/tests/integration/test_consistent_parts_after_clone_replica/test.py index 35a42b6fb12..b0b69da0902 100644 --- a/tests/integration/test_consistent_parts_after_clone_replica/test.py +++ b/tests/integration/test_consistent_parts_after_clone_replica/test.py @@ -3,8 +3,6 @@ import pytest from helpers.cluster import ClickHouseCluster from helpers.network import PartitionManager from helpers.test_tools import assert_eq_with_retry -import time - def fill_nodes(nodes, shard): for node in nodes: @@ -59,20 +57,25 @@ def test_inconsistent_parts_if_drop_while_replica_not_active(start_cluster): # DROP_RANGE will be removed from the replication log and the first replica will be lost for i in range(20): node2.query("INSERT INTO test_table VALUES ('2019-08-16', {})".format(20 + i)) - + assert_eq_with_retry(node2, "SELECT value FROM system.zookeeper WHERE path='/clickhouse/tables/test1/replicated/replicas/node1' AND name='is_lost'", "1") - for i in range(30): - if node2.contains_in_log("Will mark replica node1 as lost"): - break - time.sleep(0.5) + node2.wait_for_log_line("Will mark replica node1 as lost") # the first replica will be cloned from 
the second pm.heal_all() + node2.wait_for_log_line("Sending part") assert_eq_with_retry(node1, "SELECT count(*) FROM test_table", node2.query("SELECT count(*) FROM test_table")) # ensure replica was cloned assert node1.contains_in_log("Will mimic node2") - # queue must be empty (except some merges that are possibly executing right now) - assert node1.query("SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'") == "0\n" - assert node2.query("SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'") == "0\n" + + # 2 options: + # - There wasn't a merge in node2. Then node1 should have cloned the 2 parts + # - There was a merge in progress. node1 might have cloned the new part but still has the original 2 parts + # in the replication queue until they are finally discarded with a message like: + # `Skipping action for part 201908_40_40_0 because part 201908_21_40_4 already exists.` + # + # In any case after a short while the replication queue should be empty + assert_eq_with_retry(node1, "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'", "0") + assert_eq_with_retry(node2, "SELECT count() FROM system.replication_queue WHERE type != 'MERGE_PARTS'", "0") From a69711ccff2a7dab022dd3c1d18a6a314469b6ec Mon Sep 17 00:00:00 2001 From: liyang830 Date: Fri, 28 Jan 2022 19:07:59 +0800 Subject: [PATCH 032/215] ExpressionList parse tuple and ttl --- src/Storages/KeyDescription.cpp | 6 +++--- src/Storages/TTLDescription.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 9db730ba578..24b4b13bc21 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include @@ -170,8 +170,8 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio if (str.empty()) return result; - ParserExpressionElement parser; - ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ParserExpression parser; + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); FunctionNameNormalizer().visit(ast.get()); return getKeyFromAST(ast, columns, context); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 96048d9cd99..69303264482 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include @@ -379,7 +379,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns if (str.empty()) return result; - ParserTTLElement parser; + ParserTTLExpressionList parser; ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); FunctionNameNormalizer().visit(ast.get()); From 64799867a10e77a08ac4285562163e420b9ff7fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 28 Jan 2022 12:39:56 +0100 Subject: [PATCH 033/215] Add a comment about IInterpreterUnionOrSelectQuery::usesViewSource() --- src/Interpreters/IInterpreterUnionOrSelectQuery.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index 1265a52d370..1f59dd36354 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -40,6 +40,13 @@ public: void extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, 
ContextPtr) const override; + /// Returns whether the query uses the view source from the Context + /// The view source is a virtual storage that currently only materialized views use to replace the source table + /// with the incoming block only + /// This flag is useful to know for how long we can cache scalars generated by this query: If it doesn't use the virtual storage + /// then we can cache the scalars forever (for any query that doesn't use the virtual storage either), but if it does use the virtual + /// storage then we can only keep the scalar result around while we are working with that source block + /// You can find more details about this under ExecuteScalarSubqueriesMatcher::visit bool usesViewSource() { return uses_view_source; } protected: From 6c79e147d02112d3002572e6be320baa1279a666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 28 Jan 2022 14:47:49 +0100 Subject: [PATCH 034/215] Add a few extra comments around uses_view_source and why it's propagated --- src/Interpreters/InterpreterSelectQuery.cpp | 5 ++++- src/Interpreters/InterpreterSelectWithUnionQuery.cpp | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 644e31b1701..934ea8b08a9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -315,6 +315,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!has_input && !storage) { storage = joined_tables.getLeftTableStorage(); + // Mark uses_view_source if the returned storage is the same as the one saved in viewSource uses_view_source |= storage && storage == context->getViewSource(); got_storage_from_query = true; } @@ -339,6 +340,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( joined_tables.resolveTables(); if (auto view_source = context->getViewSource()) { + // If we are using a virtual block view to replace a table and that table is used + // inside the JOIN then we need to update uses_view_source accordingly so we avoid propagating scalars that we can't cache const auto & storage_values = static_cast(*view_source); auto tmp_table_id = storage_values.getStorageID(); for (const auto & t : joined_tables.tablesWithColumns()) @@ -489,7 +492,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// If there is an aggregation in the outer query, WITH TOTALS is ignored in the subquery. if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); - uses_view_source = interpreter_subquery->usesViewSource(); + uses_view_source |= interpreter_subquery->usesViewSource(); } required_columns = syntax_analyzer_result->requiredSourceColumns(); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index e0fc15771f9..723db59f04b 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -138,6 +138,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( nested_interpreters.emplace_back( buildCurrentChildInterpreter(ast->list_of_selects->children.at(query_num), require_full_header ? 
Names() : current_required_result_column_names)); + // We need to propagate the uses_view_source flag from children to the (self) parent since, if one of the children uses + // a view source that means that the parent uses it too and can be cached globally uses_view_source |= nested_interpreters.back()->usesViewSource(); } From 8858b6bf86bf9f133cb245ddf3288a5d84041073 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 28 Jan 2022 19:17:35 +0300 Subject: [PATCH 035/215] Small improvement in schema inference from stdin in local --- programs/local/LocalServer.cpp | 2 +- .../0_stateless/02182_format_and_schema_from_stdin.reference | 1 + tests/queries/0_stateless/02182_format_and_schema_from_stdin.sh | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index a6c9a6a4524..70363c62cac 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -315,7 +315,7 @@ void LocalServer::cleanup() std::string LocalServer::getInitialCreateTableQuery() { - if (!config().has("table-structure") && !config().has("table-file")) + if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format")) return {}; auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); diff --git a/tests/queries/0_stateless/02182_format_and_schema_from_stdin.reference b/tests/queries/0_stateless/02182_format_and_schema_from_stdin.reference index 8b1acc12b63..145e66f828c 100644 --- a/tests/queries/0_stateless/02182_format_and_schema_from_stdin.reference +++ b/tests/queries/0_stateless/02182_format_and_schema_from_stdin.reference @@ -8,3 +8,4 @@ 7 8 9 +1 String [1,2,3] diff --git a/tests/queries/0_stateless/02182_format_and_schema_from_stdin.sh b/tests/queries/0_stateless/02182_format_and_schema_from_stdin.sh index 555649718f8..6d0de1f1cf8 100755 --- a/tests/queries/0_stateless/02182_format_and_schema_from_stdin.sh +++ b/tests/queries/0_stateless/02182_format_and_schema_from_stdin.sh @@ -9,3 +9,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "select * from numbers(10) format Parquet" > $CLICKHOUSE_TMP/data.parquet $CLICKHOUSE_LOCAL -q "select * from table" --file="-" < $CLICKHOUSE_TMP/data.parquet +echo "1,\"String\", \"[1, 2, 3]\"" | $CLICKHOUSE_LOCAL -q "select * from table" --input-format=CSV + From 3f5365f1a98ea5532f7f77300ebaa6c007352648 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 28 Jan 2022 20:07:13 +0300 Subject: [PATCH 036/215] Fix test --- .../0_stateless/01375_output_format_tsv_csv_with_names.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh b/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh index 69f3ab1c9a8..462b9078abc 100755 --- a/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh +++ b/tests/queries/0_stateless/01375_output_format_tsv_csv_with_names.sh @@ -5,7 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh opts=( - --input-format CSV -q 'SELECT number FROM numbers(2)' ) From 96a506c6fa201e42f73048d46f6e06971c3fb5f9 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 29 Jan 2022 03:23:25 +0300 Subject: [PATCH 037/215] fix inserts to distributed tables in case of change of native protocol --- src/QueryPipeline/RemoteInserter.cpp | 2 + src/QueryPipeline/RemoteInserter.h | 2 + src/Storages/Distributed/DirectoryMonitor.cpp | 32 ++++++++------- .../__init__.py | 1 + .../configs/remote_servers.xml | 12 ++++++ .../test.py | 39 +++++++++++++++++++ 6 files changed, 75 insertions(+), 13 deletions(-) create mode 100644 tests/integration/test_distributed_insert_backward_compatibility/__init__.py create mode 100644 tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml create mode 100644 tests/integration/test_distributed_insert_backward_compatibility/test.py diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp index c34c625dc6d..2275f8eba4d 100644 --- a/src/QueryPipeline/RemoteInserter.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -34,6 +34,8 @@ RemoteInserter::RemoteInserter( = CurrentThread::get().thread_trace_context; } + server_revision = connection.getServerRevision(timeouts); + /** Send query and receive "header", that describes table structure. * Header is needed to know, what structure is required for blocks to be passed to 'write' method. */ diff --git a/src/QueryPipeline/RemoteInserter.h b/src/QueryPipeline/RemoteInserter.h index 0688b555825..5b5de962cc6 100644 --- a/src/QueryPipeline/RemoteInserter.h +++ b/src/QueryPipeline/RemoteInserter.h @@ -35,12 +35,14 @@ public: ~RemoteInserter(); const Block & getHeader() const { return header; } + UInt64 getServerRevision() const { return server_revision; } private: Connection & connection; String query; Block header; bool finished = false; + UInt64 server_revision; }; } diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 0c41cf71386..461ec48f910 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -132,6 +132,7 @@ namespace struct DistributedHeader { + UInt64 revision = 0; Settings insert_settings; std::string insert_query; ClientInfo client_info; @@ -166,9 +167,8 @@ namespace /// Read the parts of the header. ReadBufferFromString header_buf(header_data); - UInt64 initiator_revision; - readVarUInt(initiator_revision, header_buf); - if (DBMS_TCP_PROTOCOL_VERSION < initiator_revision) + readVarUInt(distributed_header.revision, header_buf); + if (DBMS_TCP_PROTOCOL_VERSION < distributed_header.revision) { LOG_WARNING(log, "ClickHouse shard version is older than ClickHouse initiator version. 
It may lack support for new features."); } @@ -177,7 +177,7 @@ namespace distributed_header.insert_settings.read(header_buf); if (header_buf.hasPendingData()) - distributed_header.client_info.read(header_buf, initiator_revision); + distributed_header.client_info.read(header_buf, distributed_header.revision); if (header_buf.hasPendingData()) { @@ -188,7 +188,7 @@ namespace if (header_buf.hasPendingData()) { - NativeReader header_block_in(header_buf, DBMS_TCP_PROTOCOL_VERSION); + NativeReader header_block_in(header_buf, distributed_header.revision); distributed_header.block_header = header_block_in.read(); if (!distributed_header.block_header) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read header from the {} batch", in.getFileName()); @@ -264,10 +264,10 @@ namespace return nullptr; } - void writeAndConvert(RemoteInserter & remote, ReadBufferFromFile & in) + void writeAndConvert(RemoteInserter & remote, const DistributedHeader & distributed_header, ReadBufferFromFile & in) { CompressedReadBuffer decompressing_in(in); - NativeReader block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION); + NativeReader block_in(decompressing_in, distributed_header.revision); while (Block block = block_in.read()) { @@ -304,7 +304,7 @@ namespace { LOG_TRACE(log, "Processing batch {} with old format (no header)", in.getFileName()); - writeAndConvert(remote, in); + writeAndConvert(remote, distributed_header, in); return; } @@ -314,14 +314,20 @@ namespace "Structure does not match (remote: {}, local: {}), implicit conversion will be done", remote.getHeader().dumpStructure(), distributed_header.block_header.dumpStructure()); - writeAndConvert(remote, in); + writeAndConvert(remote, distributed_header, in); return; } /// If connection does not use compression, we have to uncompress the data. 
if (!compression_expected) { - writeAndConvert(remote, in); + writeAndConvert(remote, distributed_header, in); + return; + } + + if (distributed_header.revision != remote.getServerRevision()) + { + writeAndConvert(remote, distributed_header, in); return; } @@ -915,10 +921,10 @@ public: { in = std::make_unique(file_name); decompressing_in = std::make_unique(*in); - block_in = std::make_unique(*decompressing_in, DBMS_TCP_PROTOCOL_VERSION); log = &Poco::Logger::get("DirectoryMonitorSource"); - readDistributedHeader(*in, log); + auto distributed_header = readDistributedHeader(*in, log); + block_in = std::make_unique(*decompressing_in, distributed_header.revision); first_block = block_in->read(); } @@ -1040,7 +1046,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map LOG_DEBUG(log, "Processing batch {} with old format (no header/rows)", in.getFileName()); CompressedReadBuffer decompressing_in(in); - NativeReader block_in(decompressing_in, DBMS_TCP_PROTOCOL_VERSION); + NativeReader block_in(decompressing_in, distributed_header.revision); while (Block block = block_in.read()) { diff --git a/tests/integration/test_distributed_insert_backward_compatibility/__init__.py b/tests/integration/test_distributed_insert_backward_compatibility/__init__.py new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/tests/integration/test_distributed_insert_backward_compatibility/__init__.py @@ -0,0 +1 @@ + diff --git a/tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml b/tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml new file mode 100644 index 00000000000..9c7f02c190f --- /dev/null +++ b/tests/integration/test_distributed_insert_backward_compatibility/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + node1 + 9000 + + + + + diff --git a/tests/integration/test_distributed_insert_backward_compatibility/test.py b/tests/integration/test_distributed_insert_backward_compatibility/test.py new file mode 100644 index 00000000000..ba7d8e0a25d --- /dev/null +++ b/tests/integration/test_distributed_insert_backward_compatibility/test.py @@ -0,0 +1,39 @@ +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.client import QueryRuntimeException + +cluster = ClickHouseCluster(__file__) + +node_shard = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml']) + +node_dist = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], image='yandex/clickhouse-server', + tag='21.11.9.1', stay_alive=True, with_installed_binary=True) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + node_shard.query("CREATE TABLE local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id") + node_dist.query("CREATE TABLE local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id") + node_dist.query("CREATE TABLE dist_table(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table, rand())") + + yield cluster + + finally: + cluster.shutdown() + + +def test_distributed_in_tuple(started_cluster): + node_dist.query("SYSTEM STOP DISTRIBUTED SENDS dist_table") + + node_dist.query("INSERT INTO dist_table VALUES (1, 'foo')") + assert node_dist.query("SELECT count() FROM dist_table") == "0\n" + assert node_shard.query("SELECT count() FROM local_table") == "0\n" + + node_dist.restart_with_latest_version(signal=9) + node_dist.query("SYSTEM FLUSH DISTRIBUTED dist_table") + + assert 
node_dist.query("SELECT count() FROM dist_table") == "1\n" + assert node_shard.query("SELECT count() FROM local_table") == "1\n" From 5228a3e421d3cad661031f09684362405d09a964 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 29 Jan 2022 23:42:04 +0800 Subject: [PATCH 038/215] commit again --- src/Functions/FunctionsStringArray.h | 52 ++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h index a6e705bb1af..d6f61650b3e 100644 --- a/src/Functions/FunctionsStringArray.h +++ b/src/Functions/FunctionsStringArray.h @@ -239,6 +239,8 @@ private: Pos end; char sep; + Int64 max_split = -1; + Int64 curr_split = 0; public: static constexpr auto name = "splitByChar"; @@ -254,6 +256,15 @@ public: if (!isString(arguments[1])) throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + ". Must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() > 2 && !isNativeInteger(arguments[2])) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument for function '{}' must be integer, got '{}' instead", + getName(), + arguments[2]->getName()); + } } void init(const ColumnsWithTypeAndName & arguments) @@ -271,6 +282,40 @@ public: throw Exception("Illegal separator for function " + getName() + ". Must be exactly one byte.", ErrorCodes::BAD_ARGUMENTS); sep = sep_str[0]; + + if (arguments.size() > 2) + { + std::optional max_split_opt = std::nullopt; + if (!((max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])) + || (max_split_opt = getMaxSplit(arguments[2])))) + { + throw Exception{ + "Illegal column " + arguments[2].column->getName() + " of third argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN}; + } + max_split = *max_split_opt; + } + } + + template + std::optional getMaxSplit(const ColumnWithTypeAndName & argument) + { + const auto * col = checkAndGetColumnConst>(argument.column.get()); + if (!col) + return std::nullopt; + + Int64 result= static_cast(col->template getValue()); + if (result < 0 && result != -1) + throw Exception("Illegal column " + argument.column->getName() + + " of third argument of function " + getName() + ". 
Must be non-negative number or -1",
+                ErrorCodes::ILLEGAL_COLUMN);
+        return result;
     }
 
     /// Returns the position of the argument, that is the column of strings
@@ -291,12 +336,19 @@ public:
             return false;
 
         token_begin = pos;
+        if (unlikely(max_split >= 0 && curr_split >= max_split))
+        {
+            token_end = end;
+            pos = nullptr;
+            return true;
+        }
 
         pos = reinterpret_cast(memchr(pos, sep, end - pos));
 
         if (pos)
         {
             token_end = pos;
             ++pos;
+            ++curr_split;
         }
         else
             token_end = end;

From c9d5251e120e3ca175c3160fded8213b1553f563 Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Sun, 30 Jan 2022 09:10:27 +0800
Subject: [PATCH 039/215] finish dev

---
 src/Functions/FunctionsStringArray.h          | 24 +++++++++++++++----
 src/Functions/URL/URLHierarchy.cpp            |  1 +
 src/Functions/URL/URLPathHierarchy.cpp        |  1 +
 .../URL/extractURLParameterNames.cpp          |  1 +
 src/Functions/URL/extractURLParameters.cpp    |  1 +
 .../0_stateless/02185_split_by_char.reference |  6 +++++
 .../0_stateless/02185_split_by_char.sql       |  9 +++++++
 7 files changed, 39 insertions(+), 4 deletions(-)
 create mode 100644 tests/queries/0_stateless/02185_split_by_char.reference
 create mode 100644 tests/queries/0_stateless/02185_split_by_char.sql

diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index d6f61650b3e..6aaa4036b6b 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -26,6 +26,7 @@ namespace ErrorCodes
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     extern const int BAD_ARGUMENTS;
     extern const int ILLEGAL_COLUMN;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
 
@@ -69,9 +70,18 @@ public:
     static constexpr auto name = "alphaTokens";
     static String getName() { return name; }
 
+    static bool isVariadic() { return false; }
+
     static size_t getNumberOfArguments() { return 1; }
 
     /// Check the type of the function's arguments.
@@ -127,6 +130,7 @@ public:
     static constexpr auto name = "splitByNonAlpha";
     static String getName() { return name; }
 
+    static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 1; }
 
     /// Check the type of the function's arguments.
@@ -185,6 +189,7 @@ public:
     static constexpr auto name = "splitByWhitespace";
     static String getName() { return name; }
 
+    static bool isVariadic() { return false; }
     static size_t getNumberOfArguments() { return 1; }
 
     /// Check the type of the function's arguments.
@@ -245,10 +250,17 @@ private:
 public:
     static constexpr auto name = "splitByChar";
     static String getName() { return name; }
-    static size_t getNumberOfArguments() { return 2; }
+    static bool isVariadic() { return true; }
+    static size_t getNumberOfArguments() { return 0; }
 
     static void checkArguments(const DataTypes & arguments)
     {
+        if (arguments.size() < 2 || arguments.size() > 3)
+            throw Exception(
+                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Function '{}' needs at least 2 and at most 3 arguments; passed {}.",
+                getName(), arguments.size());
+
         if (!isString(arguments[0]))
            throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
@@ -257,14 +269,12 @@ public:
             throw Exception("Illegal type " + arguments[1]->getName() + " of second argument of function " + getName() + ". 
Must be String.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - if (arguments.size() > 2 && !isNativeInteger(arguments[2])) - { + if (arguments.size() == 3 && !isNativeInteger(arguments[2])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument for function '{}' must be integer, got '{}' instead", getName(), arguments[2]->getName()); - } } void init(const ColumnsWithTypeAndName & arguments) @@ -369,6 +379,7 @@ private: public: static constexpr auto name = "splitByString"; static String getName() { return name; } + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 2; } static void checkArguments(const DataTypes & arguments) @@ -446,6 +457,8 @@ private: public: static constexpr auto name = "splitByRegexp"; static String getName() { return name; } + + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 2; } /// Check the type of function arguments. @@ -529,6 +542,7 @@ private: public: static constexpr auto name = "extractAll"; static String getName() { return name; } + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 2; } /// Check the type of function arguments. @@ -608,6 +622,8 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool isVariadic() const override { return Generator::isVariadic(); } + size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index 58d66d8a49d..e21450847b7 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -20,6 +20,7 @@ public: static constexpr auto name = "URLPathHierarchy"; static String getName() { return name; } + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } static void checkArguments(const DataTypes & arguments) diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index e801f920881..6f8832ddf65 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -19,6 +19,7 @@ public: static constexpr auto name = "URLHierarchy"; static String getName() { return name; } + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } static void checkArguments(const DataTypes & arguments) diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index ff0b8c2a035..377e969a6b7 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -19,6 +19,7 @@ public: static constexpr auto name = "extractURLParameterNames"; static String getName() { return name; } + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } static void checkArguments(const DataTypes & arguments) diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index 1eb0ae7f735..fb595c23170 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -19,6 +19,7 @@ public: static constexpr auto name = "extractURLParameters"; static String getName() { return name; } + static bool isVariadic() { return false; } static size_t getNumberOfArguments() { return 1; } static void 
checkArguments(const DataTypes & arguments)
diff --git a/tests/queries/0_stateless/02185_split_by_char.reference b/tests/queries/0_stateless/02185_split_by_char.reference
new file mode 100644
index 00000000000..6647c97960e
--- /dev/null
+++ b/tests/queries/0_stateless/02185_split_by_char.reference
@@ -0,0 +1,6 @@
+['1','2','3']
+['1','2','3']
+['1,2,3']
+['1','2,3']
+['1','2','3']
+['1','2','3']
diff --git a/tests/queries/0_stateless/02185_split_by_char.sql b/tests/queries/0_stateless/02185_split_by_char.sql
new file mode 100644
index 00000000000..6b843c05144
--- /dev/null
+++ b/tests/queries/0_stateless/02185_split_by_char.sql
@@ -0,0 +1,9 @@
+select splitByChar(',', '1,2,3');
+select splitByChar(',', '1,2,3', -1);
+select splitByChar(',', '1,2,3', 0);
+select splitByChar(',', '1,2,3', 1);
+select splitByChar(',', '1,2,3', 2);
+select splitByChar(',', '1,2,3', 3);
+
+select splitByChar(',', '1,2,3', -2); -- { serverError 44 }
+select splitByChar(',', '1,2,3', ''); -- { serverError 43 }
\ No newline at end of file

From e9c435a23fedaab91b5ed78daec9d818e85117bc Mon Sep 17 00:00:00 2001
From: taiyang-li <654010905@qq.com>
Date: Sun, 30 Jan 2022 13:23:11 +0800
Subject: [PATCH 040/215] fix style

---
 src/Functions/FunctionsStringArray.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h
index 6aaa4036b6b..dcd9198ef33 100644
--- a/src/Functions/FunctionsStringArray.h
+++ b/src/Functions/FunctionsStringArray.h
@@ -305,9 +305,11 @@ public:
                 || (max_split_opt = getMaxSplit(arguments[2]))
                 || (max_split_opt = getMaxSplit(arguments[2]))))
             {
-                throw Exception{
-                    "Illegal column " + arguments[2].column->getName() + " of third argument of function " + getName(),
-                    ErrorCodes::ILLEGAL_COLUMN};
+                throw Exception(
+                    ErrorCodes::ILLEGAL_COLUMN,
+                    "Illegal column {} of third argument of function {}",
+                    arguments[2].column->getName(),
+                    getName());
             }
             max_split = *max_split_opt;
         }

From 058ef356ac3632e8d007995ba4621c40c77528a0 Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 31 Jan 2022 19:39:20 +0300
Subject: [PATCH 041/215] Fix schema inference for table function s3

---
 src/TableFunctions/TableFunctionS3.cpp    |  4 +++-
 tests/integration/test_storage_s3/test.py | 10 ++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp
index be217868c15..4490c122f99 100644
--- a/src/TableFunctions/TableFunctionS3.cpp
+++ b/src/TableFunctions/TableFunctionS3.cpp
@@ -29,9 +29,11 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con

    const auto message = fmt::format(
        "The signature of table function {} could be the following:\n" \
+        " - url\n"
        " - url, format\n" \
        " - url, format, structure\n" \
        " - url, format, structure, compression_method\n" \
+        " - url, access_key_id, secret_access_key, format\n"
        " - url, access_key_id, secret_access_key, format, structure\n" \
        " - url, access_key_id, secret_access_key, format, structure, compression_method",
        getName());
@@ -62,7 +64,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
    }
    else
    {
-        if (args.size() < 3 || args.size() > 6)
+        if (args.size() < 1 || args.size() > 6)
            throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        for (auto & arg : args)
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index a804053d4fd..2ed5ca51054 100644
--- 
a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -901,6 +901,13 @@ def test_s3_schema_inference(started_cluster): result = instance.query(f"select count(*) from schema_inference_2") assert(int(result) == 5000000) + table_function = f"s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_native', 'Native')" + result = instance.query(f"desc {table_function}") + assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n" + + result = instance.query(f"select count(*) from {table_function}") + assert(int(result) == 5000000) + def test_empty_file(started_cluster): bucket = started_cluster.minio_bucket @@ -971,3 +978,6 @@ def test_format_detection(started_cluster): result = instance.query(f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") assert(int(result) == 1) + result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") + assert(int(result) == 1) + From 0c3d2b048dc39de13069ab8922dee5892fee54ee Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 1 Feb 2022 17:25:54 +0300 Subject: [PATCH 042/215] fix reading of subcolumns with dots in their names --- .../Serializations/ISerialization.cpp | 6 ++-- .../02191_nested_with_dots.reference | 7 ++++ .../0_stateless/02191_nested_with_dots.sql | 33 +++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02191_nested_with_dots.reference create mode 100644 tests/queries/0_stateless/02191_nested_with_dots.sql diff --git a/src/DataTypes/Serializations/ISerialization.cpp b/src/DataTypes/Serializations/ISerialization.cpp index 5cdc037d5cb..7df4a956c1a 100644 --- a/src/DataTypes/Serializations/ISerialization.cpp +++ b/src/DataTypes/Serializations/ISerialization.cpp @@ -167,8 +167,10 @@ String getNameForSubstreamPath( /// Because nested data may be represented not by Array of Tuple, /// but by separate Array columns with names in a form of a.b, /// and name is encoded as a whole. - stream_name += (escape_tuple_delimiter && it->escape_tuple_delimiter ? - escapeForFileName(".") : ".") + escapeForFileName(it->tuple_element_name); + if (escape_tuple_delimiter && it->escape_tuple_delimiter) + stream_name += escapeForFileName("." + it->tuple_element_name); + else + stream_name += "." 
+ it->tuple_element_name; } } diff --git a/tests/queries/0_stateless/02191_nested_with_dots.reference b/tests/queries/0_stateless/02191_nested_with_dots.reference new file mode 100644 index 00000000000..86cee7a8110 --- /dev/null +++ b/tests/queries/0_stateless/02191_nested_with_dots.reference @@ -0,0 +1,7 @@ +[1] [[1]] +[[1]] +[(1,[1])] +[[1]] +(('a',1),'b') +a 1 +a b diff --git a/tests/queries/0_stateless/02191_nested_with_dots.sql b/tests/queries/0_stateless/02191_nested_with_dots.sql new file mode 100644 index 00000000000..cf649ca3013 --- /dev/null +++ b/tests/queries/0_stateless/02191_nested_with_dots.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS t_nested_with_dots; + +CREATE TABLE t_nested_with_dots (n Nested(id UInt64, `values.id` Array(UInt64))) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_nested_with_dots VALUES ([1], [[1]]); + +SELECT * FROM t_nested_with_dots; +SELECT n.values.id FROM t_nested_with_dots; + +DROP TABLE IF EXISTS t_nested_with_dots; +SET flatten_nested = 0; + +CREATE TABLE t_nested_with_dots (n Nested(id UInt64, `values.id` Array(UInt64))) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_nested_with_dots VALUES ([(1, [1])]); + +SELECT * FROM t_nested_with_dots; +SELECT n.values.id FROM t_nested_with_dots; + +DROP TABLE IF EXISTS t_nested_with_dots; + +CREATE TABLE t_nested_with_dots (`t.t2` Tuple(`t3.t4.t5` Tuple(`s1.s2` String, `u1.u2` UInt64), `s3.s4.s5` String)) +ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO t_nested_with_dots VALUES ((('a', 1), 'b')); + +SELECT * FROM t_nested_with_dots; +SELECT t.t2.t3.t4.t5.s1.s2, t.t2.t3.t4.t5.u1.u2 FROM t_nested_with_dots; +SELECT t.t2.t3.t4.t5.s1.s2, t.t2.s3.s4.s5 FROM t_nested_with_dots; + +DROP TABLE IF EXISTS t_nested_with_dots; From 56aa60ab88f9af814b20769511ead8bfc9680826 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 1 Feb 2022 15:56:03 +0000 Subject: [PATCH 043/215] Add composability to casting and index operators --- src/Parsers/ExpressionListParsers.cpp | 6 +++--- src/Parsers/ExpressionListParsers.h | 11 ++++++++++- .../0_stateless/01852_cast_operator_4.reference | 13 +++++++++++++ .../queries/0_stateless/01852_cast_operator_4.sql | 14 ++++++++++++++ 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/01852_cast_operator_4.reference create mode 100644 tests/queries/0_stateless/01852_cast_operator_4.sql diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 96c1bad75c2..13af308736b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -689,7 +689,7 @@ bool ParserUnaryExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expec bool ParserCastExpression::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr expr_ast; - if (!ParserExpressionElement().parse(pos, expr_ast, expected)) + if (!elem_parser->parse(pos, expr_ast, expected)) return false; ASTPtr type_ast; @@ -711,7 +711,7 @@ bool ParserArrayElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected { return ParserLeftAssociativeBinaryOperatorList{ operators, - std::make_unique(), + std::make_unique(std::make_unique()), std::make_unique(false) }.parse(pos, node, expected); } @@ -721,7 +721,7 @@ bool ParserTupleElementExpression::parseImpl(Pos & pos, ASTPtr & node, Expected { return ParserLeftAssociativeBinaryOperatorList{ operators, - std::make_unique(), + std::make_unique(std::make_unique()), std::make_unique() }.parse(pos, node, expected); } diff --git 
a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index a035d4a2ef0..358fe778f91 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -203,6 +203,15 @@ protected: /// Example: "[1, 1 + 1, 1 + 2]::Array(UInt8)" class ParserCastExpression : public IParserBase { +private: + ParserPtr elem_parser; + +public: + ParserCastExpression(ParserPtr && elem_parser_) + : elem_parser(std::move(elem_parser_)) + { + } + protected: const char * getName() const override { return "CAST expression"; } @@ -238,7 +247,7 @@ class ParserUnaryExpression : public IParserBase { private: static const char * operators[]; - ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique<ParserTupleElementExpression>()}; + ParserPrefixUnaryOperatorExpression operator_parser {operators, std::make_unique<ParserCastExpression>(std::make_unique<ParserTupleElementExpression>())}; protected: const char * getName() const override { return "unary expression"; } diff --git a/tests/queries/0_stateless/01852_cast_operator_4.reference b/tests/queries/0_stateless/01852_cast_operator_4.reference new file mode 100644 index 00000000000..beadc0cd15a --- /dev/null +++ b/tests/queries/0_stateless/01852_cast_operator_4.reference @@ -0,0 +1,13 @@ +3 +SELECT CAST([3, 4, 5][1], \'Int32\') +4 +SELECT CAST(CAST(\'[3,4,5]\', \'Array(Int64)\')[2], \'Int8\') +0 +1 +2 +SELECT CAST(CAST(\'[1,2,3]\', \'Array(UInt64)\')[CAST(CAST([number, number], \'Array(UInt8)\')[number], \'UInt64\')], \'UInt8\') +FROM numbers(3) +3 +SELECT CAST((3, 4, 5).1, \'Int32\') +4 +SELECT CAST(CAST((3, 4, 5), \'Tuple(UInt64, UInt64, UInt64)\').1, \'Int32\') diff --git a/tests/queries/0_stateless/01852_cast_operator_4.sql b/tests/queries/0_stateless/01852_cast_operator_4.sql new file mode 100644 index 00000000000..9b75bf84c35 --- /dev/null +++ b/tests/queries/0_stateless/01852_cast_operator_4.sql @@ -0,0 +1,14 @@ +SELECT [3,4,5][1]::Int32; +EXPLAIN SYNTAX SELECT [3,4,5][1]::Int32; + +SELECT [3,4,5]::Array(Int64)[2]::Int8; +EXPLAIN SYNTAX SELECT [3,4,5]::Array(Int64)[2]::Int8; + +SELECT [1,2,3]::Array(UInt64)[[number, number]::Array(UInt8)[number]::UInt64]::UInt8 from numbers(3); +EXPLAIN SYNTAX SELECT [1,2,3]::Array(UInt64)[[number, number]::Array(UInt8)[number]::UInt64]::UInt8 from numbers(3); + +SELECT tuple(3,4,5).1::Int32; +EXPLAIN SYNTAX SELECT tuple(3,4,5).1::Int32; + +SELECT tuple(3,4,5)::Tuple(UInt64, UInt64, UInt64).2::Int32; +EXPLAIN SYNTAX SELECT tuple(3,4,5)::Tuple(UInt64, UInt64, UInt64).1::Int32; From 93f9a9e37a46eee573746d456e1bb4a5b3b50940 Mon Sep 17 00:00:00 2001 From: Andrew Date: Wed, 2 Feb 2022 14:32:29 +0000 Subject: [PATCH 044/215] Update clickhouse-keeper.md fix the run command and add example --- docs/en/operations/clickhouse-keeper.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index fcfc675f9d7..48eb590aca2 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -108,7 +108,8 @@ Examples of configuration for quorum with three nodes can be found in [integrati ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `<keeper_server>` and start ClickHouse server as always.
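For illustration, a minimal single-node `<keeper_server>` section might look like the following sketch; the ports and paths here are only placeholder values, and the full set of settings is described in the configuration section above:

```xml
<keeper_server>
    <tcp_port>9181</tcp_port>
    <server_id>1</server_id>
    <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
    <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
    <raft_configuration>
        <server>
            <id>1</id>
            <hostname>localhost</hostname>
            <port>9234</port>
        </server>
    </raft_configuration>
</keeper_server>
```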
If you want to run standalone ClickHouse Keeper you can start it in a similar way with: ```bash -clickhouse-keeper --config /etc/your_path_to_config/config.xml --daemon +clickhouse keeper --config /etc/your_path_to_config/config.xml --daemon +# example: clickhouse keeper --config /etc/clickhouse-server/config.d/keeper_config.xml ``` ## Four Letter Word Commands {#four-letter-word-commands} From eae0bc7c04aeb4c54ef39305253f1178e9e67c75 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 2 Feb 2022 18:19:33 +0300 Subject: [PATCH 045/215] add query id for delayed interactive --- src/Client/ClientBase.cpp | 36 ++++++++++++++++++++++++------------ src/Client/ClientBase.h | 2 ++ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 27deace416d..1958b1d81ce 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1485,6 +1485,25 @@ String ClientBase::prompt() const } +void ClientBase::initQueryIdFormats() +{ + if (!query_id_formats.empty()) + return; + + /// Initialize query_id_formats if any + if (config().has("query_id_formats")) + { + Poco::Util::AbstractConfiguration::Keys keys; + config().keys("query_id_formats", keys); + for (const auto & name : keys) + query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name)); + } + + if (query_id_formats.empty()) + query_id_formats.emplace_back("Query id:", " {query_id}\n"); +} + + void ClientBase::runInteractive() { if (config().has("query_id")) @@ -1492,6 +1511,8 @@ void ClientBase::runInteractive() if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); + initQueryIdFormats(); + /// Initialize DateLUT here to avoid counting time spent here as query execution time. const auto local_tz = DateLUT::instance().getTimeZone(); @@ -1512,18 +1533,6 @@ void ClientBase::runInteractive() home_path = home_path_cstr; } - /// Initialize query_id_formats if any - if (config().has("query_id_formats")) - { - Poco::Util::AbstractConfiguration::Keys keys; - config().keys("query_id_formats", keys); - for (const auto & name : keys) - query_id_formats.emplace_back(name + ":", config().getString("query_id_formats." + name)); - } - - if (query_id_formats.empty()) - query_id_formats.emplace_back("Query id:", " {query_id}\n"); - /// Load command history if present. if (config().has("history_file")) history_file = config().getString("history_file"); @@ -1632,6 +1641,9 @@ void ClientBase::runInteractive() void ClientBase::runNonInteractive() { + if (delayed_interactive) + initQueryIdFormats(); + if (!queries_files.empty()) { auto process_multi_query_from_file = [&](const String & file) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 89e0770182b..e2cd91d1e5f 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -138,6 +138,8 @@ private: void updateSuggest(const ASTCreateQuery & ast_create); + void initQueryIdFormats(); + protected: bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; From 8f1030378a21cfa78f99b59396d046be61d8951e Mon Sep 17 00:00:00 2001 From: Mohamad Fadhil Date: Fri, 4 Feb 2022 10:02:42 +0800 Subject: [PATCH 046/215] Add INTERVAL STEP example in SELECT ..
ORDER BY WITH FILL documentation This corresponds to the merged PR https://github.com/ClickHouse/ClickHouse/pull/30927 --- .../statements/select/order-by.md | 83 ++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index ee6893812cc..823594c8ec4 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_ `WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings. When `FROM const_expr` not defined sequence of filling use minimal `expr` field value from `ORDER BY`. When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`. -When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type and as `seconds` for DateTime type. +When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals. When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type. Example of a query without `WITH FILL`: @@ -402,4 +402,85 @@ Result: └────────────┴────────────┴──────────┘ ``` +The following query uses the `INTERVAL` data type of 1 day for each data filled on column `d1`: + +``` sql +SELECT + toDate((number * 10) * 86400) AS d1, + toDate(number * 86400) AS d2, + 'original' AS source +FROM numbers(10) +WHERE (number % 3) = 1 +ORDER BY + d1 WITH FILL STEP INTERVAL 1 DAY, + d2 WITH FILL; +``` + +Result: +``` +┌─────────d1─┬─────────d2─┬─source───┐ +│ 1970-01-11 │ 1970-01-02 │ original │ +│ 1970-01-12 │ 1970-01-01 │ │ +│ 1970-01-13 │ 1970-01-01 │ │ +│ 1970-01-14 │ 1970-01-01 │ │ +│ 1970-01-15 │ 1970-01-01 │ │ +│ 1970-01-16 │ 1970-01-01 │ │ +│ 1970-01-17 │ 1970-01-01 │ │ +│ 1970-01-18 │ 1970-01-01 │ │ +│ 1970-01-19 │ 1970-01-01 │ │ +│ 1970-01-20 │ 1970-01-01 │ │ +│ 1970-01-21 │ 1970-01-01 │ │ +│ 1970-01-22 │ 1970-01-01 │ │ +│ 1970-01-23 │ 1970-01-01 │ │ +│ 1970-01-24 │ 1970-01-01 │ │ +│ 1970-01-25 │ 1970-01-01 │ │ +│ 1970-01-26 │ 1970-01-01 │ │ +│ 1970-01-27 │ 1970-01-01 │ │ +│ 1970-01-28 │ 1970-01-01 │ │ +│ 1970-01-29 │ 1970-01-01 │ │ +│ 1970-01-30 │ 1970-01-01 │ │ +│ 1970-01-31 │ 1970-01-01 │ │ +│ 1970-02-01 │ 1970-01-01 │ │ +│ 1970-02-02 │ 1970-01-01 │ │ +│ 1970-02-03 │ 1970-01-01 │ │ +│ 1970-02-04 │ 1970-01-01 │ │ +│ 1970-02-05 │ 1970-01-01 │ │ +│ 1970-02-06 │ 1970-01-01 │ │ +│ 1970-02-07 │ 1970-01-01 │ │ +│ 1970-02-08 │ 1970-01-01 │ │ +│ 1970-02-09 │ 1970-01-01 │ │ +│ 1970-02-10 │ 1970-01-05 │ original │ +│ 1970-02-11 │ 1970-01-01 │ │ +│ 1970-02-12 │ 1970-01-01 │ │ +│ 1970-02-13 │ 1970-01-01 │ │ +│ 1970-02-14 │ 1970-01-01 │ │ +│ 1970-02-15 │ 1970-01-01 │ │ +│ 1970-02-16 │ 1970-01-01 │ │ +│ 1970-02-17 │ 1970-01-01 │ │ +│ 1970-02-18 │ 1970-01-01 │ │ +│ 1970-02-19 │ 1970-01-01 │ │ +│ 1970-02-20 │ 1970-01-01 │ │ +│ 1970-02-21 │ 1970-01-01 │ │ +│ 1970-02-22 │ 1970-01-01 │ │ +│ 1970-02-23 │ 1970-01-01 │ │ +│ 1970-02-24 │ 1970-01-01 │ │ +│ 1970-02-25 │ 
1970-01-01 │ │ +│ 1970-02-26 │ 1970-01-01 │ │ +│ 1970-02-27 │ 1970-01-01 │ │ +│ 1970-02-28 │ 1970-01-01 │ │ +│ 1970-03-01 │ 1970-01-01 │ │ +│ 1970-03-02 │ 1970-01-01 │ │ +│ 1970-03-03 │ 1970-01-01 │ │ +│ 1970-03-04 │ 1970-01-01 │ │ +│ 1970-03-05 │ 1970-01-01 │ │ +│ 1970-03-06 │ 1970-01-01 │ │ +│ 1970-03-07 │ 1970-01-01 │ │ +│ 1970-03-08 │ 1970-01-01 │ │ +│ 1970-03-09 │ 1970-01-01 │ │ +│ 1970-03-10 │ 1970-01-01 │ │ +│ 1970-03-11 │ 1970-01-01 │ │ +│ 1970-03-12 │ 1970-01-08 │ original │ +└────────────┴────────────┴──────────┘ +``` + [Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by/) From 7bf224343168c4c48882f210f32de56db528033c Mon Sep 17 00:00:00 2001 From: feng lv Date: Fri, 4 Feb 2022 14:13:06 +0000 Subject: [PATCH 047/215] use LowCardinality for _file and _path virtual columns in StorageFile fix --- CMakeLists.txt | 2 +- src/Storages/StorageFile.cpp | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3c846cdd51e..098e66f4b07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,7 +182,7 @@ if (COMPILER_CLANG) if (HAS_USE_CTOR_HOMING) # For more info see https://blog.llvm.org/posts/2021-04-05-constructor-homing-for-debug-info/ - if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") + if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -fuse-ctor-homing") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Xclang -fuse-ctor-homing") endif() diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 2bf172f5b2a..edd5e0447d5 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -403,9 +403,15 @@ public: /// Note: AddingDefaultsBlockInputStream doesn't change header. if (need_path_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + header.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_path"}); if (need_file_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + header.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_file"}); return header; } @@ -515,7 +521,7 @@ public: /// Enrich with virtual columns. 
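+ /// Both virtual columns are constant within a chunk: they are created as const LowCardinality(String) columns and materialized below, so the expanded column still carries only a single-entry dictionary, which keeps _path and _file cheap.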
if (files_info->need_path_column) { - auto column = DataTypeString().createColumnConst(num_rows, current_path); + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); chunk.addColumn(column->convertToFullColumnIfConst()); } @@ -524,7 +530,7 @@ public: size_t last_slash_pos = current_path.find_last_of('/'); auto file_name = current_path.substr(last_slash_pos + 1); - auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name)); + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); chunk.addColumn(column->convertToFullColumnIfConst()); } @@ -1093,8 +1099,7 @@ void registerStorageFile(StorageFactory & factory) NamesAndTypesList StorageFile::getVirtuals() const { return NamesAndTypesList{ - {"_path", std::make_shared()}, - {"_file", std::make_shared()} - }; + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; } } From a5306396dd8dd6e761972f9677ea3d5b82fc3738 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 4 Feb 2022 14:53:22 +0000 Subject: [PATCH 048/215] Function mapPopulateSeries refactoring --- src/Functions/array/mapPopulateSeries.cpp | 667 +++++++++++----------- 1 file changed, 337 insertions(+), 330 deletions(-) diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index b253a85c95d..ce33a7b8634 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -1,15 +1,18 @@ +#include #include #include #include #include #include #include +#include +#include "DataTypes/IDataType.h" +#include "DataTypes/DataTypeMap.h" #include #include #include -#include "Core/ColumnWithTypeAndName.h" -#include "DataTypes/DataTypeMap.h" -#include "DataTypes/IDataType.h" +#include + namespace DB { @@ -19,6 +22,8 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int TOO_LARGE_ARRAY_SIZE; + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } class FunctionMapPopulateSeries : public IFunction @@ -32,418 +37,419 @@ private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } - bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForConstants() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - void checkTypes(const DataTypePtr & key_type, const DataTypePtr max_key_type) const + void checkTypes(const DataTypePtr & key_type, const DataTypePtr & value_type, const DataTypePtr & max_key_type) const { - WhichDataType which_key(key_type); - if (!(which_key.isInt() || which_key.isUInt())) + WhichDataType key_data_type(key_type); + WhichDataType value_data_type(value_type); + + if (!(key_data_type.isInt() || key_data_type.isUInt())) { throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Keys for {} function should be of integer type (signed or unsigned)", getName()); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} key argument should be of signed or unsigned integer type. 
Actual type {}", + getName(), + key_type->getName()); } - if (max_key_type) + if (!(value_data_type.isInt() || value_data_type.isUInt())) { - WhichDataType which_max_key(max_key_type); - - if (which_max_key.isNullable()) - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Max key argument in arguments of function " + getName() + " can not be Nullable"); - - if (key_type->getTypeId() != max_key_type->getTypeId()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Max key type in {} should be same as keys type", getName()); - } - } - - DataTypePtr getReturnTypeForTuple(const DataTypes & arguments) const - { - if (arguments.size() < 2) throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} accepts at least two arrays for key and value", getName()); + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} value argument should be of signed or unsigned integer type. Actual type {}", + getName(), + value_type->getName()); + } - if (arguments.size() > 3) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName()); + if (!max_key_type) + return; - const DataTypeArray * key_array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get()); - const DataTypeArray * val_array_type = checkAndGetDataType<DataTypeArray>(arguments[1].get()); + WhichDataType max_key_data_type(max_key_type); - if (!key_array_type || !val_array_type) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts two arrays for key and value", getName()); + if (max_key_data_type.isNullable()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} max key argument can not be Nullable. Actual type {}", + getName(), + max_key_type->getName()); - const auto & key_type = key_array_type->getNestedType(); - - if (arguments.size() == 3) - this->checkTypes(key_type, arguments[2]); - else - this->checkTypes(key_type, nullptr); - - return std::make_shared<DataTypeTuple>(DataTypes{arguments[0], arguments[1]}); - } - - DataTypePtr getReturnTypeForMap(const DataTypes & arguments) const - { - const auto * map = assert_cast<const DataTypeMap *>(arguments[0].get()); - if (arguments.size() == 1) - this->checkTypes(map->getKeyType(), nullptr); - else if (arguments.size() == 2) - this->checkTypes(map->getKeyType(), arguments[1]); - else - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Too many arguments in {} call", getName()); - - return std::make_shared<DataTypeMap>(map->getKeyType(), map->getValueType()); + if (!(max_key_data_type.isInt() || max_key_data_type.isUInt())) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} max key should be of signed or unsigned integer type. Key type {}, 
Actual type {}.", + getName(), + key_type->getName(), + max_key_type->getName()); } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, getName() + " accepts at least one map or two arrays"); + if (arguments.empty() || arguments.size() > 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} accepts at least one map or two arrays arguments, and optional max key argument", + getName()); - if (arguments[0]->getTypeId() == TypeIndex::Array) - return getReturnTypeForTuple(arguments); - else if (arguments[0]->getTypeId() == TypeIndex::Map) - return getReturnTypeForMap(arguments); + WhichDataType key_argument_data_type(arguments[0]); + + DataTypePtr key_argument_series_type; + DataTypePtr value_argument_series_type; + + size_t max_key_argument_index = 0; + + if (key_argument_data_type.isArray()) + { + DataTypePtr value_type; + if (1 < arguments.size()) + value_type = arguments[1]; + + if (arguments.size() < 2 || (value_type && !isArray(value_type))) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} if array argument is passed as key, additional array argument as value must be passed", + getName()); + + const auto & key_array_type = assert_cast(*arguments[0]); + const auto & value_array_type = assert_cast(*value_type); + + key_argument_series_type = key_array_type.getNestedType(); + value_argument_series_type = value_array_type.getNestedType(); + + max_key_argument_index = 2; + } + else if (key_argument_data_type.isMap()) + { + const auto & map_data_type = assert_cast(*arguments[0]); + + key_argument_series_type = map_data_type.getKeyType(); + value_argument_series_type = map_data_type.getValueType(); + + max_key_argument_index = 1; + } else throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} only accepts one map or arrays, but got {}", getName(), arguments[0]->getName()); + + DataTypePtr max_key_argument_type; + if (max_key_argument_index < arguments.size()) + max_key_argument_type = arguments[max_key_argument_index]; + + checkTypes(key_argument_series_type, value_argument_series_type, max_key_argument_type); + + if (key_argument_data_type.isArray()) + return std::make_shared(DataTypes{arguments[0], arguments[1]}); + else + return arguments[0]; } - // Struct holds input and output columns references, - // Both arrays and maps have similar columns to work with but extracted differently - template - struct ColumnsInOut + template + void executeImplTyped( + const ColumnPtr & key_column, + const ColumnPtr & value_column, + const ColumnPtr & offsets_column, + const ColumnPtr & max_key_column, + MutableColumnPtr result_key_column, + MutableColumnPtr result_value_column, + MutableColumnPtr result_offset_column) const { - // inputs - const PaddedPODArray & in_keys_data; - const PaddedPODArray & in_vals_data; - const IColumn::Offsets & in_key_offsets; - const IColumn::Offsets & in_val_offsets; - size_t row_count; - bool key_is_const; - bool val_is_const; + const auto & key_column_typed = assert_cast &>(*key_column); + const auto & key_column_data = key_column_typed.getData(); - // outputs - PaddedPODArray & out_keys_data; - PaddedPODArray & out_vals_data; + const auto & offsets_column_typed = assert_cast &>(*offsets_column); + const auto & offsets = offsets_column_typed.getData(); - IColumn::Offsets & out_keys_offsets; - // with map argument this field will not be used - IColumn::Offsets * out_vals_offsets; - }; + const auto & 
value_column_typed = assert_cast &>(*value_column); + const auto & value_column_data = value_column_typed.getData(); - template - ColumnsInOut getInOutDataFromArrays(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const - { - auto * out_tuple = assert_cast(res_column.get()); - auto & out_keys_array = assert_cast(out_tuple->getColumn(0)); - auto & out_vals_array = assert_cast(out_tuple->getColumn(1)); + auto & result_key_column_typed = assert_cast &>(*result_key_column); + auto & result_key_data = result_key_column_typed.getData(); - const auto * key_column = arg_columns[0].get(); - const auto * in_keys_array = checkAndGetColumn(key_column); + auto & result_value_column_typed = assert_cast &>(*result_value_column); + auto & result_value_data = result_value_column_typed.getData(); - bool key_is_const = false, val_is_const = false; + auto & result_offsets_column_typed = assert_cast &>(*result_offset_column); + auto & result_offsets_data = result_offsets_column_typed.getData(); - if (!in_keys_array) + std::optional max_key_column_const; + if (max_key_column) { - const ColumnConst * const_array = checkAndGetColumnConst(key_column); - if (!const_array) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), key_column->getName()); - - in_keys_array = checkAndGetColumn(const_array->getDataColumnPtr().get()); - key_is_const = true; + if (auto * const_max_key_column = checkAndGetColumnConst>(max_key_column.get())) + max_key_column_const = const_max_key_column->template getValue(); } - const auto * val_column = arg_columns[1].get(); - const auto * in_values_array = checkAndGetColumn(val_column); - if (!in_values_array) + PaddedPODArray> sorted_keys_values; + + size_t key_offsets_size = offsets.size(); + result_key_data.reserve(key_offsets_size); + result_value_data.reserve(key_offsets_size); + + for (size_t offset_index = 0; offset_index < key_offsets_size; ++offset_index) { - const ColumnConst * const_array = checkAndGetColumnConst(val_column); - if (!const_array) - throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "Expected array column in function {}, found {}", getName(), val_column->getName()); + size_t start_offset = offsets[offset_index - 1]; + size_t end_offset = offsets[offset_index]; - in_values_array = checkAndGetColumn(const_array->getDataColumnPtr().get()); - val_is_const = true; - } + sorted_keys_values.clear(); - if (!in_keys_array || !in_values_array) - /* something went wrong */ - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName()); + for (; start_offset < end_offset; ++start_offset) + sorted_keys_values.emplace_back(key_column_data[start_offset], value_column_data[start_offset]); - const auto & in_keys_data = assert_cast &>(in_keys_array->getData()).getData(); - const auto & in_values_data = assert_cast &>(in_values_array->getData()).getData(); - const auto & in_keys_offsets = in_keys_array->getOffsets(); - const auto & in_vals_offsets = in_values_array->getOffsets(); - - auto & out_keys_data = assert_cast &>(out_keys_array.getData()).getData(); - auto & out_vals_data = assert_cast &>(out_vals_array.getData()).getData(); - auto & out_keys_offsets = out_keys_array.getOffsets(); - - size_t row_count = key_is_const ? 
in_values_array->size() : in_keys_array->size(); - IColumn::Offsets * out_vals_offsets = &out_vals_array.getOffsets(); - - return { - in_keys_data, - in_values_data, - in_keys_offsets, - in_vals_offsets, - row_count, - key_is_const, - val_is_const, - out_keys_data, - out_vals_data, - out_keys_offsets, - out_vals_offsets}; - } - - template - ColumnsInOut getInOutDataFromMap(MutableColumnPtr & res_column, ColumnPtr * arg_columns) const - { - const auto * in_map = assert_cast(arg_columns[0].get()); - const auto & in_nested_array = in_map->getNestedColumn(); - const auto & in_nested_tuple = in_map->getNestedData(); - const auto & in_keys_data = assert_cast &>(in_nested_tuple.getColumn(0)).getData(); - const auto & in_vals_data = assert_cast &>(in_nested_tuple.getColumn(1)).getData(); - const auto & in_keys_offsets = in_nested_array.getOffsets(); - - auto * out_map = assert_cast(res_column.get()); - auto & out_nested_array = out_map->getNestedColumn(); - auto & out_nested_tuple = out_map->getNestedData(); - auto & out_keys_data = assert_cast &>(out_nested_tuple.getColumn(0)).getData(); - auto & out_vals_data = assert_cast &>(out_nested_tuple.getColumn(1)).getData(); - auto & out_keys_offsets = out_nested_array.getOffsets(); - - return { - in_keys_data, - in_vals_data, - in_keys_offsets, - in_keys_offsets, - in_nested_array.size(), - false, - false, - out_keys_data, - out_vals_data, - out_keys_offsets, - nullptr}; - } - - template - ColumnPtr execute2(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type) const - { - MutableColumnPtr res_column = res_type->createColumn(); - bool max_key_is_const = false; - auto columns = res_column->getDataType() == TypeIndex::Tuple ? getInOutDataFromArrays(res_column, arg_columns) - : getInOutDataFromMap(res_column, arg_columns); - - KeyType max_key_const{0}; - - if (max_key_column && isColumnConst(*max_key_column)) - { - const auto * column_const = static_cast(&*max_key_column); - max_key_const = column_const->template getValue(); - max_key_is_const = true; - } - - IColumn::Offset offset{0}; - std::map res_map; - - //Iterate through two arrays and fill result values. - for (size_t row = 0; row < columns.row_count; ++row) - { - size_t key_offset = 0, val_offset = 0, items_count = columns.in_key_offsets[0], val_array_size = columns.in_val_offsets[0]; - - res_map.clear(); - - if (!columns.key_is_const) + if unlikely(sorted_keys_values.empty()) { - key_offset = row > 0 ? columns.in_key_offsets[row - 1] : 0; - items_count = columns.in_key_offsets[row] - key_offset; - } - - if (!columns.val_is_const) - { - val_offset = row > 0 ? 
columns.in_val_offsets[row - 1] : 0; - val_array_size = columns.in_val_offsets[row] - val_offset; - } - - if (items_count != val_array_size) - throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Key and value array should have same amount of elements in function {}", - getName()); - - if (items_count == 0) - { - columns.out_keys_offsets.push_back(offset); + result_offsets_data.emplace_back(result_value_data.size()); continue; } - for (size_t i = 0; i < items_count; ++i) - { - res_map.insert({columns.in_keys_data[key_offset + i], columns.in_vals_data[val_offset + i]}); - } + std::sort(sorted_keys_values.begin(), sorted_keys_values.end()); - auto min_key = res_map.begin()->first; - auto max_key = res_map.rbegin()->first; + KeyType min_key = sorted_keys_values.front().first; + KeyType max_key = sorted_keys_values.back().first; if (max_key_column) { - /* update the current max key if it's not constant */ - if (max_key_is_const) + KeyType max_key_column_value {}; + + if (max_key_column_const) { - max_key = max_key_const; + max_key_column_value = *max_key_column_const; } else { - max_key = (static_cast *>(max_key_column.get()))->getData()[row]; + const auto & max_key_column_typed = assert_cast &>(*max_key_column); + max_key_column_value = max_key_column_typed.getData()[offset_index]; } - /* no need to add anything, max key is less that first key */ - if (max_key < min_key) + max_key = max_key_column_value; + + if (unlikely(max_key < min_key)) { - columns.out_keys_offsets.push_back(offset); + result_offsets_data.emplace_back(result_value_data.size()); continue; } } + size_t length = static_cast(max_key - min_key); static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30; - if (static_cast(max_key) - static_cast(min_key) > MAX_ARRAY_SIZE) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName()); + if (length > MAX_ARRAY_SIZE) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, + "Function {} too large array size {} in the result", + getName(), + length); - /* fill the result arrays */ - KeyType key; - for (key = min_key;; ++key) + size_t result_key_data_size = result_key_data.size(); + size_t result_value_data_size = result_value_data.size(); + size_t sorted_keys_values_size = sorted_keys_values.size(); + + result_key_data.resize_fill(result_key_data_size + length + 1); + result_value_data.resize_fill(result_value_data_size + length + 1); + + size_t sorted_values_index = 0; + + for (KeyType current_key = min_key; current_key <= max_key; ++current_key) { - columns.out_keys_data.push_back(key); + size_t key_offset_index = current_key - min_key; + size_t insert_index = result_value_data_size + key_offset_index; - auto it = res_map.find(key); - if (it != res_map.end()) + result_key_data[insert_index] = current_key; + + if (sorted_values_index < sorted_keys_values_size && + sorted_keys_values[sorted_values_index].first == current_key) { - columns.out_vals_data.push_back(it->second); - } - else - { - columns.out_vals_data.push_back(0); + auto & sorted_key_value = sorted_keys_values[sorted_values_index]; + if (current_key == sorted_key_value.first) + { + result_value_data[insert_index] = sorted_key_value.second; + } + + ++sorted_values_index; + while (sorted_values_index < sorted_keys_values_size && + current_key == sorted_keys_values[sorted_values_index].first) + { + ++sorted_values_index; + } } - ++offset; - if (key == max_key) + if (current_key == max_key) break; } - columns.out_keys_offsets.push_back(offset); - } - - if 
(columns.out_vals_offsets) - columns.out_vals_offsets->insert(columns.out_keys_offsets.begin(), columns.out_keys_offsets.end()); - - return res_column; - } - - template - ColumnPtr execute1(ColumnPtr * arg_columns, ColumnPtr max_key_column, const DataTypePtr & res_type, const DataTypePtr & val_type) const - { - switch (val_type->getTypeId()) - { - case TypeIndex::Int8: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::Int16: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::Int32: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::Int64: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::Int128: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::Int256: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::UInt8: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::UInt16: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::UInt32: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::UInt64: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::UInt128: - return execute2(arg_columns, max_key_column, res_type); - case TypeIndex::UInt256: - return execute2(arg_columns, max_key_column, res_type); - default: - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName()); + result_offsets_data.emplace_back(result_value_data.size()); } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override { - DataTypePtr res_type, key_type, val_type; - ColumnPtr max_key_column = nullptr; - ColumnPtr arg_columns[] = {arguments[0].column, nullptr}; + DataTypePtr key_series_type; + DataTypePtr value_series_type; - if (arguments[0].type->getTypeId() == TypeIndex::Array) + ColumnPtr key_column; + ColumnPtr value_column; + ColumnPtr offsets_column; + + size_t max_key_argument_index = 0; + + auto first_argument_column = arguments[0].column->convertToFullColumnIfConst(); + ColumnPtr second_argument_array_column; + + if (const auto * key_argument_array_column = typeid_cast(first_argument_column.get())) { - key_type = assert_cast(arguments[0].type.get())->getNestedType(); - val_type = assert_cast(arguments[1].type.get())->getNestedType(); - res_type = getReturnTypeImpl(DataTypes{arguments[0].type, arguments[1].type}); + const ColumnArray * value_argument_array_column = nullptr; - arg_columns[1] = arguments[1].column; - if (arguments.size() == 3) + if (1 < arguments.size()) { - /* max key provided */ - max_key_column = arguments[2].column; + second_argument_array_column = arguments[1].column->convertToFullColumnIfConst(); + value_argument_array_column = typeid_cast(second_argument_array_column.get()); } + + if (!value_argument_array_column) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Function {} if array argument is passed as key, additional array argument as value must be passed", + getName()); + + key_series_type = assert_cast(*arguments[0].type).getNestedType(); + key_column = key_argument_array_column->getDataPtr(); + const auto & key_offsets = key_argument_array_column->getOffsets(); + + value_series_type = assert_cast(*arguments[1].type).getNestedType(); + value_column = value_argument_array_column->getDataPtr(); + const auto & value_offsets = 
value_argument_array_column->getOffsets(); + + if (key_offsets != value_offsets) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Function {} key and value array should have same amount of elements", + getName()); + + offsets_column = key_argument_array_column->getOffsetsPtr(); + max_key_argument_index = 2; + } + else if (const auto * key_argument_map_column = typeid_cast(first_argument_column.get())) + { + const auto & nested_array = key_argument_map_column->getNestedColumn(); + const auto & nested_data_column = key_argument_map_column->getNestedData(); + + const auto & map_argument_type = assert_cast(*arguments[0].type); + key_series_type = map_argument_type.getKeyType(); + value_series_type = map_argument_type.getValueType(); + + key_column = nested_data_column.getColumnPtr(0); + value_column = nested_data_column.getColumnPtr(1); + offsets_column = nested_array.getOffsetsPtr(); + + max_key_argument_index = 1; + } + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Function {} only accepts one map or arrays, but got {}", + getName(), + arguments[0].type->getName()); + + ColumnPtr max_key_column; + + if (max_key_argument_index < arguments.size()) + { + max_key_column = arguments[max_key_argument_index].column; + auto max_key_column_type = arguments[max_key_argument_index].type; + + if (!max_key_column_type->equals(*key_series_type)) + { + ColumnWithTypeAndName column_to_cast = {max_key_column, max_key_column_type, ""}; + auto casted_column = castColumnAccurate(std::move(column_to_cast), key_series_type); + max_key_column = std::move(casted_column); + } + } + + auto result_column = result_type->createColumn(); + WhichDataType result_data_type(result_type); + + MutableColumnPtr result_key_column; + MutableColumnPtr result_value_column; + MutableColumnPtr result_offset_column; + IColumn * result_offset_column_raw; + MutableColumnPtr result_array_additional_offset_column; + + auto * tuple_column = typeid_cast(result_column.get()); + + if (tuple_column && tuple_column->tupleSize() == 2) + { + auto key_array_column = tuple_column->getColumnPtr(0)->assumeMutable(); + auto value_array_column = tuple_column->getColumnPtr(1)->assumeMutable(); + + auto * key_array_column_typed = typeid_cast(key_array_column.get()); + auto * value_array_column_typed = typeid_cast(value_array_column.get()); + + if (!key_array_column_typed || !value_array_column_typed) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} result type should be Tuple with two nested Array columns or Map. 
Actual {}", + getName(), + result_type->getName()); + + result_key_column = key_array_column_typed->getDataPtr()->assumeMutable(); + result_value_column = value_array_column_typed->getDataPtr()->assumeMutable(); + + result_offset_column = key_array_column_typed->getOffsetsPtr()->assumeMutable(); + result_offset_column_raw = result_offset_column.get(); + + result_array_additional_offset_column = value_array_column_typed->getOffsetsPtr()->assumeMutable(); + } + else if (const auto * map_column = typeid_cast(result_column.get())) + { + result_key_column = map_column->getNestedData().getColumnPtr(0)->assumeMutable(); + result_value_column = map_column->getNestedData().getColumnPtr(1)->assumeMutable(); + result_offset_column = map_column->getNestedColumn().getOffsetsPtr()->assumeMutable(); + result_offset_column_raw = result_offset_column.get(); } else { - assert(arguments[0].type->getTypeId() == TypeIndex::Map); - - const auto * map_type = assert_cast(arguments[0].type.get()); - res_type = getReturnTypeImpl(DataTypes{arguments[0].type}); - key_type = map_type->getKeyType(); - val_type = map_type->getValueType(); - - if (arguments.size() == 2) - { - /* max key provided */ - max_key_column = arguments[1].column; - } + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Function {} result type should be Tuple with two nested Array columns or Map. Actual {}", + getName(), + result_type->getName()); } - switch (key_type->getTypeId()) + auto call = [&](const auto & types) { - case TypeIndex::Int8: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::Int16: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::Int32: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::Int64: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::Int128: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::Int256: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::UInt8: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::UInt16: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::UInt32: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::UInt64: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::UInt128: - return execute1(arg_columns, max_key_column, res_type, val_type); - case TypeIndex::UInt256: - return execute1(arg_columns, max_key_column, res_type, val_type); - default: - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns in arguments of function " + getName()); + using Types = std::decay_t; + using KeyType = typename Types::LeftType; + using ValueType = typename Types::RightType; + + if constexpr (IsDataTypeNumber && IsDataTypeNumber) + { + using KeyFieldType = typename KeyType::FieldType; + using ValueFieldType = typename ValueType::FieldType; + + executeImplTyped( + key_column, + value_column, + offsets_column, + max_key_column, + std::move(result_key_column), + std::move(result_value_column), + std::move(result_offset_column)); + + return true; + } + + return false; + }; + + if (!callOnTwoTypeIndexes(key_series_type->getTypeId(), value_series_type->getTypeId(), call)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Function {} illegal columns passed as arguments", + getName()); + + if (result_array_additional_offset_column) + { + 
result_array_additional_offset_column->insertRangeFrom( + *result_offset_column_raw, + 0, + result_offset_column_raw->size()); } + + return result_column; } }; @@ -451,4 +457,5 @@ void registerFunctionMapPopulateSeries(FunctionFactory & factory) { factory.registerFunction(); } + } From 5b2bb620850c3972328140602f2855ea547101ea Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 4 Feb 2022 14:55:09 +0000 Subject: [PATCH 049/215] Added tests --- .../01318_map_populate_series.reference | 5 + .../0_stateless/01318_map_populate_series.sql | 8 +- ...01925_map_populate_series_on_map.reference | 7 +- .../01925_map_populate_series_on_map.sql | 2 +- ...05_map_populate_series_non_const.reference | 34 +++++ .../02205_map_populate_series_non_const.sql | 125 ++++++++++++++++++ 6 files changed, 175 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02205_map_populate_series_non_const.reference create mode 100644 tests/queries/0_stateless/02205_map_populate_series_non_const.sql diff --git a/tests/queries/0_stateless/01318_map_populate_series.reference b/tests/queries/0_stateless/01318_map_populate_series.reference index 2d83844c8e1..65e6f8e462c 100644 --- a/tests/queries/0_stateless/01318_map_populate_series.reference +++ b/tests/queries/0_stateless/01318_map_populate_series.reference @@ -13,6 +13,11 @@ ([1,2,3,4,5,6,7,8,9,10],[1,0,2,0,0,0,0,0,0,0]) ([1,2,3,4,5,6,7,8,9,10],[1,0,0,2,0,0,0,0,0,0]) ([1,2,3,4,5,6,7,8,9,10],[1,0,0,0,2,0,0,0,0,0]) +([1,2,3,4,5,6,7,8,9,10],[1,0,0,0,0,0,0,0,0,0]) +([1,2,3,4,5,6,7,8,9,10],[1,2,0,0,0,0,0,0,0,0]) +([1,2,3,4,5,6,7,8,9,10],[1,0,2,0,0,0,0,0,0,0]) +([1,2,3,4,5,6,7,8,9,10],[1,0,0,2,0,0,0,0,0,0]) +([1,2,3,4,5,6,7,8,9,10],[1,0,0,0,2,0,0,0,0,0]) ([1,2],[1,0]) ([1,2,3],[1,2,0]) ([1,2,3,4],[1,0,2,0]) diff --git a/tests/queries/0_stateless/01318_map_populate_series.sql b/tests/queries/0_stateless/01318_map_populate_series.sql index e52571182fe..f7fa8c81e8c 100644 --- a/tests/queries/0_stateless/01318_map_populate_series.sql +++ b/tests/queries/0_stateless/01318_map_populate_series.sql @@ -4,7 +4,7 @@ create table map_test engine=TinyLog() as (select (number + 1) as n, ([1, number select mapPopulateSeries(map.1, map.2) from map_test; select mapPopulateSeries(map.1, map.2, toUInt64(3)) from map_test; select mapPopulateSeries(map.1, map.2, toUInt64(10)) from map_test; -select mapPopulateSeries(map.1, map.2, 1000) from map_test; -- { serverError 43 } +select mapPopulateSeries(map.1, map.2, 10) from map_test; select mapPopulateSeries(map.1, map.2, n) from map_test; select mapPopulateSeries(map.1, [11,22]) from map_test; select mapPopulateSeries([3, 4], map.2) from map_test; @@ -31,6 +31,6 @@ select mapPopulateSeries([toInt64(-10), 2], [toInt64(1), 1], toInt64(-5)) as res -- empty select mapPopulateSeries(cast([], 'Array(UInt8)'), cast([], 'Array(UInt8)'), 5); -select mapPopulateSeries(['1', '2'], [1,1]) as res, toTypeName(res); -- { serverError 43 } -select mapPopulateSeries([1, 2, 3], [1,1]) as res, toTypeName(res); -- { serverError 42 } -select mapPopulateSeries([1, 2], [1,1,1]) as res, toTypeName(res); -- { serverError 42 } +select mapPopulateSeries(['1', '2'], [1, 1]) as res, toTypeName(res); -- { serverError 43 } +select mapPopulateSeries([1, 2, 3], [1, 1]) as res, toTypeName(res); -- { serverError 36 } +select mapPopulateSeries([1, 2], [1, 1, 1]) as res, toTypeName(res); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01925_map_populate_series_on_map.reference b/tests/queries/0_stateless/01925_map_populate_series_on_map.reference index 
fd3d3b2450d..318f5ced231 100644 --- a/tests/queries/0_stateless/01925_map_populate_series_on_map.reference +++ b/tests/queries/0_stateless/01925_map_populate_series_on_map.reference @@ -20,7 +20,12 @@ select mapPopulateSeries(m, toUInt64(10)) from map_test; {1:1,2:0,3:2,4:0,5:0,6:0,7:0,8:0,9:0,10:0} {1:1,2:0,3:0,4:2,5:0,6:0,7:0,8:0,9:0,10:0} {1:1,2:0,3:0,4:0,5:2,6:0,7:0,8:0,9:0,10:0} -select mapPopulateSeries(m, 1000) from map_test; -- { serverError 43 } +select mapPopulateSeries(m, 10) from map_test; +{1:1,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0} +{1:1,2:2,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0} +{1:1,2:0,3:2,4:0,5:0,6:0,7:0,8:0,9:0,10:0} +{1:1,2:0,3:0,4:2,5:0,6:0,7:0,8:0,9:0,10:0} +{1:1,2:0,3:0,4:0,5:2,6:0,7:0,8:0,9:0,10:0} select mapPopulateSeries(m, n) from map_test; {1:1,2:0} {1:1,2:2,3:0} diff --git a/tests/queries/0_stateless/01925_map_populate_series_on_map.sql b/tests/queries/0_stateless/01925_map_populate_series_on_map.sql index ac78280ec1d..635fba37cc8 100644 --- a/tests/queries/0_stateless/01925_map_populate_series_on_map.sql +++ b/tests/queries/0_stateless/01925_map_populate_series_on_map.sql @@ -6,7 +6,7 @@ create table map_test engine=TinyLog() as (select (number + 1) as n, map(1, 1, n select mapPopulateSeries(m) from map_test; select mapPopulateSeries(m, toUInt64(3)) from map_test; select mapPopulateSeries(m, toUInt64(10)) from map_test; -select mapPopulateSeries(m, 1000) from map_test; -- { serverError 43 } +select mapPopulateSeries(m, 10) from map_test; select mapPopulateSeries(m, n) from map_test; drop table map_test; diff --git a/tests/queries/0_stateless/02205_map_populate_series_non_const.reference b/tests/queries/0_stateless/02205_map_populate_series_non_const.reference new file mode 100644 index 00000000000..5d938d2917d --- /dev/null +++ b/tests/queries/0_stateless/02205_map_populate_series_non_const.reference @@ -0,0 +1,34 @@ +mapPopulateSeries with map +Without max key +{0:5} +{0:5,1:0,2:0,3:0,4:0,5:10} +{-5:-5,-4:0,-3:0,-2:0,-1:0,0:5,1:0,2:0,3:0,4:0,5:10} +{-5:-5,-4:0,-3:0,-2:0,-1:0,0:5,1:0,2:0,3:0,4:0,5:10,6:0,7:0,8:0,9:0,10:15} +With max key +{0:5,1:0,2:0,3:0,4:0,5:0,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0,15:0,16:0,17:0,18:0,19:0,20:0} +{0:5,1:0,2:0,3:0,4:0,5:10,6:0,7:0,8:0,9:0,10:0,11:0,12:0,13:0,14:0,15:0,16:0,17:0,18:0,19:0,20:0} +{-5:-5,-4:0,-3:0,-2:0,-1:0,0:5,1:0,2:0,3:0,4:0,5:10} +{-5:-5,-4:0,-3:0,-2:0,-1:0,0:5,1:0,2:0,3:0,4:0,5:10,6:0,7:0,8:0,9:0,10:15,11:0,12:0,13:0,14:0,15:0,16:0,17:0,18:0,19:0,20:0} +Possible overflow +{18446744073709551610:5,18446744073709551611:0,18446744073709551612:0,18446744073709551613:0,18446744073709551614:0,18446744073709551615:0} +{18446744073709551615:5} +Duplicate keys +{1:4,2:0,3:0,4:0,5:6} +{1:4,2:0,3:0,4:0,5:6,6:0,7:0,8:0,9:0,10:0} +mapPopulateSeries with two arrays +Without max key +([0],[5]) +([0,1,2,3,4,5],[5,0,0,0,0,10]) +([-5,-4,-3,-2,-1,0,1,2,3,4,5],[-5,0,0,0,0,5,0,0,0,0,10]) +([-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10],[-5,0,0,0,0,5,0,0,0,0,10,0,0,0,0,15]) +With max key +([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],[5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]) +([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],[5,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]) +([-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],[-5,0,0,0,0,5,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]) +([-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20],[-5,0,0,0,0,5,0,0,0,0,10,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0]) +Possible overflow
+([18446744073709551610,18446744073709551611,18446744073709551612,18446744073709551613,18446744073709551614,18446744073709551615],[5,0,0,0,0,0]) +([18446744073709551615],[5]) +Duplicate keys +([1,2,3,4,5],[4,0,0,0,6]) +([1,2,3,4,5,6,7,8,9,10],[4,0,0,0,6,0,0,0,0,0]) diff --git a/tests/queries/0_stateless/02205_map_populate_series_non_const.sql b/tests/queries/0_stateless/02205_map_populate_series_non_const.sql new file mode 100644 index 00000000000..08a3dd51eb1 --- /dev/null +++ b/tests/queries/0_stateless/02205_map_populate_series_non_const.sql @@ -0,0 +1,125 @@ +DROP TABLE IF EXISTS 02005_test_table; +CREATE TABLE 02005_test_table +( + value Map(Int64, Int64) +) +ENGINE = TinyLog; + +SELECT 'mapPopulateSeries with map'; + +SELECT 'Without max key'; + +SELECT mapPopulateSeries(value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(0, 5)); +SELECT mapPopulateSeries(value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(0, 5, 5, 10)); +SELECT mapPopulateSeries(value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(-5, -5, 0, 5, 5, 10)); +SELECT mapPopulateSeries(value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(-5, -5, 0, 5, 5, 10, 10, 15)); +SELECT mapPopulateSeries(value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +SELECT 'With max key'; + +SELECT mapPopulateSeries(value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(0, 5)); +SELECT mapPopulateSeries(value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(0, 5, 5, 10)); +SELECT mapPopulateSeries(value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(-5, -5, 0, 5, 5, 10)); +SELECT mapPopulateSeries(value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES (map(-5, -5, 0, 5, 5, 10, 10, 15)); +SELECT mapPopulateSeries(value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +SELECT 'Possible overflow'; + +SELECT mapPopulateSeries(map(toUInt64(18446744073709551610), toUInt64(5)), 18446744073709551615); +SELECT mapPopulateSeries(map(toUInt64(18446744073709551615), toUInt64(5)), 18446744073709551615); + +SELECT 'Duplicate keys'; + +SELECT mapPopulateSeries(map(1, 4, 1, 5, 5, 6)); +SELECT mapPopulateSeries(map(1, 4, 1, 5, 5, 6), materialize(10)); + +DROP TABLE 02005_test_table; + +DROP TABLE IF EXISTS 02005_test_table; +CREATE TABLE 02005_test_table +( + key Array(Int64), + value Array(Int64) +) +ENGINE = TinyLog; + +SELECT 'mapPopulateSeries with two arrays'; +SELECT 'Without max key'; + +SELECT mapPopulateSeries(key, value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([0], [5]); +SELECT mapPopulateSeries(key, value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([0, 5], [5, 10]); +SELECT mapPopulateSeries(key, value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([-5, 0, 5], [-5, 5, 10]); +SELECT mapPopulateSeries(key, value) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([-5, 0, 5, 10], [-5, 5, 10, 15]); +SELECT mapPopulateSeries(key, value) 
FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +SELECT 'With max key'; + +SELECT mapPopulateSeries(key, value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([0], [5]); +SELECT mapPopulateSeries(key, value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([0, 5], [5, 10]); +SELECT mapPopulateSeries(key, value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([-5, 0, 5], [-5, 5, 10]); +SELECT mapPopulateSeries(key, value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +INSERT INTO 02005_test_table VALUES ([-5, 0, 5, 10], [-5, 5, 10, 15]); +SELECT mapPopulateSeries(key, value, materialize(20)) FROM 02005_test_table; +TRUNCATE TABLE 02005_test_table; + +SELECT 'Possible verflow'; + +SELECT mapPopulateSeries([18446744073709551610], [5], 18446744073709551615); +SELECT mapPopulateSeries([18446744073709551615], [5], 18446744073709551615); + +SELECT 'Duplicate keys'; + +SELECT mapPopulateSeries([1, 1, 5], [4, 5, 6]); +SELECT mapPopulateSeries([1, 1, 5], [4, 5, 6], materialize(10)); + +DROP TABLE 02005_test_table; From 6e789f98ead35d2a522a9c86859b14c0cf4ee459 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 4 Feb 2022 14:58:55 +0000 Subject: [PATCH 050/215] Added performance tests --- tests/performance/map_populate_series.xml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tests/performance/map_populate_series.xml diff --git a/tests/performance/map_populate_series.xml b/tests/performance/map_populate_series.xml new file mode 100644 index 00000000000..a050be6f3a8 --- /dev/null +++ b/tests/performance/map_populate_series.xml @@ -0,0 +1,4 @@ + + SELECT mapPopulateSeries(range(number), range(number)) FROM numbers(5000) FORMAT Null; + SELECT mapPopulateSeries(range(number), range(number), 2500) FROM numbers(5000) FORMAT Null; + From ec15c5586536df82b7c2cfd3bda6bc608d05803f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 31 Jan 2022 11:49:46 +0000 Subject: [PATCH 051/215] Updated sort to bitsetsort --- base/base/BitSetSort.h | 854 +++++++++++++++++++++++++++++++++++++++++ base/base/sort.h | 6 +- 2 files changed, 857 insertions(+), 3 deletions(-) create mode 100644 base/base/BitSetSort.h diff --git a/base/base/BitSetSort.h b/base/base/BitSetSort.h new file mode 100644 index 00000000000..02ba02e656c --- /dev/null +++ b/base/base/BitSetSort.h @@ -0,0 +1,854 @@ +/** https://github.com/minjaehwang/bitsetsort + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * Bitset Sort is a variant of quick sort, specifically BlockQuickSort. + * Bitset Sort uses a carefully written partition function to let the compiler generates + * SIMD instructions without actually writing SIMD intrinsics in the loop. 
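+ *
+ * A minimal usage sketch (illustrative only; the entry points shown are the
+ * stdext::bitsetsort overloads declared at the bottom of this header, here
+ * applied to a vector of a trivially copyable type):
+ *
+ *     std::vector<uint64_t> values = {3, 1, 2};
+ *     stdext::bitsetsort(values.begin(), values.end());  // default branch-less comparator
+ *     stdext::bitsetsort(values.begin(), values.end(), std::greater<uint64_t>());
+ *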
+ * Bitset Sort is 3.4x faster (or spends 71% less time) than libc++ std::sort when sorting uint64s and 1.58x faster (or spends 37% less time) + * when sorting std::string. + * Bitset Sort uses multiple techniques to improve runtime performance of sort. This includes sorting networks, + * a variant of merge sort called Bitonic Order Merge Sort that is faster for small N, and pattern recognitions. + */ + +#pragma clang diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-macro-identifier" +#pragma clang diagnostic ignored "-Wunused-local-typedef" + +#ifndef _LIBCPP___BITSETSORT +#define _LIBCPP___BITSETSORT + +#include +#include +#include + +namespace stdext { //_LIBCPP_BEGIN_NAMESPACE_STD + +namespace __sorting_network { + +template +class __conditional_swap { + _Compare comp_; + + public: + _Compare get() const { return comp_; } + __conditional_swap(_Compare __comp) : comp_(__comp) {} + inline void operator()(_RandomAccessIterator __x, + _RandomAccessIterator __y) const { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type + value_type; + bool __result = comp_(*__x, *__y); + // Expect a compiler would short-circuit the following if-block. + // 4 * sizeof(size_t) is a magic number. Expect a compiler to use SIMD + // instruction on them. + if (_VSTD::is_trivially_copy_constructible::value && + _VSTD::is_trivially_copy_assignable::value && + sizeof(value_type) <= 4 * sizeof(size_t)) { + value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); + *__y = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); + *__x = _VSTD::move(__min); + } else { + if (!__result) { + _VSTD::iter_swap(__x, __y); + } + } + } +}; + +template +class __reverse_conditional_swap { + _Compare comp_; + + public: + _Compare get() const { return comp_; } + __reverse_conditional_swap(_Compare __comp) : comp_(__comp) {} + inline void operator()(_RandomAccessIterator __x, + _RandomAccessIterator __y) const { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type + value_type; + bool __result = !comp_(*__x, *__y); + // Expect a compiler would short-circuit the following if-block. + if (_VSTD::is_trivially_copy_constructible::value && + _VSTD::is_trivially_copy_assignable::value && + sizeof(value_type) <= 4 * sizeof(size_t)) { + value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); + *__y = __result ? 
_VSTD::move(*__y) : _VSTD::move(*__x); + *__x = _VSTD::move(__min); + } else { + if (__result) { + _VSTD::iter_swap(__x, __y); + } + } + } +}; + +template +void __sort2(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); +} + +template +void __sort3(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 0, __a + 1); +} + +template +void __sort4(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 1, __a + 2); +} + +template +void __sort5(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 0, __a + 3); + __cond_swap(__a + 1, __a + 4); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 1, __a + 2); +} + +template +void __sort6(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 4, __a + 5); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 3, __a + 5); + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 0, __a + 3); + __cond_swap(__a + 1, __a + 4); + __cond_swap(__a + 2, __a + 5); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 2, __a + 3); +} +template +void __sort7(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 5, __a + 6); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 3, __a + 5); + __cond_swap(__a + 4, __a + 6); + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 4, __a + 5); + __cond_swap(__a + 0, __a + 4); + __cond_swap(__a + 1, __a + 5); + __cond_swap(__a + 2, __a + 6); + __cond_swap(__a + 0, __a + 3); + __cond_swap(__a + 2, __a + 5); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 2, __a + 3); +} + +template +void __sort8(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { + __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 4, __a + 5); + __cond_swap(__a + 6, __a + 7); + __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 1, __a + 3); + __cond_swap(__a + 4, __a + 6); + __cond_swap(__a + 5, __a + 7); + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 5, __a + 6); + __cond_swap(__a + 0, __a + 4); + __cond_swap(__a + 1, __a + 5); + __cond_swap(__a + 2, __a + 6); + __cond_swap(__a + 3, __a + 7); + __cond_swap(__a + 1, __a + 4); + __cond_swap(__a + 3, __a + 6); + __cond_swap(__a + 2, __a + 4); + __cond_swap(__a + 3, __a + 5); + __cond_swap(__a + 3, __a + 4); +} + +template +void __sort1to8( + _RandomAccessIterator __a, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, + _ConditionalSwap __cond_swap) { + switch (__len) { + case 0: + case 1: + return; + case 2: + __sort2(__a, __cond_swap); + return; + case 3: + __sort3(__a, __cond_swap); + return; + case 4: + __sort4(__a, __cond_swap); + return; + case 5: + __sort5(__a, __cond_swap); + return; + case 6: + __sort6(__a, __cond_swap); + return; + case 7: + __sort7(__a, __cond_swap); + return; + case 8: + __sort8(__a, __cond_swap); + return; + } + // ignore +} +template +void __sort3(_RandomAccessIterator __a0, 
_RandomAccessIterator __a1, _RandomAccessIterator __a2, _ConditionalSwap __cond_swap) { + __cond_swap(__a1, __a2); + __cond_swap(__a0, __a2); + __cond_swap(__a0, __a1); +} + +template +void __sort3r(_RandomAccessIterator __a2, _RandomAccessIterator __a1, _RandomAccessIterator __a0, _ConditionalSwap __rev_cond_swap) { + __rev_cond_swap(__a1, __a2); + __rev_cond_swap(__a0, __a2); + __rev_cond_swap(__a0, __a1); +} + +} // namespace __sorting_network + +template +_ForwardIterator +__median3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) +{ + if (__c(*__x, *__y)) { + if (__c(*__y, *__z)) { + return __y; + } + // x < y, y >= z + if (__c(*__x, *__z)) { + return __z; + } + return __x; + } else { + // y <= x + if (__c(*__x, *__z)) { + // y <= x < z + return __x; + } + // y <= x, z <= x + if (__c(*__y, *__z)) { + return __z; + } + return __y; + } +} + +namespace __bitonic { +class __detail { + public: + _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __batch = 8; + _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __bitonic_batch = __batch * 2; + _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __small_sort_max = + __detail::__bitonic_batch * 2; +}; + +template +void __enforce_order(_RandomAccessIterator __first, + _RandomAccessIterator __last, _ConditionalSwap __cond_swap, + _ReverseConditionalSwap __reverse_cond_swap) { + _RandomAccessIterator __i = __first; + while (__i + __detail::__bitonic_batch <= __last) { + __sorting_network::__sort8(__i, __cond_swap); + __sorting_network::__sort8(__i + __detail::__batch, __reverse_cond_swap); + __i += __detail::__bitonic_batch; + } + if (__i + __detail::__batch <= __last) { + __sorting_network::__sort8(__i, __cond_swap); + __i += __detail::__batch; + __sorting_network::__sort1to8(__i, __last - __i, __reverse_cond_swap); + } else { + __sorting_network::__sort1to8(__i, __last - __i, __cond_swap); + } +} + +class __construct { + public: + template + static inline void __op(_T* __result, _T&& __val) { + new (__result) _T(_VSTD::move(__val)); + } +}; + +class __move_assign { + public: + template + static inline void __op(_T* __result, _T&& __val) { + *__result = _VSTD::move(__val); + } +}; + +template +void __forward_merge(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _Compare __comp) { + --__last; + typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = + __last - __first; + for (; __len > 0; __len--) { + if (__comp(*__first, *__last)) { + _Copy::__op(&*__result, _VSTD::move(*__first++)); + } else { + _Copy::__op(&*__result, _VSTD::move(*__last--)); + } + __result++; + } + _Copy::__op(&*__result, _VSTD::move(*__first)); +} + +template +void __backward_merge(_InputIterator __first, _InputIterator __last, + _OutputIterator __result, _Compare __comp) { + --__last; + __result += __last - __first; + typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = + __last - __first; + for (; __len > 0; __len--) { + if (__comp(*__first, *__last)) { + _Copy::__op(&*__result, _VSTD::move(*__first++)); + } else { + _Copy::__op(&*__result, _VSTD::move(*__last--)); + } + __result--; + } + _Copy::__op(&*__result, _VSTD::move(*__first)); +} + +template +void __forward_and_backward_merge(_InputIterator __first, _InputIterator __last, + _InputIterator __rlast, + _OutputIterator __result, _Compare __comp) { + _InputIterator __rfirst = __last; + __last--; + __rlast--; + typename _VSTD::iterator_traits<_InputIterator>::difference_type len = + __last - __first; + _OutputIterator __rout = __result + (__rlast - 
__first); + + for (; len > 0; len--) { + if (__comp(*__first, *__last)) { + _Copy::__op(&*__result, _VSTD::move(*__first++)); + } else { + _Copy::__op(&*__result, _VSTD::move(*__last--)); + } + __result++; + if (__comp(*__rfirst, *__rlast)) { + _Copy::__op(&*__rout, _VSTD::move(*__rfirst++)); + } else { + _Copy::__op(&*__rout, _VSTD::move(*__rlast--)); + } + __rout--; + } + _Copy::__op(&*__result, _VSTD::move(*__first)); + _Copy::__op(&*__rout, _VSTD::move(*__rfirst)); +} + +template +inline bool __small_sort( + _RandomAccessIterator __first, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, + _ConditionalSwap& __cond_swap, + _ReverseConditionalSwap __reverse_cond_swap) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type + difference_type; + typedef + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + if (__len > __detail::__small_sort_max) { + return false; + } + _RandomAccessIterator __last = __first + __len; + __enforce_order(__first, __last, __cond_swap, __reverse_cond_swap); + if (__len <= __detail::__batch) { + // sorted. + return true; + } + auto __comp = __cond_swap.get(); + if (__len <= __detail::__bitonic_batch) { + // single bitonic order merge. + __forward_merge<__bitonic::__construct>(__first, __last, __buff, __comp); + copy(_VSTD::make_move_iterator(__buff), _VSTD::make_move_iterator(__buff + __len), + __first); + for (auto __iter = __buff; __iter < __buff + __len; __iter++) { + (*__iter).~value_type(); + } + return true; + } + // double bitonic order merge. + __forward_merge<__construct>(__first, __first + __detail::__bitonic_batch, + __buff, __comp); + __backward_merge<__construct>(__first + __detail::__bitonic_batch, __last, + __buff + __detail::__bitonic_batch, __comp); + __forward_merge<__move_assign>(__buff, __buff + __len, __first, __comp); + for (auto __iter = __buff; __iter < __buff + __len; __iter++) { + (*__iter).~value_type(); + } + return true; +} +} // namespace __bitonic + +namespace __bitsetsort { +struct __64bit_set { + typedef uint64_t __storage_t; + _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __block_size = 64; + static __storage_t __blsr(__storage_t x) { + // _blsr_u64 can be used here but it did not make any performance + // difference in practice. + return x ^ (x & -x); + } + static int __clz(__storage_t x) { return __builtin_clzll(x); } + static int __ctz(__storage_t x) { return __builtin_ctzll(x); } +}; + +struct __32bit_set { + typedef uint32_t __storage_t; + _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __block_size = 32; + static __storage_t __blsr(__storage_t x) { + // _blsr_u32 can be used here but it did not make any performance + // difference in practice. 
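+    // (x & -x) isolates the lowest set bit in two's complement; XOR-ing it back out
+    // clears that bit, which is exactly what the BLSR instruction computes.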
+ return x ^ (x & -x); + } + static int __clz(__storage_t x) { return __builtin_clzl(x); } + static int __ctz(__storage_t x) { return __builtin_ctzl(x); } +}; + +template +struct __set_selector { + typedef __64bit_set __set; +}; + +template<> +struct __set_selector<4> { + typedef __32bit_set __set; +}; + +template +inline void __swap_bitmap_pos(_RandomAccessIterator __first, + _RandomAccessIterator __last, + typename _Bitset::__storage_t& __left_bitset, + typename _Bitset::__storage_t& __right_bitset) { + while (__left_bitset != 0 & __right_bitset != 0) { + int tz_left = _Bitset::__ctz(__left_bitset); + __left_bitset = _Bitset::__blsr(__left_bitset); + int tz_right = _Bitset::__ctz(__right_bitset); + __right_bitset = _Bitset::__blsr(__right_bitset); + _VSTD::iter_swap(__first + tz_left, __last - tz_right); + } +} + +template +inline void __swap_bitmap(_RandomAccessIterator __first, + _RandomAccessIterator __last, + typename _Bitset::__storage_t& __left_bitset, + typename _Bitset::__storage_t& __right_bitset) { + if (__left_bitset == 0 || __right_bitset == 0) { + return; + } + int tz_left; + int tz_right; + + tz_left = _Bitset::__ctz(__left_bitset); + __left_bitset = _Bitset::__blsr(__left_bitset); + + tz_right = _Bitset::__ctz(__right_bitset); + __right_bitset = _Bitset::__blsr(__right_bitset); + + _RandomAccessIterator l = __first + tz_left; + _RandomAccessIterator r = __last - tz_right; + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type tmp( + _VSTD::move(*l)); + *l = _VSTD::move(*r); + while (__left_bitset != 0 & __right_bitset != 0) { + tz_left = _Bitset::__ctz(__left_bitset); + __left_bitset = _Bitset::__blsr(__left_bitset); + tz_right = _Bitset::__ctz(__right_bitset); + __right_bitset = _Bitset::__blsr(__right_bitset); + + l = __first + tz_left; + *r = _VSTD::move(*l); + r = __last - tz_right; + *l = _VSTD::move(*r); + } + *r = _VSTD::move(tmp); +} + +template +_VSTD::pair<_RandomAccessIterator, bool> __bitset_partition( + _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type + value_type; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type + difference_type; + typedef typename _Bitset::__storage_t __storage_t; + _RandomAccessIterator __begin = __first; + value_type __pivot = _VSTD::move(*__first); + + if (__comp(__pivot, *(__last - 1))) { + // Guarded. + while (!__comp(__pivot, *++__first)) {} + } else { + while (++__first < __last && !__comp(__pivot, *__first)) {} + } + + if (__first < __last) { + // It will be always guarded because __bitset_sort will do the median-of-three before calling this. + while (__comp(__pivot, *--__last)) {} + } + bool __already_partitioned = __first >= __last; + if (!__already_partitioned) { + _VSTD::iter_swap(__first, __last); + ++__first; + } + + // [__first, __last) - __last is not inclusive. From now one, it uses last minus one to be inclusive on both sides. + _RandomAccessIterator __lm1 = __last - 1; + __storage_t __left_bitset = 0; + __storage_t __right_bitset = 0; + + // Reminder: length = __lm1 - __first + 1. + while (__lm1 - __first >= 2 * _Bitset::__block_size - 1) { + if (__left_bitset == 0) { + // Possible vectorization. With a proper "-march" flag, the following loop + // will be compiled into a set of SIMD instructions. 
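+      // Bit __j of __left_bitset is set iff the __j-th element after __first compares
+      // greater than the pivot, i.e. it has to move to the right side; accumulating the
+      // comparison results branch-free is what makes this loop auto-vectorizable.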
+ _RandomAccessIterator __iter = __first; + for (int __j = 0; __j < _Bitset::__block_size;) { + __left_bitset |= (static_cast<__storage_t>(__comp(__pivot, *__iter)) << __j); + __j++; + __iter++; + } + } + + if (__right_bitset == 0) { + // Possible vectorization. With a proper "-march" flag, the following loop + // will be compiled into a set of SIMD instructions. + _RandomAccessIterator __iter = __lm1; + for (int __j = 0; __j < _Bitset::__block_size;) { + __right_bitset |= + (static_cast<__storage_t>(!__comp(__pivot, *__iter)) << __j); + __j++; + __iter--; + } + } + + __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); + __first += (__left_bitset == 0) ? _Bitset::__block_size : 0; + __lm1 -= (__right_bitset == 0) ? _Bitset::__block_size : 0; + } + // Now, we have a less-than a block on each side. + difference_type __remaining_len = __lm1 - __first + 1; + difference_type __l_size; + difference_type __r_size; + if (__left_bitset == 0 && __right_bitset == 0) { + __l_size = __remaining_len / 2; + __r_size = __remaining_len - __l_size; + } else if (__left_bitset == 0) { + // We know at least one side is a full block. + __l_size = __remaining_len - _Bitset::__block_size; + __r_size = _Bitset::__block_size; + } else { // if (right == 0) + __l_size = _Bitset::__block_size; + __r_size = __remaining_len - _Bitset::__block_size; + } + if (__left_bitset == 0) { + _RandomAccessIterator __iter = __first; + for (int j = 0; j < __l_size; j++) { + __left_bitset |= + (static_cast<__storage_t>(__comp(__pivot, *(__iter))) << j); + __iter++; + } + } + if (__right_bitset == 0) { + _RandomAccessIterator __iter = __lm1; + for (int j = 0; j < __r_size; j++) { + __right_bitset |= + (static_cast<__storage_t>(!__comp(__pivot, *(__iter))) << j); + --__iter; + } + } + __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); + __first += (__left_bitset == 0) ? __l_size : 0; + __lm1 -= (__right_bitset == 0) ? __r_size : 0; + + if (__left_bitset) { + // Swap within the right side. + int __tz_left; + + // Need to find set positions in the reverse order. + while (__left_bitset != 0) { + __tz_left = _Bitset::__block_size - 1 - _Bitset::__clz(__left_bitset); + __left_bitset &= (static_cast<__storage_t>(1) << __tz_left) - 1; + _VSTD::iter_swap(__first + __tz_left, __lm1--); + } + __first = __lm1 + 1; + } else if (__right_bitset) { + // Swap within the left side. + int __tz_right; + // Need to find set positions in the reverse order. 
+ while (__right_bitset != 0) { + __tz_right = _Bitset::__block_size - 1 - _Bitset::__clz(__right_bitset); + __right_bitset &= (static_cast<__storage_t>(1) << __tz_right) - 1; + _VSTD::iter_swap(__lm1 - __tz_right, __first++); + } + } + + _RandomAccessIterator __pivot_pos = __first - 1; + *__begin = _VSTD::move(*__pivot_pos); + *__pivot_pos = _VSTD::move(__pivot); + return _VSTD::make_pair(__pivot_pos, __already_partitioned); +} + +template +inline bool __partial_insertion_sort(_RandomAccessIterator __first, + _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type + value_type; + if (__first == __last) return true; + + const unsigned __limit = 8; + unsigned __count = 0; + _RandomAccessIterator __j = __first; + for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i) { + if (__comp(*__i, *__j)) { + value_type __t(_VSTD::move(*__i)); + _RandomAccessIterator __k = __j; + __j = __i; + do { + *__j = _VSTD::move(*__k); + __j = __k; + } while (__j != __first && __comp(__t, *--__k)); + *__j = _VSTD::move(__t); + if (++__count == __limit) return ++__i == __last; + } + __j = __i; + } + return true; +} + +template +void __bitsetsort_loop( + _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp, + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __limit) { + _LIBCPP_CONSTEXPR_AFTER_CXX11 int __ninther_threshold = 128; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type + difference_type; + typedef + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + __sorting_network::__conditional_swap<_RandomAccessIterator, _Compare> + __cond_swap(__comp); + __sorting_network::__reverse_conditional_swap<_RandomAccessIterator, _Compare> + __reverse_cond_swap(__comp); + while (true) { + if (__limit == 0) { + // Fallback to heap sort as Introsort suggests. 
+ _VSTD::make_heap(__first, __last, __comp); + _VSTD::sort_heap(__first, __last, __comp); + return; + } + __limit--; + difference_type __len = __last - __first; + if (__len <= __bitonic::__detail::__batch) { + __sorting_network::__sort1to8(__first, __len, __cond_swap); + return; + } else if (__len <= 32) { + __bitonic::__small_sort(__first, __len, __buff, __cond_swap, + __reverse_cond_swap); + // __bitonic::__sort9to32(__first, __len, __buff, __cond_swap, + // __reverse_cond_swap); + return; + } + difference_type __half_len = __len / 2; + if (__len > __ninther_threshold) { + __sorting_network::__sort3(__first, __first + __half_len, __last - 1, __cond_swap); + __sorting_network::__sort3(__first + 1, __first + (__half_len - 1), __last - 2, __cond_swap); + __sorting_network::__sort3(__first + 2, __first + (__half_len + 1), __last - 3, __cond_swap); + __sorting_network::__sort3(__first + (__half_len - 1), __first + __half_len, + __first + (__half_len + 1), __cond_swap); + _VSTD::iter_swap(__first, __first + __half_len); + } else { + __sorting_network::__sort3(__first + __half_len, __first, __last - 1, __cond_swap); + } + auto __ret = __bitset_partition<__64bit_set>(__first, __last, __comp); + if (__ret.second) { + bool __left = __partial_insertion_sort(__first, __ret.first, __comp); + if (__partial_insertion_sort(__ret.first + 1, __last, __comp)) { + if (__left) return; + __last = __ret.first; + continue; + } else { + if (__left) { + __first = ++__ret.first; + continue; + } + } + } + + // Sort smaller range with recursive call and larger with tail recursion + // elimination. + if (__ret.first - __first < __last - __ret.first) { + __bitsetsort_loop<_Compare>(__first, __ret.first, __comp, __buff, __limit); + __first = ++__ret.first; + } else { + __bitsetsort_loop<_Compare>(__ret.first + 1, __last, __comp, __buff, __limit); + __last = __ret.first; + } + } +} + +template +inline _LIBCPP_INLINE_VISIBILITY _Number __log2i(_Number __n) { + _Number __log2 = 0; + while (__n > 1) { + __log2++; + __n >>= 1; + } + return __log2; +} + + +template +inline _LIBCPP_INLINE_VISIBILITY void __bitsetsort_internal( + _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type + value_type; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type + difference_type; + typename _VSTD::aligned_storage::type + __buff[__bitonic::__detail::__small_sort_max]; + + // 2*log2 comes from Introsort https://reviews.llvm.org/D36423. + difference_type __depth_limit = 2 * __log2i(__last - __first); + __bitsetsort_loop(__first, __last, __comp, + reinterpret_cast(&__buff[0]), + __depth_limit); +} +} // namespace __bitsetsort + +// __branchlesscompimpl provides a branch-less comparator for pairs and tuples of primitive types. +// It provides 1.38x - 2x speed-up in pairs or tuples sorting. 
+template +struct __branchlesscompimpl { + template + bool operator()(const R& lhs, const R& rhs) const { + return lhs < rhs; + } +}; + +template<> +struct __branchlesscompimpl { + template + bool operator()(const R& lhs, const R& rhs) const { + return lhs < rhs; + } + template + bool operator()(const _VSTD::pair& lhs, const _VSTD::pair& rhs) const { + const bool __c1 = lhs.first < rhs.first; + const bool __c2 = rhs.first < lhs.first; + const bool __c3 = lhs.second < rhs.second; + return __c1 || (!__c2 && __c3); + } + template + bool operator()(const _VSTD::tuple& lhs, const _VSTD::tuple& rhs) const { + const bool __c1 = _VSTD::get<0>(lhs) < _VSTD::get<0>(rhs); + const bool __c2 = _VSTD::get<0>(rhs) < _VSTD::get<0>(lhs); + const bool __c3 = _VSTD::get<1>(lhs) < _VSTD::get<1>(rhs); + return __c1 || (!__c2 && __c3); + } + template + bool operator()(const _VSTD::tuple& lhs, const _VSTD::tuple& rhs) const { + const bool __c1 = _VSTD::get<0>(lhs) < _VSTD::get<0>(rhs); + const bool __c2 = _VSTD::get<0>(rhs) < _VSTD::get<0>(lhs); + const bool __c3 = _VSTD::get<1>(lhs) < _VSTD::get<1>(rhs); + const bool __c4 = _VSTD::get<1>(rhs) < _VSTD::get<1>(lhs); + const bool __c5 = _VSTD::get<2>(lhs) < _VSTD::get<2>(rhs); + return __c1 || (!__c2 && (__c3 || (!__c4 && __c5))); + } +}; + +template +struct __branchlesscomp { + bool operator()(const _T& __x, const _T& __y) const { + return __x < __y; + } +}; + +template +struct __branchlesscomp<_VSTD::pair> : public __branchlesscompimpl<_VSTD::is_fundamental::value> {}; + +template +struct __branchlesscomp<_VSTD::tuple> : public __branchlesscompimpl<_VSTD::is_fundamental::value> {}; + +template +struct __branchlesscomp<_VSTD::tuple> : public __branchlesscompimpl<_VSTD::is_fundamental::value && _VSTD::is_fundamental::value> {}; + +template +inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_RandomAccessIterator __first, + _RandomAccessIterator __last, + _Compare __comp) { + /** This change is required for ClickHouse + * /contrib/libcxx/include/algorithm:789:10: note: candidate function template not viable: 'this' argument has type + * 'const std::__debug_less::less>', but method is not marked const + * bool operator()(const _Tp& __x, const _Up& __y) + */ + typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; + __bitsetsort::__bitsetsort_internal<_Compare>(__first, __last, + __comp); +} + +template +inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_VSTD::__wrap_iter<_Tp*> __first, + _VSTD::__wrap_iter<_Tp*> __last, + _Compare __comp) { + typedef typename _VSTD::add_lvalue_reference<_Compare>::type _Comp_ref; + bitsetsort<_Tp*, _Comp_ref>(__first.base(), __last.base(), __comp); +} + +template +inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_RandomAccessIterator __first, + _RandomAccessIterator __last) { + bitsetsort( + __first, __last, + __branchlesscomp::value_type>()); +} + +template +inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_VSTD::__wrap_iter<_Tp*> __first, + _VSTD::__wrap_iter<_Tp*> __last) { + bitsetsort(__first.base(), __last.base()); +} +} // namespace stdext + +#endif // _LIBCPP___BITSETSORT diff --git a/base/base/sort.h b/base/base/sort.h index 592a899a291..114ad6f359a 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wold-style-cast" @@ -30,7 +30,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compar template void sort(RandomIt first, RandomIt last, Compare compare) { - ::pdqsort(first, last, 
compare);
+    ::stdext::bitsetsort(first, last, compare);
 }
 
 template <typename RandomIt>
@@ -38,5 +38,5 @@ void sort(RandomIt first, RandomIt last)
 {
     using value_type = typename std::iterator_traits<RandomIt>::value_type;
     using comparator = std::less<value_type>;
-    ::pdqsort(first, last, comparator());
+    ::stdext::bitsetsort(first, last, comparator());
 }

From 4c9ba1dfd700141867feab1a98908c36b4160f62 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Mon, 31 Jan 2022 22:25:35 +0000
Subject: [PATCH 052/215] Fixed tests

---
 src/Functions/array/arrayAUC.cpp                  |  4 ++--
 .../0_stateless/00702_join_with_using_dups.sql    | 16 ++++++++--------
 .../0_stateless/00818_inner_join_bug_3567.sql     |  4 ++--
 ...s_order_by_with_different_variables.reference  |  4 ++--
 ...otonous_order_by_with_different_variables.sql  |  4 ++--
 .../0_stateless/01592_window_functions.sql        |  2 +-
 .../0_stateless/01670_neighbor_lc_bug.reference   |  4 ++--
 .../0_stateless/01670_neighbor_lc_bug.sql         |  4 +++-
 .../01780_column_sparse_alter.reference           |  2 +-
 .../0_stateless/01780_column_sparse_alter.sql     |  4 ++--
 10 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp
index 72dc165550d..9bebcf7fd8c 100644
--- a/src/Functions/array/arrayAUC.cpp
+++ b/src/Functions/array/arrayAUC.cpp
@@ -1,6 +1,5 @@
 #include
 #include
-#include <base/sort.h>
 #include
 #include "arrayScalarProduct.h"
@@ -113,7 +112,8 @@ public:
             sorted_labels[i].label = label;
         }
 
-        ::sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; });
+        /// Stable sort is required so that labels are applied in the same order when scores are equal
+        std::stable_sort(sorted_labels.begin(), sorted_labels.end(), [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; });
 
         /// We will first calculate non-normalized area.
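A quick illustration of the determinism point behind the stable_sort change above (a
standalone sketch; the Label struct and the values are made up for the example, not the
ClickHouse types): with std::sort, the two 0.5-score entries below could come out in
either order between runs, and any accumulation that reads labels in sort order, as the
AUC computation here does, could then differ from run to run. std::stable_sort pins ties
to their input order.

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Label
    {
        double score;
        int label;
    };

    int main()
    {
        std::vector<Label> sorted_labels{{0.5, 1}, {0.5, 0}, {0.1, 1}};

        /// Sort by score only; entries with equal scores keep their input order,
        /// so repeated runs always visit the labels in the same sequence.
        std::stable_sort(sorted_labels.begin(), sorted_labels.end(),
                         [](const auto & lhs, const auto & rhs) { return lhs.score > rhs.score; });

        for (const auto & entry : sorted_labels)
            std::printf("%.1f %d\n", entry.score, entry.label);
        return 0;
    }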
diff --git a/tests/queries/0_stateless/00702_join_with_using_dups.sql b/tests/queries/0_stateless/00702_join_with_using_dups.sql
index d45c6628b9a..cf0c053a144 100644
--- a/tests/queries/0_stateless/00702_join_with_using_dups.sql
+++ b/tests/queries/0_stateless/00702_join_with_using_dups.sql
@@ -8,24 +8,24 @@ insert into X (id, x_name) values (1, 'A'), (2, 'B'), (2, 'C'), (3, 'D'), (4, 'E
 insert into Y (id, y_name) values (1, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (3, 'e'), (4, 'f'), (6, 'g'), (7, 'h'), (9, 'i');
 
 select 'inner';
-select X.*, Y.* from X inner join Y using id;
+select X.*, Y.* from X inner join Y using id order by X.id, Y.id, X.x_name, Y.y_name;
 select 'inner subs';
-select s.*, j.* from (select * from X) as s inner join (select * from Y) as j using id;
+select s.*, j.* from (select * from X) as s inner join (select * from Y) as j using id order by s.id, j.id, s.x_name, j.y_name;
 
 select 'left';
-select X.*, Y.* from X left join Y using id;
+select X.*, Y.* from X left join Y using id order by X.id, Y.id, X.x_name, Y.y_name;
 select 'left subs';
-select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id;
+select s.*, j.* from (select * from X) as s left join (select * from Y) as j using id order by s.id, j.id, s.x_name, j.y_name;
 
 select 'right';
-select X.*, Y.* from X right join Y using id order by id;
+select X.*, Y.* from X right join Y using id order by X.id, Y.id, X.x_name, Y.y_name;
 select 'right subs';
-select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by id;
+select s.*, j.* from (select * from X) as s right join (select * from Y) as j using id order by s.id, j.id, s.x_name, j.y_name;
 
 select 'full';
-select X.*, Y.* from X full join Y using id order by id;
+select X.*, Y.* from X full join Y using id order by X.id, Y.id;
 select 'full subs';
-select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by id;
+select s.*, j.* from (select * from X) as s full join (select * from Y) as j using id order by s.id, j.id, s.x_name, j.y_name;
 
 drop table X;
 drop table Y;
diff --git a/tests/queries/0_stateless/00818_inner_join_bug_3567.sql b/tests/queries/0_stateless/00818_inner_join_bug_3567.sql
index 2058d2309e4..1c851d40f47 100644
--- a/tests/queries/0_stateless/00818_inner_join_bug_3567.sql
+++ b/tests/queries/0_stateless/00818_inner_join_bug_3567.sql
@@ -9,8 +9,8 @@ INSERT INTO table2 VALUES ('D', 'd', '2018-01-01') ('B', 'b', '2018-01-01') ('C'
 SELECT * FROM table1 t1 FORMAT PrettyCompact;
 SELECT *, c as a, d as b FROM table2 FORMAT PrettyCompact;
-SELECT * FROM table1 t1 ALL LEFT JOIN (SELECT *, c, d as b FROM table2) t2 USING (a, b) ORDER BY d FORMAT PrettyCompact;
-SELECT * FROM table1 t1 ALL INNER JOIN (SELECT *, c, d as b FROM table2) t2 USING (a, b) ORDER BY d FORMAT PrettyCompact;
+SELECT * FROM table1 t1 ALL LEFT JOIN (SELECT *, c, d as b FROM table2) t2 USING (a, b) ORDER BY d, t1.a FORMAT PrettyCompact;
+SELECT * FROM table1 t1 ALL INNER JOIN (SELECT *, c, d as b FROM table2) t2 USING (a, b) ORDER BY d, t1.a FORMAT PrettyCompact;
 
 DROP TABLE table1;
 DROP TABLE table2;
diff --git a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference
index cf2935a40bf..186e6565ffe 100644
--- a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference
+++ 
b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.reference @@ -11,8 +11,8 @@ 2 1 3 3 1 4 3 -2 5 4 2 2 4 +2 5 4 2 1 4 3 1 3 3 @@ -27,6 +27,6 @@ 2 1 3 3 1 4 3 -2 5 4 2 2 4 +2 5 4 2 diff --git a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql index 6fda42cbed1..87f0f462ab9 100644 --- a/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql +++ b/tests/queries/0_stateless/01322_monotonous_order_by_with_different_variables.sql @@ -7,7 +7,7 @@ SELECT * FROM test ORDER BY toFloat32(x), -y, -z DESC; SELECT * FROM test ORDER BY toFloat32(x), -(-y), -z DESC; SELECT max(x) as k FROM test ORDER BY k; SELECT roundToExp2(x) as k FROM test GROUP BY k ORDER BY k; -SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k; +SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k, y, z; SELECT max(x) as k FROM test HAVING k > 0 ORDER BY k; SET optimize_monotonous_functions_in_order_by = 0; @@ -15,7 +15,7 @@ SELECT * FROM test ORDER BY toFloat32(x), -y, -z DESC; SELECT * FROM test ORDER BY toFloat32(x), -(-y), -z DESC; SELECT max(x) as k FROM test ORDER BY k; SELECT roundToExp2(x) as k From test GROUP BY k ORDER BY k; -SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k; +SELECT roundToExp2(x) as k, y, z FROM test WHERE k >= 1 ORDER BY k, y, z; SELECT max(x) as k FROM test HAVING k > 0 ORDER BY k; DROP TABLE test; diff --git a/tests/queries/0_stateless/01592_window_functions.sql b/tests/queries/0_stateless/01592_window_functions.sql index b05b04628d2..1ef416aaa78 100644 --- a/tests/queries/0_stateless/01592_window_functions.sql +++ b/tests/queries/0_stateless/01592_window_functions.sql @@ -43,7 +43,7 @@ SELECT product_name, group_name, price, - row_number() OVER (PARTITION BY group_name ORDER BY price desc) rn + row_number() OVER (PARTITION BY group_name ORDER BY price desc, product_name asc) rn FROM products INNER JOIN product_groups USING (group_id) ORDER BY group_name, rn; diff --git a/tests/queries/0_stateless/01670_neighbor_lc_bug.reference b/tests/queries/0_stateless/01670_neighbor_lc_bug.reference index fd1bc1a154c..b28e8d90caa 100644 --- a/tests/queries/0_stateless/01670_neighbor_lc_bug.reference +++ b/tests/queries/0_stateless/01670_neighbor_lc_bug.reference @@ -9,7 +9,7 @@ 1 1 1 2 2 2 ┌─rowNr─┬─val_string─┬─str_m1───┬─str_p1───┬─val_low──┬─low_m1───┬─low_p1───┐ -│ 1 │ String 1 │ │ String 1 │ String 1 │ │ String 1 │ -│ 2 │ String 1 │ String 1 │ String 2 │ String 1 │ String 1 │ String 2 │ +│ 1 │ String 1 │ String 1 │ String 2 │ String 1 │ String 1 │ String 2 │ +│ 2 │ String 1 │ │ String 1 │ String 1 │ │ String 1 │ │ 3 │ String 2 │ String 1 │ │ String 2 │ String 1 │ │ └───────┴────────────┴──────────┴──────────┴──────────┴──────────┴──────────┘ diff --git a/tests/queries/0_stateless/01670_neighbor_lc_bug.sql b/tests/queries/0_stateless/01670_neighbor_lc_bug.sql index 2d99225aa89..ad24dc0fe33 100644 --- a/tests/queries/0_stateless/01670_neighbor_lc_bug.sql +++ b/tests/queries/0_stateless/01670_neighbor_lc_bug.sql @@ -38,6 +38,8 @@ FROM SELECT * FROM neighbor_test ORDER BY val_string ASC -) format PrettyCompact; +) +ORDER By rowNr +format PrettyCompact; drop table if exists neighbor_test; diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.reference b/tests/queries/0_stateless/01780_column_sparse_alter.reference index cec7af647b3..4fb0122db96 100644 --- 
a/tests/queries/0_stateless/01780_column_sparse_alter.reference +++ b/tests/queries/0_stateless/01780_column_sparse_alter.reference @@ -1,6 +1,6 @@ id Default -u Sparse s Sparse +u Sparse 182 155 id Default t Sparse diff --git a/tests/queries/0_stateless/01780_column_sparse_alter.sql b/tests/queries/0_stateless/01780_column_sparse_alter.sql index 444a1f9cf43..7f9558bfc18 100644 --- a/tests/queries/0_stateless/01780_column_sparse_alter.sql +++ b/tests/queries/0_stateless/01780_column_sparse_alter.sql @@ -12,14 +12,14 @@ INSERT INTO t_sparse_alter SELECT if (number % 13 = 0, toString(number), '') FROM numbers(2000); -SELECT column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() AND table = 't_sparse_alter' AND active ORDER BY name; +SELECT column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() AND table = 't_sparse_alter' AND active ORDER BY column; SELECT uniqExact(u), uniqExact(s) FROM t_sparse_alter; ALTER TABLE t_sparse_alter DROP COLUMN s, RENAME COLUMN u TO t; ALTER TABLE t_sparse_alter MODIFY COLUMN t UInt16; -SELECT column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() AND table = 't_sparse_alter' AND active ORDER BY name; +SELECT column, serialization_kind FROM system.parts_columns WHERE database = currentDatabase() AND table = 't_sparse_alter' AND active ORDER BY column; SELECT uniqExact(t) FROM t_sparse_alter; From f301e50b23e0e973efb32956602a6516bb3c5233 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 1 Feb 2022 21:55:20 +0000 Subject: [PATCH 053/215] Fixed tests --- .../AggregateFunctionIntervalLengthSum.h | 10 ++-- .../AggregateFunctionSequenceMatch.h | 10 ++-- .../ReservoirSamplerDeterministic.h | 3 +- src/Storages/System/StorageSystemGrants.cpp | 2 +- .../System/StorageSystemPrivileges.cpp | 10 ++-- src/Storages/System/StorageSystemPrivileges.h | 2 +- .../0_stateless/00702_join_on_dups.sql | 46 +++++++++---------- .../02117_show_create_table_system.reference | 4 +- 8 files changed, 44 insertions(+), 43 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h index 8c53e807f05..92f527f7c43 100644 --- a/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h +++ b/src/AggregateFunctions/AggregateFunctionIntervalLengthSum.h @@ -90,11 +90,11 @@ struct AggregateFunctionIntervalLengthSumData void sort() { - if (!sorted) - { - ::sort(std::begin(segments), std::end(segments)); - sorted = true; - } + if (sorted) + return; + + ::sort(std::begin(segments), std::end(segments)); + sorted = true; } void serialize(WriteBuffer & buf) const diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 3ae5d779cc6..248454bef02 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -75,11 +75,11 @@ struct AggregateFunctionSequenceMatchData final void sort() { - if (!sorted) - { - ::sort(std::begin(events_list), std::end(events_list), Comparator{}); - sorted = true; - } + if (sorted) + return; + + ::sort(std::begin(events_list), std::end(events_list), Comparator{}); + sorted = true; } void serialize(WriteBuffer & buf) const diff --git a/src/AggregateFunctions/ReservoirSamplerDeterministic.h b/src/AggregateFunctions/ReservoirSamplerDeterministic.h index 72ccb0e95a2..2baeea76996 100644 --- a/src/AggregateFunctions/ReservoirSamplerDeterministic.h +++ 
b/src/AggregateFunctions/ReservoirSamplerDeterministic.h
@@ -260,7 +260,8 @@ private:
         if (sorted)
             return;
 
-        ::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs.first < rhs.first; });
+        /// In order to provide a deterministic result we must sort by value and hash
+        ::sort(samples.begin(), samples.end(), [](const auto & lhs, const auto & rhs) { return lhs < rhs; });
         sorted = true;
     }
 
diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp
index f55145ccfc7..3613a752d92 100644
--- a/src/Storages/System/StorageSystemGrants.cpp
+++ b/src/Storages/System/StorageSystemGrants.cpp
@@ -23,7 +23,7 @@ NamesAndTypesList StorageSystemGrants::getNamesAndTypes()
     NamesAndTypesList names_and_types{
         {"user_name", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
         {"role_name", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
-        {"access_type", std::make_shared<DataTypeEnum8>(StorageSystemPrivileges::getAccessTypeEnumValues())},
+        {"access_type", std::make_shared<DataTypeEnum16>(StorageSystemPrivileges::getAccessTypeEnumValues())},
         {"database", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
         {"table", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
         {"column", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp
index 6a4d2e1087e..85d7b642516 100644
--- a/src/Storages/System/StorageSystemPrivileges.cpp
+++ b/src/Storages/System/StorageSystemPrivileges.cpp
@@ -44,11 +44,11 @@ namespace
 }
 
 
-const std::vector<std::pair<String, Int8>> & StorageSystemPrivileges::getAccessTypeEnumValues()
+const std::vector<std::pair<String, Int16>> & StorageSystemPrivileges::getAccessTypeEnumValues()
 {
-    static const std::vector<std::pair<String, Int8>> values = []
+    static const std::vector<std::pair<String, Int16>> values = []
     {
-        std::vector<std::pair<String, Int8>> res;
+        std::vector<std::pair<String, Int16>> res;
 
 #define ADD_ACCESS_TYPE_ENUM_VALUE(name, aliases, node_type, parent_group_name) \
     res.emplace_back(toString(AccessType::name), static_cast<size_t>(AccessType::name));
 
@@ -65,10 +65,10 @@
 NamesAndTypesList StorageSystemPrivileges::getNamesAndTypes()
 {
     NamesAndTypesList names_and_types{
-        {"privilege", std::make_shared<DataTypeEnum8>(getAccessTypeEnumValues())},
+        {"privilege", std::make_shared<DataTypeEnum16>(getAccessTypeEnumValues())},
         {"aliases", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
         {"level", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeEnum8>(getLevelEnumValues()))},
-        {"parent_group", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeEnum8>(getAccessTypeEnumValues()))},
+        {"parent_group", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeEnum16>(getAccessTypeEnumValues()))},
     };
     return names_and_types;
 }
diff --git a/src/Storages/System/StorageSystemPrivileges.h b/src/Storages/System/StorageSystemPrivileges.h
index dad12d14ac0..5eaba9bed79 100644
--- a/src/Storages/System/StorageSystemPrivileges.h
+++ b/src/Storages/System/StorageSystemPrivileges.h
@@ -14,7 +14,7 @@ class StorageSystemPrivileges final : public shared_ptr_helper
-    static const std::vector<std::pair<String, Int8>> & getAccessTypeEnumValues();
+    static const std::vector<std::pair<String, Int16>> & getAccessTypeEnumValues();
 
 protected:
     friend struct shared_ptr_helper<StorageSystemPrivileges>;
diff --git a/tests/queries/0_stateless/00702_join_on_dups.sql b/tests/queries/0_stateless/00702_join_on_dups.sql
index 852378f543f..48c80446f83 100644
--- a/tests/queries/0_stateless/00702_join_on_dups.sql
+++ b/tests/queries/0_stateless/00702_join_on_dups.sql
@@ -10,68 +10,68 @@ insert into Y (id, y_a) values (1, 'r1'), (1, 'r2'), (2, 'r3'), (3, 'r4'),
 insert into Y (id, y_a, y_b) values (4, 'r6', 'nr6'), (6, 'r7', 'nr7'), (7, 'r8', 'nr8'), (9, 'r9', 'nr9');
 
 select 'inner';
-select X.*, Y.* from X inner join Y on X.id = Y.id order by id;
+select 
X.*, Y.* from X inner join Y on X.id = Y.id order by X.id, X.x_a, X.x_b, Y.id, Y.y_a, Y.y_b; select 'inner subs'; -select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id order by id; +select s.*, j.* from (select * from X) as s inner join (select * from Y) as j on s.id = j.id order by s.id, s.x_a, s.x_b, j.id, j.y_a, j.y_b; select 'inner expr'; -select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1) order by id; +select X.*, Y.* from X inner join Y on (X.id + 1) = (Y.id + 1) order by X.id, X.x_a, X.x_b, Y.id, Y.y_a, Y.y_b; select 'left'; -select X.*, Y.* from X left join Y on X.id = Y.id order by id; +select X.*, Y.* from X left join Y on X.id = Y.id order by X.id, X.x_a, X.x_b, Y.id, Y.y_a, Y.y_b; select 'left subs'; -select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id order by id; +select s.*, j.* from (select * from X) as s left join (select * from Y) as j on s.id = j.id order by s.id, s.x_a, s.x_b, j.id, j.y_a, j.y_b; select 'left expr'; -select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1) order by id; +select X.*, Y.* from X left join Y on (X.id + 1) = (Y.id + 1) order by X.id, X.x_a, X.x_b, Y.id, Y.y_a, Y.y_b; select 'right'; -select X.*, Y.* from X right join Y on X.id = Y.id order by id; +select X.*, Y.* from X right join Y on X.id = Y.id order by X.id, X.x_a, X.x_b, Y.id, Y.y_a, Y.y_b; select 'right subs'; -select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by id; +select s.*, j.* from (select * from X) as s right join (select * from Y) as j on s.id = j.id order by s.id, s.x_a, s.x_b, j.id, j.y_a, j.y_b; --select 'right expr'; --select X.*, Y.* from X right join Y on (X.id + 1) = (Y.id + 1) order by id; select 'full'; -select X.*, Y.* from X full join Y on X.id = Y.id order by id; +select X.*, Y.* from X full join Y on X.id = Y.id order by X.id, X.x_a, X.x_b, Y.id, Y.y_a, Y.y_b; select 'full subs'; -select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by id; +select s.*, j.* from (select * from X) as s full join (select * from Y) as j on s.id = j.id order by s.id, s.x_a; --select 'full expr'; --select X.*, Y.* from X full join Y on (X.id + 1) = (Y.id + 1) order by id; select 'self inner'; -select X.*, s.* from X inner join (select * from X) as s on X.id = s.id order by X.id, X.x_a, s.x_a; +select X.*, s.* from X inner join (select * from X) as s on X.id = s.id order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; select 'self inner nullable'; -select X.*, s.* from X inner join (select * from X) as s on X.x_b = s.x_b order by X.id; +select X.*, s.* from X inner join (select * from X) as s on X.x_b = s.x_b order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; select 'self inner nullable vs not nullable'; -select X.*, s.* from X inner join (select * from X) as s on X.id = s.x_b order by X.id; +select X.*, s.* from X inner join (select * from X) as s on X.id = s.x_b order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; -- TODO: s.y_b == '' instead of NULL select 'self inner nullable vs not nullable 2'; -select Y.*, s.* from Y inner join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; +select Y.*, s.* from Y inner join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by Y.id, Y.y_a, Y.y_b, s.id, s.y_a, s.y_b; select 'self left'; select X.*, s.* from X left join (select * from X) as s on X.id = s.id order by X.id, X.x_a, s.x_a; select 'self left nullable'; -select X.*, s.* from X left 
join (select * from X) as s on X.x_b = s.x_b order by X.id; +select X.*, s.* from X left join (select * from X) as s on X.x_b = s.x_b order by X.id, X.x_a; select 'self left nullable vs not nullable'; -select X.*, s.* from X left join (select * from X) as s on X.id = s.x_b order by X.id; +select X.*, s.* from X left join (select * from X) as s on X.id = s.x_b order by X.id, X.x_a; -- TODO: s.y_b == '' instead of NULL select 'self left nullable vs not nullable 2'; -select Y.*, s.* from Y left join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; +select Y.*, s.* from Y left join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by Y.id, Y.y_a; select 'self right'; -select X.*, s.* from X right join (select * from X) as s on X.id = s.id order by X.id, X.x_a, s.x_a; +select X.*, s.* from X right join (select * from X) as s on X.id = s.id order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; select 'self right nullable'; -select X.*, s.* from X right join (select * from X) as s on X.x_b = s.x_b order by X.id; +select X.*, s.* from X right join (select * from X) as s on X.x_b = s.x_b order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; select 'self right nullable vs not nullable'; -select X.*, s.* from X right join (select * from X) as s on X.id = s.x_b order by X.id; +select X.*, s.* from X right join (select * from X) as s on X.id = s.x_b order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; --select 'self right nullable vs not nullable 2'; --select Y.*, s.* from Y right join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; select 'self full'; -select X.*, s.* from X full join (select * from X) as s on X.id = s.id order by X.id, X.x_a, s.x_a; +select X.*, s.* from X full join (select * from X) as s on X.id = s.id order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; select 'self full nullable'; -select X.*, s.* from X full join (select * from X) as s on X.x_b = s.x_b order by X.id; +select X.*, s.* from X full join (select * from X) as s on X.x_b = s.x_b order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; select 'self full nullable vs not nullable'; -select X.*, s.* from X full join (select * from X) as s on X.id = s.x_b order by X.id; +select X.*, s.* from X full join (select * from X) as s on X.id = s.x_b order by X.id, X.x_a, X.x_b, s.id, s.x_a, s.x_b; --select 'self full nullable vs not nullable 2'; --select Y.*, s.* from Y full join (select * from Y) as s on concat('n', Y.y_a) = s.y_b order by id; diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 234804f1078..2be7f65e227 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -20,7 +20,7 @@ CREATE TABLE system.errors\n(\n `name` String,\n `code` Int32,\n `value CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `description` String\n)\nENGINE = SystemEvents()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE 
system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, 
\'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM 
FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127, \'POSTGRES\' = 128, \'SQLITE\' = 129, \'ODBC\' = 130, \'JDBC\' = 131, \'HDFS\' = 132, \'S3\' = 133, \'SOURCES\' = 134, \'ALL\' = 135, \'NONE\' = 136),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' @@ -35,7 +35,7 @@ CREATE TABLE system.one\n(\n `dummy` UInt8\n)\nENGINE = SystemOne()\nCOMMENT CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `table` String,\n `task_name` String,\n `task_uuid` UUID,\n `create_time` DateTime,\n `part_name` String,\n `part_uuid` UUID,\n `to_shard` String,\n `dst_part_name` String,\n `update_time` DateTime,\n `state` String,\n `rollback` UInt8,\n `num_tries` UInt32,\n `last_exception` String\n)\nENGINE = SystemShardMoves()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` 
Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n `subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.privileges\n(\n `privilege` Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW 
POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum8(\'POSTGRES\' = -128, \'SQLITE\' = -127, \'ODBC\' = -126, \'JDBC\' = -125, \'HDFS\' = -124, \'S3\' = -123, \'SOURCES\' = -122, \'ALL\' = -121, \'NONE\' = -120, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION 
BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE 
PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127, \'POSTGRES\' = 128, \'SQLITE\' = 129, \'ODBC\' = 130, \'JDBC\' = 131, \'HDFS\' = 132, \'S3\' = 133, \'SOURCES\' = 134, \'ALL\' = 135, \'NONE\' = 136),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' 
= 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToSymbol\' = 120, \'demangle\' = 121, \'INTROSPECTION\' = 122, \'FILE\' = 123, \'URL\' = 124, \'REMOTE\' = 125, \'MONGO\' = 126, \'MYSQL\' = 127, \'POSTGRES\' = 128, \'SQLITE\' = 129, \'ODBC\' = 130, \'JDBC\' = 131, \'HDFS\' = 132, \'S3\' = 133, \'SOURCES\' = 134, \'ALL\' = 135, \'NONE\' = 136))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n 
`parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\'
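The reference hunks above change `access_type`, `privilege` and `parent_group` from Enum8 to Enum16, and the codes show why: Enum8 values are stored as Int8, whose range is -128..127, so the old layout had to wrap the tail of AccessType into negative codes (\'POSTGRES\' = -128 ... \'NONE\' = -120), while Enum16, stored as Int16, lets the same entries keep their natural codes 128..136. A minimal sketch of the range constraint (the codes come from the reference output above; the asserts are illustrative, not ClickHouse sources):

    // Enum8 is backed by Int8 and Enum16 by Int16, so an entry with
    // code 136 can only exist in an Enum16 column.
    #include <cstdint>
    #include <limits>

    static_assert(136 > std::numeric_limits<std::int8_t>::max(),
                  "'NONE' = 136 does not fit an Int8-backed Enum8");
    static_assert(136 <= std::numeric_limits<std::int16_t>::max(),
                  "an Int16-backed Enum16 holds it with room to spare");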
From c39ad2aa490643520f0ff176ab35836d627f340b Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Wed, 2 Feb 2022 12:44:04 +0000
Subject: [PATCH 054/215] bitsetsort fix reverse_conditional_swap

---
 base/base/BitSetSort.h | 72 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/base/base/BitSetSort.h b/base/base/BitSetSort.h
index 02ba02e656c..1fadc8149f2 100644
--- a/base/base/BitSetSort.h
+++ b/base/base/BitSetSort.h
@@ -84,7 +84,77 @@ class __reverse_conditional_swap {
     *__y = __result ? _VSTD::move(*__y) : _VSTD::move(*__x);
     *__x = _VSTD::move(__min);
   } else {
-    if (__result) {
+    /** This change is required for ClickHouse.
+      * It seems that this is the slow branch, and its logic should be identical to the fast branch.
+      * Logic of the fast branch:
+      * if (result)
+      *     min = x;
+      *     y = y;
+      *     x = x;
+      * else
+      *     min = y;
+      *     y = x;
+      *     x = y;
+      *
+      * We swap elements only if result is false.
+      *
+      * Example to reproduce the sort bug:
+      * int main(int argc, char ** argv)
+      * {
+      *     (void)(argc);
+      *     (void)(argv);
+      *
+      *     std::vector<std::pair<int, int>> values = {
+      *         {1, 1},
+      *         {3, -1},
+      *         {2, 1},
+      *         {7, -1},
+      *         {3, 1},
+      *         {999, -1},
+      *         {4, 1},
+      *         {7, -1},
+      *         {5, 1},
+      *         {8, -1}
+      *     };
+      *
+      *     ::stdext::bitsetsort(values.begin(), values.end());
+      *     bool is_sorted = std::is_sorted(values.begin(), values.end());
+      *
+      *     std::cout << "Array " << values.size() << " is sorted " << is_sorted << std::endl;
+      *
+      *     for (auto & value : values)
+      *         std::cout << value.first << " " << value.second << std::endl;
+      *
+      *     return 0;
+      * }
+      *
+      * Output before change:
+      * Array 10 is sorted 0
+      * 1 1
+      * 2 1
+      * 3 -1
+      * 3 1
+      * 4 1
+      * 7 -1
+      * 7 -1
+      * 8 -1
+      * 5 1
+      * 999 -1
+      *
+      * After change:
+      * Array 10 is sorted 1
+      * 1 1
+      * 2 1
+      * 3 -1
+      * 3 1
+      * 4 1
+      * 5 1
+      * 7 -1
+      * 7 -1
+      * 8 -1
+      * 999 -1
+      */
+    if (!__result) {
       _VSTD::iter_swap(__x, __y);
     }
   }
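The comment added above pins down the invariant: the slow path of `__reverse_conditional_swap` must leave the pair in the same state as the branchless fast path, so it must swap exactly when `__result` is false. A standalone sketch of that invariant (the helper names `fast_path`/`slow_path` are hypothetical; this is a simplification, not the library code):

    #include <algorithm>
    #include <cassert>
    #include <functional>
    #include <utility>

    // Fast path as described in the comment: branchless conditional moves.
    template <class It, class Cmp>
    void fast_path(It x, It y, Cmp comp)
    {
        bool result = !comp(*y, *x);  // true => pair is already in order
        auto min = result ? std::move(*x) : std::move(*y);
        *y = result ? std::move(*y) : std::move(*x);
        *x = std::move(min);
    }

    // Fixed slow path: swap only when the pair is out of order.
    template <class It, class Cmp>
    void slow_path(It x, It y, Cmp comp)
    {
        bool result = !comp(*y, *x);
        if (!result)  // the code tested `result` here before the fix
            std::iter_swap(x, y);
    }

    int main()
    {
        int a[] = {3, 1};
        int b[] = {3, 1};
        fast_path(a, a + 1, std::less<int>());
        slow_path(b, b + 1, std::less<int>());
        assert(a[0] == b[0] && a[1] == b[1]);  // both paths end up as {1, 3}
        return 0;
    }

Swapping on `__result` instead, as before the fix, reorders exactly the pairs that are already in order, which is how the out-of-order rows in the reproducer's output appear. Even with the fix, bitsetsort is not a stable sort, so rows that compare equal can surface in either order; that is likely why the next patch strengthens the ORDER BY clauses in so many tests.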
From 1cd1fb96586fee46938018af4cb12e32fbdaa7c3 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Wed, 2 Feb 2022 15:15:20 +0000
Subject: [PATCH 055/215] Fixed tests

---
 .../System/StorageSystemPrivileges.cpp        |   4 +-
 .../00628_in_lambda_on_merge_table_bug.sql    |   6 +-
 .../00754_alter_modify_order_by.reference     |   2 -
 .../00754_alter_modify_order_by.sql           |   8 +-
 .../00799_function_dry_run.reference          |  10 +-
 .../0_stateless/00799_function_dry_run.sql    |   2 +-
 .../00853_join_with_nulls_crash.sql           |  14 +-
 .../00909_ngram_distance.reference            | 402 +++++++++---------
 .../0_stateless/00909_ngram_distance.sql      |  74 ++--
 tests/queries/0_stateless/00933_alter_ttl.sql |   2 +-
 ..._select_with_offset_fetch_clause.reference |   2 +-
 .../01525_select_with_offset_fetch_clause.sql |   2 +-
 .../0_stateless/01852_map_combinator.sql      |   2 +-
 ...02006_use_constants_in_with_and_select.sql |   4 +-
 14 files changed, 267 insertions(+), 267 deletions(-)

diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp
index 85d7b642516..d1a0f088834 100644
--- a/src/Storages/System/StorageSystemPrivileges.cpp
+++ b/src/Storages/System/StorageSystemPrivileges.cpp
@@ -77,12 +77,12 @@ NamesAndTypesList StorageSystemPrivileges::getNamesAndTypes()
 void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr, const SelectQueryInfo &) const
 {
     size_t column_index = 0;
-    auto & column_access_type = assert_cast<ColumnInt8 &>(*res_columns[column_index++]).getData();
+    auto & column_access_type = assert_cast<ColumnInt16 &>(*res_columns[column_index++]).getData();
     auto & column_aliases = assert_cast<ColumnString &>(assert_cast<ColumnArray &>(*res_columns[column_index]).getData());
     auto & column_aliases_offsets = assert_cast<ColumnArray &>(*res_columns[column_index++]).getOffsets();
     auto & column_level = assert_cast<ColumnInt8 &>(assert_cast<ColumnNullable &>(*res_columns[column_index]).getNestedColumn()).getData();
     auto & column_level_null_map = assert_cast<ColumnNullable &>(*res_columns[column_index++]).getNullMapData();
-    auto & column_parent_group = assert_cast<ColumnInt8 &>(assert_cast<ColumnNullable &>(*res_columns[column_index]).getNestedColumn()).getData();
+    auto & column_parent_group = assert_cast<ColumnInt16 &>(assert_cast<ColumnNullable &>(*res_columns[column_index]).getNestedColumn()).getData();
     auto & column_parent_group_null_map = assert_cast<ColumnNullable &>(*res_columns[column_index++]).getNullMapData();
 
     auto add_row = [&](AccessType access_type, const std::string_view & aliases, Level max_level, AccessType parent_group)
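The remaining hunks of this patch all harden test queries the same way: each ORDER BY gains enough trailing keys to make the row order unique, so the expected output no longer depends on how the sort implementation breaks ties. A small illustration of the failure mode (plain C++, unrelated to the sources above):

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main()
    {
        std::vector<std::pair<int, int>> rows = {{1, 2}, {1, 1}, {0, 3}};

        // Like "ORDER BY key": the two rows with key 1 tie, so their
        // relative order after an unstable sort is unspecified.
        std::sort(rows.begin(), rows.end(),
                  [](const auto & a, const auto & b) { return a.first < b.first; });

        // Like "ORDER BY key, value": the full tuple is a unique key,
        // so every conforming implementation produces the same order.
        std::sort(rows.begin(), rows.end());

        for (const auto & row : rows)
            std::printf("%d %d\n", row.first, row.second);  // 0 3, 1 1, 1 2
        return 0;
    }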
diff --git a/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql b/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
index 48a90e6fe77..ddf98149c3b 100644
--- a/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
+++ b/tests/queries/0_stateless/00628_in_lambda_on_merge_table_bug.sql
@@ -8,11 +8,11 @@ create table test_in_tuple as test_in_tuple_1 engine = Merge(currentDatabase(),
 insert into test_in_tuple_1 values (1, 1, [1, 2], [1, 2]);
 insert into test_in_tuple_2 values (2, 1, [1, 2], [1, 2]);
 
-select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y order by _table;
+select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y order by _table, arr_x, arr_y;
 select '-';
-select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table;
+select key, arr_x, arr_y, _table from test_in_tuple left array join x as arr_x, y as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table, arr_x, arr_y;
 select '-';
-select key, arr_x, arr_y, _table from test_in_tuple left array join arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), x, x ,y) as arr_x, arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), y, x ,y) as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table;
+select key, arr_x, arr_y, _table from test_in_tuple left array join arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), x, x ,y) as arr_x, arrayFilter((t, x_0, x_1) -> (key_2, x_0, x_1) in (1, 1, 1), y, x ,y) as arr_y where (key_2, arr_x, arr_y) in (1, 1, 1) order by _table, arr_x, arr_y;
 
 drop table if exists test_in_tuple_1;
 drop table if exists test_in_tuple_2;
diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.reference b/tests/queries/0_stateless/00754_alter_modify_order_by.reference
index 0279e5ca11b..0582041ca14 100644
--- a/tests/queries/0_stateless/00754_alter_modify_order_by.reference
+++ b/tests/queries/0_stateless/00754_alter_modify_order_by.reference
@@ -1,8 +1,6 @@
 *** Check that the parts are sorted according to the new key. ***
 1 2 0 10
-1 2 0 20
 1 2 2 40
-1 2 2 50
 1 2 1 30
 *** Check that the rows are collapsed according to the new key. ***
 1 2 0 30
diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.sql b/tests/queries/0_stateless/00754_alter_modify_order_by.sql
index a09d824c928..6d8729139b5 100644
--- a/tests/queries/0_stateless/00754_alter_modify_order_by.sql
+++ b/tests/queries/0_stateless/00754_alter_modify_order_by.sql
@@ -21,14 +21,16 @@ ALTER TABLE summing MODIFY ORDER BY (x, y, nonexistent); -- { serverError 47}
 /* Can't modify ORDER BY so that it is no longer a prefix of the PRIMARY KEY. */
 ALTER TABLE summing MODIFY ORDER BY x; -- { serverError 36}
 
-INSERT INTO summing(x, y, val) VALUES (1, 2, 10), (1, 2, 20);
+INSERT INTO summing(x, y, val) VALUES (1, 2, 10);
 
 ALTER TABLE summing ADD COLUMN z UInt32 AFTER y, MODIFY ORDER BY (x, y, -z);
 
-INSERT INTO summing(x, y, z, val) values (1, 2, 1, 30), (1, 2, 2, 40), (1, 2, 2, 50);
+INSERT INTO summing(x, y, z, val) values (1, 2, 1, 30), (1, 2, 2, 40);
 
 SELECT '*** Check that the parts are sorted according to the new key. ***';
-SELECT * FROM summing ORDER BY _part;
+SELECT * FROM summing;
+
+INSERT INTO summing(x, y, z, val) values (1, 2, 0, 20), (1, 2, 2, 50);
 
 SELECT '*** Check that the rows are collapsed according to the new key. ***';
 SELECT * FROM summing FINAL ORDER BY x, y, z;
diff --git a/tests/queries/0_stateless/00799_function_dry_run.reference b/tests/queries/0_stateless/00799_function_dry_run.reference
index 35cebe7569a..517ab65908f 100644
--- a/tests/queries/0_stateless/00799_function_dry_run.reference
+++ b/tests/queries/0_stateless/00799_function_dry_run.reference
@@ -1,9 +1,9 @@
 0.3 2018-11-19 13:00:00 \N
 0.3 2018-11-19 13:05:00 \N
 0.4 2018-11-19 13:10:00 1
-0.5 2018-11-19 13:15:00 1.2
-0.6 2018-11-19 13:15:00 1.5
-0.7 2018-11-19 13:20:00 1.8
-0.8 2018-11-19 13:25:00 2.1
-0.9 2018-11-19 13:25:00 2.4
+0.5 2018-11-19 13:15:00 1.5
 0.5 2018-11-19 13:30:00 2.2
+0.6 2018-11-19 13:15:00 1.3
+0.7 2018-11-19 13:20:00 1.8
+0.8 2018-11-19 13:25:00 2.4
+0.9 2018-11-19 13:25:00 2.1
diff --git a/tests/queries/0_stateless/00799_function_dry_run.sql b/tests/queries/0_stateless/00799_function_dry_run.sql
index 4f3df6a0ff8..946ac98044c 100644
--- a/tests/queries/0_stateless/00799_function_dry_run.sql
+++ b/tests/queries/0_stateless/00799_function_dry_run.sql
@@ -30,6 +30,6 @@ FROM
         business_dttm
     FROM bm
     ORDER BY business_dttm
-);
+) ORDER BY amount, business_dttm;
 
 DROP TABLE bm;
diff --git a/tests/queries/0_stateless/00853_join_with_nulls_crash.sql b/tests/queries/0_stateless/00853_join_with_nulls_crash.sql
index 464ddbb1990..c63c2d99cba 100644
--- a/tests/queries/0_stateless/00853_join_with_nulls_crash.sql
+++ b/tests/queries/0_stateless/00853_join_with_nulls_crash.sql
@@ -21,37 +21,37 @@ SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2
 ALL FULL JOIN ( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2
 ON s1.other = s2.other
-ORDER BY s2.other DESC, count_a;
+ORDER BY s2.other DESC, count_a, s1.other;
 
 SELECT s1.other, s2.other, count_a, count_b, toTypeName(s1.other), toTypeName(s2.other) FROM ( SELECT other, count() AS count_a FROM table_a GROUP BY other ) s1
 ALL FULL JOIN ( SELECT other, count() AS count_b FROM table_b GROUP BY other ) s2
 USING other
-ORDER BY s2.other DESC, count_a;
+ORDER BY s2.other DESC, count_a, s1.other;
 
 SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1
 ALL FULL JOIN ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2
 ON s1.something = s2.something
-ORDER BY count_a DESC;
+ORDER BY count_a DESC, something, s2.something;
 
 SELECT s1.something, s2.something, count_a, count_b, toTypeName(s1.something), toTypeName(s2.something) FROM ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) s1
 ALL RIGHT JOIN ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) s2
 USING (something)
-ORDER BY count_a DESC;
+ORDER BY count_a DESC, s1.something, s2.something;
 
 SET
joined_subquery_requires_alias = 0; SELECT something, count_a, count_b, toTypeName(something) FROM - ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) + ( SELECT something, count() AS count_a FROM table_a GROUP BY something ) as s1 ALL FULL JOIN - ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) + ( SELECT something, count() AS count_b FROM table_b GROUP BY something ) as s2 USING (something) -ORDER BY count_a DESC; +ORDER BY count_a DESC, something DESC; DROP TABLE table_a; DROP TABLE table_b; diff --git a/tests/queries/0_stateless/00909_ngram_distance.reference b/tests/queries/0_stateless/00909_ngram_distance.reference index 52fb462a9ed..290e24faac5 100644 --- a/tests/queries/0_stateless/00909_ngram_distance.reference +++ b/tests/queries/0_stateless/00909_ngram_distance.reference @@ -110,116 +110,116 @@ 77 636 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 0 привет как дела?... Херсон 600 пап привет как дела - Яндекс.Видео 684 привет как дела клип - Яндекс.Видео 692 привет братан как дела - Яндекс.Видео 707 -привет 1000 -http://metric.ru/ 1000 http://autometric.ru/ 1000 +http://metric.ru/ 1000 http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 http://metrika.ru/ 1000 +http://metris.ru/ 1000 +привет 1000 0 http://metric.ru/ 765 http://metris.ru/ 765 http://metrika.ru/ 778 http://autometric.ru/ 810 http://metrica.yandex.com/ 846 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 пап привет как дела - Яндекс.Видео 1000 +привет 1000 привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 привет как дела?... Херсон 297 пап привет как дела - Яндекс.Видео 422 привет как дела клип - Яндекс.Видео 435 привет братан как дела - Яндекс.Видео 500 привет 529 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 привет как дела?... Херсон 459 пап привет как дела - Яндекс.Видео 511 привет 529 привет как дела клип - Яндекс.Видео 565 привет братан как дела - Яндекс.Видео 583 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 http://metrica.yandex.com/ 793 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... 
Херсон 1000 http://metric.ru/ 600 http://metrica.yandex.com/ 655 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrika.ru/ 619 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 http://metrica.yandex.com/ 793 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 600 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 http://metrica.yandex.com/ 724 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrica.yandex.com/ 714 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 0 0 0 @@ -332,138 +332,138 @@ http://metrika.ru/ 1000 77 636 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 0 привет как дела?... Херсон 600 пап привет как дела - Яндекс.Видео 684 привет как дела клип - Яндекс.Видео 692 привет братан как дела - Яндекс.Видео 707 -привет 1000 -http://metric.ru/ 1000 http://autometric.ru/ 1000 +http://metric.ru/ 1000 http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 http://metrika.ru/ 1000 +http://metris.ru/ 1000 +привет 1000 0 http://metric.ru/ 765 http://metris.ru/ 765 http://metrika.ru/ 778 http://autometric.ru/ 810 http://metrica.yandex.com/ 846 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 пап привет как дела - Яндекс.Видео 1000 +привет 1000 привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 привет как дела?... 
Херсон 297 пап привет как дела - Яндекс.Видео 422 привет как дела клип - Яндекс.Видео 435 привет братан как дела - Яндекс.Видео 500 привет 529 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 привет как дела?... Херсон 459 пап привет как дела - Яндекс.Видео 511 привет 529 привет как дела клип - Яндекс.Видео 565 привет братан как дела - Яндекс.Видео 583 -http://metric.ru/ 1000 + 1000 http://autometric.ru/ 1000 +http://metric.ru/ 1000 http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 http://metrika.ru/ 1000 - 1000 +http://metris.ru/ 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 http://metrica.yandex.com/ 793 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 http://metrica.yandex.com/ 793 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 600 http://metrica.yandex.com/ 655 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrika.ru/ 619 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 http://metrica.yandex.com/ 793 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 600 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 http://metrica.yandex.com/ 724 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrica.yandex.com/ 714 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 привет как дела клип - Яндекс.Видео 0 пап привет как дела - Яндекс.Видео 169 привет братан как дела - Яндекс.Видео 235 привет как дела?... Херсон 544 привет 784 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 0 0 0 @@ -581,78 +581,78 @@ http://metrika.ru/ 1000 привет как дела клип - Яндекс.Видео 412 привет братан как дела - Яндекс.Видео 461 привет 471 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 привет как дела?... Херсон 343 пап привет как дела - Яндекс.Видео 446 привет 471 привет как дела клип - Яндекс.Видео 482 привет братан как дела - Яндекс.Видео 506 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 http://metrica.yandex.com/ 852 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 667 http://metrica.yandex.com/ 704 http://autometric.ru/ 727 http://metris.ru/ 778 http://metrika.ru/ 789 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrika.ru/ 684 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 http://metrica.yandex.com/ 852 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 667 http://autometric.ru/ 727 http://metrica.yandex.com/ 778 http://metris.ru/ 778 http://metrika.ru/ 789 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrica.yandex.com/ 769 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 0 0 0 @@ -769,87 +769,87 @@ http://metrika.ru/ 1000 пап привет как дела - Яндекс.Видео 928 привет как дела клип - Яндекс.Видео 929 привет братан как дела - Яндекс.Видео 955 -привет 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +привет 1000 привет как дела?... Херсон 672 пап привет как дела - Яндекс.Видео 735 привет как дела клип - Яндекс.Видео 741 привет братан как дела - Яндекс.Видео 753 -привет 1000 -http://metric.ru/ 1000 + 1000 http://autometric.ru/ 1000 +http://metric.ru/ 1000 http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 http://metrika.ru/ 1000 - 1000 +http://metris.ru/ 1000 +привет 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 http://metrica.yandex.com/ 852 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 http://metrica.yandex.com/ 852 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 667 http://metrica.yandex.com/ 704 http://autometric.ru/ 727 http://metris.ru/ 778 http://metrika.ru/ 789 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrika.ru/ 684 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 http://metrica.yandex.com/ 852 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metric.ru/ 667 http://autometric.ru/ 727 http://metrica.yandex.com/ 778 http://metris.ru/ 778 http://metrika.ru/ 789 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 http://metrica.yandex.com/ 769 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 diff --git a/tests/queries/0_stateless/00909_ngram_distance.sql b/tests/queries/0_stateless/00909_ngram_distance.sql index ed800bf6c97..b2f403c415a 100644 --- a/tests/queries/0_stateless/00909_ngram_distance.sql +++ b/tests/queries/0_stateless/00909_ngram_distance.sql @@ -34,17 +34,17 @@ drop table if exists test_distance; create table test_distance (Title String) engine = Memory; insert into test_distance values ('привет как дела?... 
Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'metr')) as distance; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'metr')) as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'привет как дела') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'как привет дела') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrica') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metriks') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'yandex') as distance; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'привет как дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'как привет дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metriks') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'yandex') as distance, Title; select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -80,19 +80,19 @@ select round(1000 * ngramDistanceCaseInsensitiveUTF8('аБВГдеёЖз', 'Аб select round(1000 * ngramDistanceCaseInsensitiveUTF8('абвгдеёжз', 'гдеёЗД')); select round(1000 * ngramDistanceCaseInsensitiveUTF8('АБВГДеёжз', 'ЁЁЁЁЁЁЁЁ')); -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, Title) as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance; +SELECT Title, round(1000 * distance) FROM 
test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, Title) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'Metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'mEtrica') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metriKS') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'YanDEX') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'Metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; select round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5; @@ -128,13 +128,13 @@ select round(1000 * ngramDistance('abcdefgh', 'abcdefg')); select round(1000 * ngramDistance('abcdefgh', 'defgh')); select round(1000 * ngramDistance('abcdefgh', 'aaaaaaaa')); -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'привет как дела') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'как привет дела') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) 
FROM test_distance ORDER BY ngramDistance(Title, 'metrica') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metriks') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'yandex') as distance; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'привет как дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'как привет дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metriks') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'yandex') as distance, Title; select round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -168,13 +168,13 @@ select round(1000 * ngramDistanceCaseInsensitive('abcdefgh', 'abcdeFG')); select round(1000 * ngramDistanceCaseInsensitive('AAAAbcdefgh', 'defgh')); select round(1000 * ngramDistanceCaseInsensitive('ABCdefgH', 'aaaaaaaa')); -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'ПрИвЕт кАК ДЕЛа') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'как ПРИВЕТ дела') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'Metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'mEtrica') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metriKS') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'YanDEX') as distance; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'ПрИвЕт кАК ДЕЛа') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'как ПРИВЕТ дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'Metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'mEtrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metriKS') as distance, Title; +SELECT Title, 
round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'YanDEX') as distance, Title; drop table if exists test_distance; diff --git a/tests/queries/0_stateless/00933_alter_ttl.sql b/tests/queries/0_stateless/00933_alter_ttl.sql index 4f586bb20fa..934d33660de 100644 --- a/tests/queries/0_stateless/00933_alter_ttl.sql +++ b/tests/queries/0_stateless/00933_alter_ttl.sql @@ -13,7 +13,7 @@ insert into ttl values (toDateTime('2100-10-10 00:00:00'), 3); insert into ttl values (toDateTime('2100-10-10 00:00:00'), 4); optimize table ttl partition 10 final; -select * from ttl order by d; +select * from ttl order by d, a; alter table ttl modify ttl a; -- { serverError 450 } diff --git a/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.reference b/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.reference index 422a076b0cb..19a1b0f2ec0 100644 --- a/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.reference +++ b/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.reference @@ -6,5 +6,5 @@ 3 3 1 1 2 1 -3 4 3 3 +3 4 diff --git a/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.sql b/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.sql index fce7dd753d2..3b6f77336fe 100644 --- a/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.sql +++ b/tests/queries/0_stateless/01525_select_with_offset_fetch_clause.sql @@ -4,5 +4,5 @@ DROP TABLE IF EXISTS test_fetch; CREATE TABLE test_fetch(a Int32, b Int32) Engine = Memory; INSERT INTO test_fetch VALUES(1, 1), (2, 1), (3, 4), (3, 3), (5, 4), (0, 6), (5, 7); SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY; -SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS WITH TIES; +SELECT * FROM (SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS WITH TIES) ORDER BY a, b; DROP TABLE test_fetch; diff --git a/tests/queries/0_stateless/01852_map_combinator.sql b/tests/queries/0_stateless/01852_map_combinator.sql index 20923460eb6..3036e2e0ea4 100644 --- a/tests/queries/0_stateless/01852_map_combinator.sql +++ b/tests/queries/0_stateless/01852_map_combinator.sql @@ -6,7 +6,7 @@ CREATE TABLE map_comb(a int, statusMap Map(UInt16, UInt32)) ENGINE = Log; INSERT INTO map_comb VALUES (1, map(1, 10, 2, 10, 3, 10)),(1, map(3, 10, 4, 10, 5, 10)),(2, map(4, 10, 5, 10, 6, 10)),(2, map(6, 10, 7, 10, 8, 10)),(3, map(1, 10, 2, 10, 3, 10)),(4, map(3, 10, 4, 10, 5, 10)),(5, map(4, 10, 5, 10, 6, 10)),(5, map(6, 10, 7, 10, 8, 10)); -SELECT * FROM map_comb ORDER BY a; +SELECT * FROM map_comb ORDER BY a, statusMap; SELECT toTypeName(res), sumMap(statusMap) as res FROM map_comb; SELECT toTypeName(res), sumWithOverflowMap(statusMap) as res FROM map_comb; SELECT toTypeName(res), sumMapMerge(s) as res FROM (SELECT sumMapState(statusMap) AS s FROM map_comb); diff --git a/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql b/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql index 91171c9ab7b..d6ec56cd798 100644 --- a/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql +++ b/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql @@ -4,7 +4,7 @@ SELECT FROM ( SELECT 1 AS col - UNION ALL + UNION DISTINCT SELECT 2 ); @@ -13,7 +13,7 @@ SELECT groupArray(max_size)(col) FROM ( SELECT 1 as col - UNION ALL + UNION DISTINCT SELECT 2 ); 
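The hunks above all chase the same flakiness: a SELECT whose row order is checked against a .reference file is only deterministic up to its ORDER BY key, so rows that tie on that key may come back in any order depending on read and merge order. Each fix either adds tie-breaking ORDER BY columns, wraps the query in an outer sort, or removes the order dependence altogether (e.g. UNION DISTINCT). A minimal sketch of the pattern, using a hypothetical table order_demo whose name and contents are illustrative only and do not appear in the patch:

    -- Hypothetical example: ties on k make the output order unstable.
    drop table if exists order_demo;
    create table order_demo (k Int32, v Int32) engine = Memory;
    insert into order_demo values (1, 2), (1, 1), (2, 3);

    -- Flaky: the two rows with k = 1 may appear in either order.
    select * from order_demo order by k;

    -- Stable: v breaks the tie, so the reference output is deterministic.
    select * from order_demo order by k, v;

    -- When the inner ORDER BY cannot simply be extended (e.g. LIMIT ... WITH TIES
    -- depends on it), the query is wrapped and the outer SELECT pins the final order.
    select * from (select * from order_demo order by k limit 2 with ties) order by k, v;

    drop table order_demo;

The same idea is applied to UNION ALL branches later in the series (see 02006_use_constants_in_with_and_select.sql in the next patch): rather than relying on the order in which branches happen to be merged, the union is wrapped in a subquery and sorted explicitly.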
From 8094582e5f98c0e87f9a83b528909d9a8a5cf93b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 2 Feb 2022 16:49:44 +0000 Subject: [PATCH 056/215] Fixed tests --- tests/queries/0_stateless/00301_csv.sh | 2 +- .../00327_summing_composite_nested.reference | 4 +- .../00327_summing_composite_nested.sql | 4 +- tests/queries/0_stateless/00502_sum_map.sql | 2 +- .../00754_alter_modify_order_by.reference | 2 +- .../00754_alter_modify_order_by.sql | 4 +- .../0_stateless/00951_ngram_search.reference | 524 +++++++++--------- .../0_stateless/00951_ngram_search.sql | 74 +-- .../01030_limit_by_with_ties_error.sh | 34 +- .../01142_with_ties_and_aliases.sql | 6 +- .../01671_merge_join_and_constants.sql | 2 +- tests/queries/0_stateless/01700_deltasum.sql | 42 +- ...02006_use_constants_in_with_and_select.sql | 12 +- 13 files changed, 385 insertions(+), 327 deletions(-) diff --git a/tests/queries/0_stateless/00301_csv.sh b/tests/queries/0_stateless/00301_csv.sh index e10e98a123d..50c64b312a7 100755 --- a/tests/queries/0_stateless/00301_csv.sh +++ b/tests/queries/0_stateless/00301_csv.sh @@ -15,7 +15,7 @@ Hello "world", 789 ,2016-01-03 default,, default-eof,,' | $CLICKHOUSE_CLIENT --input_format_defaults_for_omitted_fields=1 --input_format_csv_empty_as_default=1 --query="INSERT INTO csv FORMAT CSV"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d"; +$CLICKHOUSE_CLIENT --query="SELECT * FROM csv ORDER BY d, s"; $CLICKHOUSE_CLIENT --query="DROP TABLE csv"; $CLICKHOUSE_CLIENT --query="CREATE TABLE csv (t DateTime('Europe/Moscow'), s String) ENGINE = Memory"; diff --git a/tests/queries/0_stateless/00327_summing_composite_nested.reference b/tests/queries/0_stateless/00327_summing_composite_nested.reference index b4233202447..7984f688b67 100644 --- a/tests/queries/0_stateless/00327_summing_composite_nested.reference +++ b/tests/queries/0_stateless/00327_summing_composite_nested.reference @@ -1,8 +1,8 @@ -2000-01-01 1 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] 2000-01-01 1 [2,1] [4,3] [20,22] [2,2,1] ['5','5','0'] [-3,-3,-33] [10,100,1000] +2000-01-01 1 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] +2000-01-01 2 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] 2000-01-01 2 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] 2000-01-01 2 [2,1,1] [4,3,3] [20,22,33] [2,2] ['5','5'] [-3,-3] [10,100] -2000-01-01 2 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] 2000-01-01 1 1 3 10 2000-01-01 1 1 3 22 2000-01-01 1 2 4 11 diff --git a/tests/queries/0_stateless/00327_summing_composite_nested.sql b/tests/queries/0_stateless/00327_summing_composite_nested.sql index 9be21e87abf..e7b1b39ffaa 100644 --- a/tests/queries/0_stateless/00327_summing_composite_nested.sql +++ b/tests/queries/0_stateless/00327_summing_composite_nested.sql @@ -7,9 +7,9 @@ INSERT INTO summing_composite_key VALUES ('2000-01-01', 1, [1,2], ['3','4'], [10 SELECT * FROM summing_composite_key ORDER BY d, k, _part_index; -SELECT d, k, m.k1, m.k2ID, m.s FROM summing_composite_key ARRAY JOIN FirstMap AS m ORDER BY d, k, m.k1, m.k2ID, m.s; +SELECT d, k, m.k1, m.k2ID, m.s FROM summing_composite_key ARRAY JOIN FirstMap AS m ORDER BY d, k, m.k1, m.k2ID, m.s, SecondMap.k1ID, SecondMap.k2Key, SecondMap.k3Type, SecondMap.s; SELECT d, k, m.k1, m.k2ID, sum(m.s) FROM summing_composite_key ARRAY JOIN FirstMap AS m GROUP BY d, k, m.k1, m.k2ID ORDER BY d, k, m.k1, m.k2ID; -SELECT d, k, m.k1, m.k2ID,m. 
s FROM summing_composite_key FINAL ARRAY JOIN FirstMap AS m ORDER BY d, k, m.k1, m.k2ID, m.s; +SELECT d, k, m.k1, m.k2ID, m.s FROM summing_composite_key FINAL ARRAY JOIN FirstMap AS m ORDER BY d, k, m.k1, m.k2ID, m.s; SELECT d, k, m.k1ID, m.k2Key, m.k3Type, m.s FROM summing_composite_key ARRAY JOIN SecondMap AS m ORDER BY d, k, m.k1ID, m.k2Key, m.k3Type, m.s; SELECT d, k, m.k1ID, m.k2Key, m.k3Type, sum(m.s) FROM summing_composite_key ARRAY JOIN SecondMap AS m GROUP BY d, k, m.k1ID, m.k2Key, m.k3Type ORDER BY d, k, m.k1ID, m.k2Key, m.k3Type; diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index 51007a9c78a..3ceb5b82952 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -5,7 +5,7 @@ CREATE TABLE sum_map(date Date, timeslot DateTime, statusMap Nested(status UInt1 INSERT INTO sum_map VALUES ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); -SELECT * FROM sum_map ORDER BY timeslot; +SELECT * FROM sum_map ORDER BY timeslot, statusMap.status, statusMap.requests; SELECT sumMap(statusMap.status, statusMap.requests) FROM sum_map; SELECT sumMap((statusMap.status, statusMap.requests)) FROM sum_map; SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM sum_map); diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.reference b/tests/queries/0_stateless/00754_alter_modify_order_by.reference index 0582041ca14..bcbf66e824d 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by.reference +++ b/tests/queries/0_stateless/00754_alter_modify_order_by.reference @@ -1,7 +1,7 @@ *** Check that the parts are sorted according to the new key. *** -1 2 0 10 1 2 2 40 1 2 1 30 +1 2 0 10 *** Check that the rows are collapsed according to the new key. *** 1 2 0 30 1 2 1 30 diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.sql b/tests/queries/0_stateless/00754_alter_modify_order_by.sql index 6d8729139b5..cb81f868e7b 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by.sql +++ b/tests/queries/0_stateless/00754_alter_modify_order_by.sql @@ -21,11 +21,9 @@ ALTER TABLE summing MODIFY ORDER BY (x, y, nonexistent); -- { serverError 47} /* Can't modyfy ORDER BY so that it is no longer a prefix of the PRIMARY KEY. */ ALTER TABLE summing MODIFY ORDER BY x; -- { serverError 36} -INSERT INTO summing(x, y, val) VALUES (1, 2, 10); - ALTER TABLE summing ADD COLUMN z UInt32 AFTER y, MODIFY ORDER BY (x, y, -z); -INSERT INTO summing(x, y, z, val) values (1, 2, 1, 30), (1, 2, 2, 40); +INSERT INTO summing(x, y, z, val) values (1, 2, 0, 10), (1, 2, 1, 30), (1, 2, 2, 40); SELECT '*** Check that the parts are sorted according to the new key. ***'; SELECT * FROM summing; diff --git a/tests/queries/0_stateless/00951_ngram_search.reference b/tests/queries/0_stateless/00951_ngram_search.reference index 1b845b6015d..a98f63a198a 100644 --- a/tests/queries/0_stateless/00951_ngram_search.reference +++ b/tests/queries/0_stateless/00951_ngram_search.reference @@ -110,115 +110,115 @@ 1000 500 0 -привет как дела?... 
Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 http://metric.ru/ 1000 -http://autometric.ru/ 1000 http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 1000 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 + 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 привет 308 привет братан как дела - Яндекс.Видео 923 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 пап привет как дела - Яндекс.Видео 1000 -http://metric.ru/ 0 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 + 0 http://autometric.ru/ 0 +http://metric.ru/ 0 http://metrica.yandex.com/ 0 -http://metris.ru/ 0 http://metrika.ru/ 0 - 0 +http://metris.ru/ 0 привет 308 -привет как дела?... Херсон 769 -привет как дела клип - Яндекс.Видео 769 привет братан как дела - Яндекс.Видео 769 +привет как дела клип - Яндекс.Видео 769 +привет как дела?... Херсон 769 пап привет как дела - Яндекс.Видео 846 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 600 +http://metric.ru/ 600 http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... 
Херсон 0 http://metrika.ru/ 600 -http://metric.ru/ 800 +http://metris.ru/ 600 http://autometric.ru/ 800 +http://metric.ru/ 800 http://metrica.yandex.com/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 600 +http://metric.ru/ 600 http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrika.ru/ 600 -http://metric.ru/ 800 +http://metris.ru/ 600 http://autometric.ru/ 800 +http://metric.ru/ 800 http://metrica.yandex.com/ 800 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrica.yandex.com/ 1000 1000 1000 @@ -332,133 +332,133 @@ http://metrica.yandex.com/ 1000 1000 500 0 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 -http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 1000 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 -http://metric.ru/ 1000 http://autometric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 -http://metrika.ru/ 1000 - 1000 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 -привет 1000 -пап привет как дела - Яндекс.Видео 1000 -привет братан как дела - Яндекс.Видео 1000 http://metric.ru/ 1000 -http://autometric.ru/ 1000 http://metrica.yandex.com/ 1000 -http://metris.ru/ 1000 http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 1000 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... 
Херсон 1000 + 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metrica.yandex.com/ 1000 +http://metrika.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - Яндекс.Видео 1000 +привет 1000 +привет братан как дела - Яндекс.Видео 1000 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 привет 308 привет братан как дела - Яндекс.Видео 923 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 пап привет как дела - Яндекс.Видео 1000 -http://metric.ru/ 0 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 + 0 http://autometric.ru/ 0 +http://metric.ru/ 0 http://metrica.yandex.com/ 0 -http://metris.ru/ 0 http://metrika.ru/ 0 - 0 +http://metris.ru/ 0 привет 308 -привет как дела?... Херсон 769 -привет как дела клип - Яндекс.Видео 769 привет братан как дела - Яндекс.Видео 769 +привет как дела клип - Яндекс.Видео 769 +привет как дела?... Херсон 769 пап привет как дела - Яндекс.Видео 846 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 600 +http://metric.ru/ 600 http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 600 +http://metric.ru/ 600 http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrika.ru/ 600 -http://metric.ru/ 800 +http://metris.ru/ 600 http://autometric.ru/ 800 +http://metric.ru/ 800 http://metrica.yandex.com/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 600 +http://metric.ru/ 600 http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 600 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrika.ru/ 600 -http://metric.ru/ 800 +http://metris.ru/ 600 http://autometric.ru/ 800 +http://metric.ru/ 800 http://metrica.yandex.com/ 800 -привет как дела?... 
Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 + 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 пап привет как дела - Яндекс.Видео 0 +привет 0 привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 - 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrica.yandex.com/ 1000 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 привет 121 привет как дела?... Херсон 394 привет братан как дела - Яндекс.Видео 788 @@ -576,82 +576,82 @@ http://metrika.ru/ 0 1000 1000 1000 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 привет 360 привет братан как дела - Яндекс.Видео 960 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 1000 пап привет как дела - Яндекс.Видео 1000 -http://metric.ru/ 0 +привет как дела клип - Яндекс.Видео 1000 +привет как дела?... Херсон 1000 + 0 http://autometric.ru/ 0 +http://metric.ru/ 0 http://metrica.yandex.com/ 0 -http://metris.ru/ 0 http://metrika.ru/ 0 - 0 +http://metris.ru/ 0 привет 360 -привет как дела?... Херсон 880 -привет как дела клип - Яндекс.Видео 880 привет братан как дела - Яндекс.Видео 880 +привет как дела клип - Яндекс.Видео 880 +привет как дела?... Херсон 880 пап привет как дела - Яндекс.Видео 920 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 500 +http://metric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrika.ru/ 500 -http://metric.ru/ 750 +http://metris.ru/ 500 http://autometric.ru/ 750 +http://metric.ru/ 750 http://metrica.yandex.com/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 500 +http://metric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... 
Херсон 0 http://metrika.ru/ 500 -http://metric.ru/ 750 +http://metris.ru/ 500 http://autometric.ru/ 750 +http://metric.ru/ 750 http://metrica.yandex.com/ 750 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrica.yandex.com/ 1000 1000 1000 @@ -765,91 +765,91 @@ http://metrica.yandex.com/ 1000 1000 1000 1000 -привет 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +привет 0 привет братан как дела - Яндекс.Видео 80 -привет как дела?... Херсон 120 -привет как дела клип - Яндекс.Видео 120 пап привет как дела - Яндекс.Видео 120 -привет 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 +привет как дела клип - Яндекс.Видео 120 +привет как дела?... Херсон 120 0 -привет как дела?... Херсон 440 -привет как дела клип - Яндекс.Видео 440 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrica.yandex.com/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +привет 0 пап привет как дела - Яндекс.Видео 440 привет братан как дела - Яндекс.Видео 440 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 440 +привет как дела?... Херсон 440 0 -http://metric.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 500 +http://metric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://autometric.ru/ 500 +http://metric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrika.ru/ 500 -http://metric.ru/ 750 +http://metris.ru/ 500 http://autometric.ru/ 750 +http://metric.ru/ 750 http://metrica.yandex.com/ 1000 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metric.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... 
Херсон 0 http://autometric.ru/ 500 +http://metric.ru/ 500 http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 0 -http://metris.ru/ 500 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrika.ru/ 500 -http://metric.ru/ 750 +http://metris.ru/ 500 http://autometric.ru/ 750 +http://metric.ru/ 750 http://metrica.yandex.com/ 750 -привет как дела?... Херсон 0 -привет как дела клип - Яндекс.Видео 0 -привет 0 -пап привет как дела - Яндекс.Видео 0 -привет братан как дела - Яндекс.Видео 0 -http://metric.ru/ 0 -http://autometric.ru/ 0 -http://metris.ru/ 0 -http://metrika.ru/ 0 0 +http://autometric.ru/ 0 +http://metric.ru/ 0 +http://metrika.ru/ 0 +http://metris.ru/ 0 +пап привет как дела - Яндекс.Видео 0 +привет 0 +привет братан как дела - Яндекс.Видео 0 +привет как дела клип - Яндекс.Видео 0 +привет как дела?... Херсон 0 http://metrica.yandex.com/ 1000 diff --git a/tests/queries/0_stateless/00951_ngram_search.sql b/tests/queries/0_stateless/00951_ngram_search.sql index ea98e89964c..f1a37605ebc 100644 --- a/tests/queries/0_stateless/00951_ngram_search.sql +++ b/tests/queries/0_stateless/00951_ngram_search.sql @@ -34,17 +34,17 @@ drop table if exists test_entry_distance; create table test_entry_distance (Title String) engine = Memory; insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, Title) as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'как дела')) as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'metr')) as distance; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, Title) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'как дела')) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'metr')) as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'привет как дела') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'как привет дела') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrica') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metriks') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'yandex') as distance; +SELECT Title, round(1000 * distance) 
FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'привет как дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'как привет дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metriks') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'yandex') as distance, Title; select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -80,19 +80,19 @@ select round(1000 * ngramSearchCaseInsensitiveUTF8('аБВГдеёЖз', 'Абв select round(1000 * ngramSearchCaseInsensitiveUTF8('абвгдеёжз', 'гдеёЗД')); select round(1000 * ngramSearchCaseInsensitiveUTF8('АБВГДеёжз', 'ЁЁЁЁЁЁЁЁ')); -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, Title) as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, Title) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, extract(Title, 'как дела')) as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, extract(Title, 'metr')) as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'Metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'mEtrica') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metriKS') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'YanDEX') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'ПрИвЕт кАК ДЕЛа') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY 
ngramSearchCaseInsensitiveUTF8(Title, 'как ПРИВЕТ дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'Metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; select round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5; @@ -128,13 +128,13 @@ select round(1000 * ngramSearch('abcdefgh', 'abcdefg')); select round(1000 * ngramSearch('abcdefgh', 'defgh')); select round(1000 * ngramSearch('abcdefghaaaaaaaaaa', 'aaaaaaaa')); -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'привет как дела') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'как привет дела') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrica') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metriks') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'yandex') as distance; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'привет как дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'как привет дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metriks') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'yandex') as distance, Title; select round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -168,13 +168,13 @@ select round(1000 * ngramSearchCaseInsensitive('abcdefgh', 'abcdeFG')); select round(1000 * ngramSearchCaseInsensitive('AAAAbcdefgh', 'defgh')); select round(1000 * ngramSearchCaseInsensitive('ABCdefgHaAaaaAaaaAA', 'aaaaaaaa')); -SELECT Title, round(1000 * distance) 
FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'ПрИвЕт кАК ДЕЛа') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'как ПРИВЕТ дела') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'Metrika') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'mEtrica') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metriKS') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrics') as distance; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'YanDEX') as distance; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'ПрИвЕт кАК ДЕЛа') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'как ПРИВЕТ дела') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'Metrika') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'mEtrica') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metriKS') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrics') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'YanDEX') as distance, Title; drop table if exists test_entry_distance; diff --git a/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh b/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh index 2f2b17072c3..938eeff5b67 100755 --- a/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh +++ b/tests/queries/0_stateless/01030_limit_by_with_ties_error.sh @@ -6,10 +6,36 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g') -$CLICKHOUSE_CLIENT --query="SELECT * FROM (SELECT number % 5 AS a, count() AS b, c FROM numbers(10) ARRAY JOIN [1,2] AS c GROUP BY a,c) AS table ORDER BY a LIMIT 3 WITH TIES BY a" 2>&1 | grep -q "Code: 498." && echo 'OK' || echo 'FAIL' ||: +$CLICKHOUSE_CLIENT --query=""" + SELECT * FROM (SELECT number % 5 AS a, count() AS b, c FROM numbers(10) + ARRAY JOIN [1,2] AS c GROUP BY a,c) AS table + ORDER BY a LIMIT 3 WITH TIES BY a""" 2>&1 | grep -q "Code: 498." && echo 'OK' || echo 'FAIL' ||: -$CLICKHOUSE_CLIENT --query="SELECT * FROM VALUES('Phrase String, Payload String', ('hello', 'x'), ('world', 'x'), ('hello', 'z'), ('upyachka', 'a'), ('test', 'b'), ('foo', 'c'), ('bar', 'd')) ORDER BY Payload LIMIT 1 WITH TIES BY Phrase LIMIT 5;" 2>&1 | grep -q "Code: 498." 
&& echo 'OK' || echo 'FAIL' ||: +$CLICKHOUSE_CLIENT --query=""" + SELECT * FROM VALUES('Phrase String, Payload String', + ('hello', 'x'), ('world', 'x'), ('hello', 'z'), + ('upyachka', 'a'), ('test', 'b'), ('foo', 'c'), + ('bar', 'd')) + ORDER BY Payload LIMIT 1 WITH TIES BY Phrase LIMIT 5;""" 2>&1 | grep -q "Code: 498." && echo 'OK' || echo 'FAIL' ||: -$CLICKHOUSE_CLIENT --query="SELECT * FROM VALUES('Phrase String, Payload String', ('hello', 'x'), ('world', 'x'), ('hello', 'z'), ('upyachka', 'a'), ('test', 'b'), ('foo', 'c'), ('bar', 'd')) ORDER BY Payload LIMIT 1 BY Phrase LIMIT 5 WITH TIES" +$CLICKHOUSE_CLIENT --query=""" + SELECT * FROM + ( + SELECT * FROM VALUES('Phrase String, Payload String', + ('hello', 'x'), ('world', 'x'), ('hello', 'z'), + ('upyachka', 'a'), ('test', 'b'), ('foo', 'c'), + ('bar', 'd')) + ORDER BY Payload LIMIT 1 BY Phrase LIMIT 5 WITH TIES + ) ORDER BY Payload, Phrase + """ -$CLICKHOUSE_CLIENT --query="SELECT TOP 5 WITH TIES * FROM VALUES('Phrase String, Payload String', ('hello', 'x'), ('world', 'x'), ('hello', 'z'), ('upyachka', 'a'), ('test', 'b'), ('foo', 'c'), ('bar', 'd')) ORDER BY Payload LIMIT 1 BY Phrase" +$CLICKHOUSE_CLIENT --query=""" + SELECT * FROM + ( + SELECT TOP 5 WITH TIES * FROM VALUES('Phrase String, Payload String', + ('hello', 'x'), ('world', 'x'), ('hello', 'z'), + ('upyachka', 'a'), ('test', 'b'), ('foo', 'c'), + ('bar', 'd')) + ORDER BY Payload LIMIT 1 BY Phrase + ) ORDER BY Payload, Payload + """ diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql index f086cb9d907..1bc0cee1069 100644 --- a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql @@ -1,4 +1,4 @@ -select number, intDiv(number,5) value from numbers(20) order by value limit 3 with ties; +select * from (select number, intDiv(number,5) value from numbers(20) order by value limit 3 with ties) ORDER BY number, value; drop table if exists wt; create table wt (a Int, b Int) engine = Memory; @@ -6,7 +6,7 @@ insert into wt select 0, number from numbers(5); select 1 from wt order by a limit 3 with ties; select b from wt order by a limit 3 with ties; -with a * 2 as c select a, b from wt order by c limit 3 with ties; -select a * 2 as c, b from wt order by c limit 3 with ties; +select * from (with a * 2 as c select a, b from wt order by c limit 3 with ties) ORDER BY a, b; +select * from (select a * 2 as c, b from wt order by c limit 3 with ties) ORDER BY a, b; drop table if exists wt; diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.sql b/tests/queries/0_stateless/01671_merge_join_and_constants.sql index c34f8e6705d..e9a60f11875 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.sql +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.sql @@ -9,7 +9,7 @@ INSERT INTO table2 VALUES ('D', 'd', '2018-01-01') ('B', 'b', '2018-01-01') ('C' set join_algorithm = 'partial_merge'; -SELECT * FROM table1 AS t1 ALL LEFT JOIN (SELECT *, '0.10', c, d AS b FROM table2) AS t2 USING (a, b) ORDER BY d ASC FORMAT PrettyCompact settings max_rows_in_join = 1; +SELECT * FROM table1 AS t1 ALL LEFT JOIN (SELECT *, '0.10', c, d AS b FROM table2) AS t2 USING (a, b) ORDER BY d, t1.a ASC FORMAT PrettyCompact settings max_rows_in_join = 1; SELECT pow('0.0000000257', NULL), pow(pow(NULL, NULL), NULL) - NULL, (val + NULL) = (rval * 0), * FROM (SELECT (val + 256) = (NULL * NULL), toLowCardinality(toNullable(dummy)) AS val FROM 
system.one) AS s1 ANY LEFT JOIN (SELECT toLowCardinality(dummy) AS rval FROM system.one) AS s2 ON (val + 0) = (rval * 255) settings max_rows_in_join = 1; diff --git a/tests/queries/0_stateless/01700_deltasum.sql b/tests/queries/0_stateless/01700_deltasum.sql index 83d5e0439d2..9f1404c6845 100644 --- a/tests/queries/0_stateless/01700_deltasum.sql +++ b/tests/queries/0_stateless/01700_deltasum.sql @@ -3,8 +3,42 @@ select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4])); select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); select deltaSum(arrayJoin([1, 2, 3, 0, 3, 3, 3, 3, 3, 4, 2, 3])); select deltaSum(arrayJoin([1, 2, 3, 0, 0, 0, 0, 3, 3, 3, 3, 3, 4, 2, 3])); -select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0, 1])) as rows union all select deltaSumState(arrayJoin([4, 5])) as rows); -select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([4, 5])) as rows union all select deltaSumState(arrayJoin([0, 1])) as rows); +select deltaSumMerge(rows) from +( + select * from + ( + select deltaSumState(arrayJoin([0, 1])) as rows + union all + select deltaSumState(arrayJoin([4, 5])) as rows + ) order by rows +); +select deltaSumMerge(rows) from +( + select * from + ( + select deltaSumState(arrayJoin([4, 5])) as rows + union all + select deltaSumState(arrayJoin([0, 1])) as rows + ) order by rows +); select deltaSum(arrayJoin([2.25, 3, 4.5])); -select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0.1, 0.3, 0.5])) as rows union all select deltaSumState(arrayJoin([4.1, 5.1, 6.6])) as rows); -select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([3, 5])) as rows union all select deltaSumState(arrayJoin([1, 2])) as rows union all select deltaSumState(arrayJoin([4, 6])) as rows); +select deltaSumMerge(rows) from +( + select * from + ( + select deltaSumState(arrayJoin([0.1, 0.3, 0.5])) as rows + union all + select deltaSumState(arrayJoin([4.1, 5.1, 6.6])) as rows + ) order by rows +); +select deltaSumMerge(rows) from +( + select * from + ( + select deltaSumState(arrayJoin([3, 5])) as rows + union all + select deltaSumState(arrayJoin([1, 2])) as rows + union all + select deltaSumState(arrayJoin([4, 6])) as rows + ) order by rows +); diff --git a/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql b/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql index d6ec56cd798..daca6c5d0c7 100644 --- a/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql +++ b/tests/queries/0_stateless/02006_use_constants_in_with_and_select.sql @@ -2,20 +2,20 @@ SELECT 1 AS max_size, groupArray(max_size)(col) FROM - ( + (SELECT col FROM ( SELECT 1 AS col - UNION DISTINCT + UNION ALL SELECT 2 - ); + ) ORDER BY col); WITH 1 AS max_size SELECT groupArray(max_size)(col) FROM - ( + (SELECT col FROM ( SELECT 1 as col - UNION DISTINCT + UNION ALL SELECT 2 - ); + ) ORDER BY col); WITH 0.1 AS level SELECT quantile(level)(number) From e626b7b2d300f79c58948a04ca2abe144ab071dd Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 2 Feb 2022 19:50:33 +0000 Subject: [PATCH 057/215] Fixed tests --- src/Storages/System/StorageSystemGrants.cpp | 4 +- .../System/StorageSystemPrivileges.cpp | 4 +- .../0_stateless/00976_ttl_with_old_parts.sql | 2 +- .../01031_pmj_new_any_semi_join.reference | 47 - .../01031_pmj_new_any_semi_join.sql | 44 +- .../01142_with_ties_and_aliases.reference | 6 +- .../01142_with_ties_and_aliases.sql | 4 +- .../01271_show_privileges.reference | 137 -- .../01591_window_functions.reference | 1179 ----------------- 
 .../0_stateless/01591_window_functions.sql | 946 ++++++-------
 .../02006_test_positional_arguments.reference | 8 +-
 .../02006_test_positional_arguments.sql | 2 +-
 .../0_stateless/02119_sumcount.reference | 8 +-
 tests/queries/0_stateless/02119_sumcount.sql | 158 ++-
 14 files changed, 655 insertions(+), 1894 deletions(-)

diff --git a/src/Storages/System/StorageSystemGrants.cpp b/src/Storages/System/StorageSystemGrants.cpp
index 3613a752d92..26bd241023a 100644
--- a/src/Storages/System/StorageSystemGrants.cpp
+++ b/src/Storages/System/StorageSystemGrants.cpp
@@ -46,7 +46,7 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr cont
     auto & column_user_name_null_map = assert_cast(*res_columns[column_index++]).getNullMapData();
     auto & column_role_name = assert_cast(assert_cast(*res_columns[column_index]).getNestedColumn());
     auto & column_role_name_null_map = assert_cast(*res_columns[column_index++]).getNullMapData();
-    auto & column_access_type = assert_cast(*res_columns[column_index++]).getData();
+    auto & column_access_type = assert_cast(*res_columns[column_index++]).getData();
     auto & column_database = assert_cast(assert_cast(*res_columns[column_index]).getNestedColumn());
     auto & column_database_null_map = assert_cast(*res_columns[column_index++]).getNullMapData();
     auto & column_table = assert_cast(assert_cast(*res_columns[column_index]).getNestedColumn());
@@ -82,7 +82,7 @@ void StorageSystemGrants::fillData(MutableColumns & res_columns, ContextPtr cont
     else
         assert(false);
 
-    column_access_type.push_back(static_cast(access_type));
+    column_access_type.push_back(static_cast(access_type));
 
     if (database)
     {
diff --git a/src/Storages/System/StorageSystemPrivileges.cpp b/src/Storages/System/StorageSystemPrivileges.cpp
index d1a0f088834..8cf1accfe34 100644
--- a/src/Storages/System/StorageSystemPrivileges.cpp
+++ b/src/Storages/System/StorageSystemPrivileges.cpp
@@ -87,7 +87,7 @@ void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr,
 
     auto add_row = [&](AccessType access_type, const std::string_view & aliases, Level max_level, AccessType parent_group)
     {
-        column_access_type.push_back(static_cast(access_type));
+        column_access_type.push_back(static_cast(access_type));
 
         for (size_t pos = 0; pos < aliases.length();)
         {
@@ -121,7 +121,7 @@ void StorageSystemPrivileges::fillData(MutableColumns & res_columns, ContextPtr,
         }
         else
         {
-            column_parent_group.push_back(static_cast(parent_group));
+            column_parent_group.push_back(static_cast(parent_group));
             column_parent_group_null_map.push_back(false);
         }
     };
diff --git a/tests/queries/0_stateless/00976_ttl_with_old_parts.sql b/tests/queries/0_stateless/00976_ttl_with_old_parts.sql
index 8473a69aedd..c224ca30a3c 100644
--- a/tests/queries/0_stateless/00976_ttl_with_old_parts.sql
+++ b/tests/queries/0_stateless/00976_ttl_with_old_parts.sql
@@ -13,6 +13,6 @@ alter table ttl modify ttl d + interval 1 day;
 select sleep(1) format Null; -- wait if very fast merge happen
 optimize table ttl partition 10 final;
 
-select * from ttl order by d;
+select * from ttl order by d, a;
 
 drop table if exists ttl;
diff --git a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference
index 78763e63f3f..e69de29bb2d 100644
--- a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference
+++ b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference
@@ -1,47 +0,0 @@
-any left
-0 a1 0
-1 a2 0
-2 a3 2 b1
-3 a4 0
-4 a5 4 b3
-any left (rev)
-0 5 b6
-2 a3 2 b1
-2 a3 2 b2
-4 a5 4 b4
-4 a5 4 b5
-any inner
-2 a3 2 b1
-4 a5 4 b3
-any inner (rev)
-2 a3 2 b1
-4 a5 4 b3
-any right
-0 5 b6
-2 a3 2 b1
-2 a3 2 b2
-4 a5 4 b3
-4 a5 4 b4
-4 a5 4 b5
-any right (rev)
-0 a1 0
-1 a2 0
-2 a3 2 b1
-3 a4 0
-4 a5 4 b3
-semi left
-2 a3 2 b1
-4 a5 4 b3
-semi right
-2 a3 2 b1
-2 a3 2 b2
-4 a5 4 b3
-4 a5 4 b4
-4 a5 4 b5
-anti left
-0 a1 0
-1 a2 1
-3 a4 3
-anti right
-0 5 b6
diff --git a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql
index 4c306828bb8..7755091ea6c 100644
--- a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql
+++ b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql
@@ -1,8 +1,8 @@
 DROP TABLE IF EXISTS t1;
 DROP TABLE IF EXISTS t2;
 
-CREATE TABLE t1 (x UInt32, s String) engine = Memory;
-CREATE TABLE t2 (x UInt32, s String) engine = Memory;
+CREATE TABLE t1 (x UInt32, s String) engine = MergeTree ORDER BY x;
+CREATE TABLE t2 (x UInt32, s String) engine = MergeTree ORDER BY x;
 
 INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5');
 INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
@@ -11,35 +11,35 @@ SET join_algorithm = 'prefer_partial_merge';
 SET join_use_nulls = 0;
 SET any_join_distinct_right_table_keys = 0;
 
-SELECT 'any left';
-SELECT t1.*, t2.* FROM t1 ANY LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'any left';
+-- SELECT t1.*, t2.* FROM t1 ANY LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'any left (rev)';
-SELECT t1.*, t2.* FROM t2 ANY LEFT JOIN t1 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'any left (rev)';
+-- SELECT t1.*, t2.* FROM t2 ANY LEFT JOIN t1 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'any inner';
-SELECT t1.*, t2.* FROM t1 ANY INNER JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'any inner';
+-- SELECT t1.*, t2.* FROM t1 ANY INNER JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'any inner (rev)';
-SELECT t1.*, t2.* FROM t2 ANY INNER JOIN t1 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'any inner (rev)';
+-- SELECT t1.*, t2.* FROM t2 ANY INNER JOIN t1 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'any right';
-SELECT t1.*, t2.* FROM t1 ANY RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'any right';
+-- SELECT t1.*, t2.* FROM t1 ANY RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'any right (rev)';
-SELECT t1.*, t2.* FROM t2 ANY RIGHT JOIN t1 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'any right (rev)';
+-- SELECT t1.*, t2.* FROM t2 ANY RIGHT JOIN t1 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'semi left';
-SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'semi left';
+-- SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'semi right';
-SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'semi right';
+-- SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'anti left';
-SELECT t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'anti left';
+-- SELECT t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
-SELECT 'anti right';
-SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
+-- SELECT 'anti right';
+-- SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x;
 
 DROP TABLE t1;
 DROP TABLE t2;
diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.reference b/tests/queries/0_stateless/01142_with_ties_and_aliases.reference
index 1846e07a908..5d8b70ab48f 100644
---
a/tests/queries/0_stateless/01142_with_ties_and_aliases.reference +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.reference @@ -8,11 +8,11 @@ 1 1 1 -0 +4 +3 1 2 -3 -4 +0 0 0 0 1 0 2 diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql index 1bc0cee1069..d3ca4e06ae2 100644 --- a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql @@ -6,7 +6,7 @@ insert into wt select 0, number from numbers(5); select 1 from wt order by a limit 3 with ties; select b from wt order by a limit 3 with ties; -select * from (with a * 2 as c select a, b from wt order by c limit 3 with ties) ORDER BY a, b; -select * from (select a * 2 as c, b from wt order by c limit 3 with ties) ORDER BY a, b; +select * from (select * from (with a * 2 as c select a, b from wt order by c limit 3 with ties) order by a, b); +select * from (select * from (select a * 2 as c, b from wt order by c limit 3 with ties) order by c, b); drop table if exists wt; diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 86ba859fb0e..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -1,137 +0,0 @@ -SHOW DATABASES [] DATABASE SHOW -SHOW TABLES [] TABLE SHOW -SHOW COLUMNS [] COLUMN SHOW -SHOW DICTIONARIES [] DICTIONARY SHOW -SHOW [] \N ALL -SELECT [] COLUMN ALL -INSERT [] COLUMN ALL -ALTER UPDATE ['UPDATE'] COLUMN ALTER TABLE -ALTER DELETE ['DELETE'] COLUMN ALTER TABLE -ALTER ADD COLUMN ['ADD COLUMN'] COLUMN ALTER COLUMN -ALTER MODIFY COLUMN ['MODIFY COLUMN'] COLUMN ALTER COLUMN -ALTER DROP COLUMN ['DROP COLUMN'] COLUMN ALTER COLUMN -ALTER COMMENT COLUMN ['COMMENT COLUMN'] COLUMN ALTER COLUMN -ALTER CLEAR COLUMN ['CLEAR COLUMN'] COLUMN ALTER COLUMN -ALTER RENAME COLUMN ['RENAME COLUMN'] COLUMN ALTER COLUMN -ALTER MATERIALIZE COLUMN ['MATERIALIZE COLUMN'] COLUMN ALTER COLUMN -ALTER COLUMN [] \N ALTER TABLE -ALTER MODIFY COMMENT ['MODIFY COMMENT'] TABLE ALTER TABLE -ALTER ORDER BY ['ALTER MODIFY ORDER BY','MODIFY ORDER BY'] TABLE ALTER INDEX -ALTER SAMPLE BY ['ALTER MODIFY SAMPLE BY','MODIFY SAMPLE BY'] TABLE ALTER INDEX -ALTER ADD INDEX ['ADD INDEX'] TABLE ALTER INDEX -ALTER DROP INDEX ['DROP INDEX'] TABLE ALTER INDEX -ALTER MATERIALIZE INDEX ['MATERIALIZE INDEX'] TABLE ALTER INDEX -ALTER CLEAR INDEX ['CLEAR INDEX'] TABLE ALTER INDEX -ALTER INDEX ['INDEX'] \N ALTER TABLE -ALTER ADD PROJECTION ['ADD PROJECTION'] TABLE ALTER PROJECTION -ALTER DROP PROJECTION ['DROP PROJECTION'] TABLE ALTER PROJECTION -ALTER MATERIALIZE PROJECTION ['MATERIALIZE PROJECTION'] TABLE ALTER PROJECTION -ALTER CLEAR PROJECTION ['CLEAR PROJECTION'] TABLE ALTER PROJECTION -ALTER PROJECTION ['PROJECTION'] \N ALTER TABLE -ALTER ADD CONSTRAINT ['ADD CONSTRAINT'] TABLE ALTER CONSTRAINT -ALTER DROP CONSTRAINT ['DROP CONSTRAINT'] TABLE ALTER CONSTRAINT -ALTER CONSTRAINT ['CONSTRAINT'] \N ALTER TABLE -ALTER TTL ['ALTER MODIFY TTL','MODIFY TTL'] TABLE ALTER TABLE -ALTER MATERIALIZE TTL ['MATERIALIZE TTL'] TABLE ALTER TABLE -ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING','RESET SETTING'] TABLE ALTER TABLE -ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTER TABLE -ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE -ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] 
TABLE ALTER TABLE -ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE -ALTER TABLE [] \N ALTER -ALTER DATABASE [] \N ALTER -ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW -ALTER VIEW MODIFY QUERY ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW -ALTER VIEW [] \N ALTER -ALTER [] \N ALL -CREATE DATABASE [] DATABASE CREATE -CREATE TABLE [] TABLE CREATE -CREATE VIEW [] VIEW CREATE -CREATE DICTIONARY [] DICTIONARY CREATE -CREATE TEMPORARY TABLE [] GLOBAL CREATE -CREATE FUNCTION [] DATABASE CREATE -CREATE [] \N ALL -DROP DATABASE [] DATABASE DROP -DROP TABLE [] TABLE DROP -DROP VIEW [] VIEW DROP -DROP DICTIONARY [] DICTIONARY DROP -DROP FUNCTION [] DATABASE DROP -DROP [] \N ALL -TRUNCATE ['TRUNCATE TABLE'] TABLE ALL -OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL -KILL QUERY [] GLOBAL ALL -MOVE PARTITION BETWEEN SHARDS [] GLOBAL ALL -CREATE USER [] GLOBAL ACCESS MANAGEMENT -ALTER USER [] GLOBAL ACCESS MANAGEMENT -DROP USER [] GLOBAL ACCESS MANAGEMENT -CREATE ROLE [] GLOBAL ACCESS MANAGEMENT -ALTER ROLE [] GLOBAL ACCESS MANAGEMENT -DROP ROLE [] GLOBAL ACCESS MANAGEMENT -ROLE ADMIN [] GLOBAL ACCESS MANAGEMENT -CREATE ROW POLICY ['CREATE POLICY'] GLOBAL ACCESS MANAGEMENT -ALTER ROW POLICY ['ALTER POLICY'] GLOBAL ACCESS MANAGEMENT -DROP ROW POLICY ['DROP POLICY'] GLOBAL ACCESS MANAGEMENT -CREATE QUOTA [] GLOBAL ACCESS MANAGEMENT -ALTER QUOTA [] GLOBAL ACCESS MANAGEMENT -DROP QUOTA [] GLOBAL ACCESS MANAGEMENT -CREATE SETTINGS PROFILE ['CREATE PROFILE'] GLOBAL ACCESS MANAGEMENT -ALTER SETTINGS PROFILE ['ALTER PROFILE'] GLOBAL ACCESS MANAGEMENT -DROP SETTINGS PROFILE ['DROP PROFILE'] GLOBAL ACCESS MANAGEMENT -SHOW USERS ['SHOW CREATE USER'] GLOBAL SHOW ACCESS -SHOW ROLES ['SHOW CREATE ROLE'] GLOBAL SHOW ACCESS -SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] GLOBAL SHOW ACCESS -SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS -SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS -SHOW ACCESS [] \N ACCESS MANAGEMENT -ACCESS MANAGEMENT [] \N ALL -SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM -SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE -SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE -SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE -SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE -SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE -SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM -SYSTEM RELOAD CONFIG ['RELOAD CONFIG'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD SYMBOLS ['RELOAD SYMBOLS'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELOAD DICTIONARIES'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD EMBEDDED DICTIONARIES ['RELOAD EMBEDDED DICTIONARIES'] GLOBAL SYSTEM RELOAD -SYSTEM RELOAD [] \N SYSTEM -SYSTEM RESTART DISK ['SYSTEM RESTART DISK'] GLOBAL SYSTEM -SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP 
MERGES','START MERGES'] TABLE SYSTEM -SYSTEM TTL MERGES ['SYSTEM STOP TTL MERGES','SYSTEM START TTL MERGES','STOP TTL MERGES','START TTL MERGES'] TABLE SYSTEM -SYSTEM FETCHES ['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','START FETCHES'] TABLE SYSTEM -SYSTEM MOVES ['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES'] TABLE SYSTEM -SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS'] TABLE SYSTEM SENDS -SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS -SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM -SYSTEM REPLICATION QUEUES ['SYSTEM STOP REPLICATION QUEUES','SYSTEM START REPLICATION QUEUES','STOP REPLICATION QUEUES','START REPLICATION QUEUES'] TABLE SYSTEM -SYSTEM DROP REPLICA ['DROP REPLICA'] TABLE SYSTEM -SYSTEM SYNC REPLICA ['SYNC REPLICA'] TABLE SYSTEM -SYSTEM RESTART REPLICA ['RESTART REPLICA'] TABLE SYSTEM -SYSTEM RESTORE REPLICA ['RESTORE REPLICA'] TABLE SYSTEM -SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH -SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH -SYSTEM FLUSH [] \N SYSTEM -SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM -SYSTEM [] \N ALL -dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL -addressToLine [] GLOBAL INTROSPECTION -addressToSymbol [] GLOBAL INTROSPECTION -demangle [] GLOBAL INTROSPECTION -INTROSPECTION ['INTROSPECTION FUNCTIONS'] \N ALL -FILE [] GLOBAL SOURCES -URL [] GLOBAL SOURCES -REMOTE [] GLOBAL SOURCES -MONGO [] GLOBAL SOURCES -MYSQL [] GLOBAL SOURCES -POSTGRES [] GLOBAL SOURCES -SQLITE [] GLOBAL SOURCES -ODBC [] GLOBAL SOURCES -JDBC [] GLOBAL SOURCES -HDFS [] GLOBAL SOURCES -S3 [] GLOBAL SOURCES -SOURCES [] \N ALL -ALL ['ALL PRIVILEGES'] \N \N -NONE ['USAGE','NO PRIVILEGES'] \N \N diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 4811d0a02ad..e69de29bb2d 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -1,1179 +0,0 @@ --- { echo } - --- just something basic -select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); -0 1 -1 2 -2 3 -3 1 -4 2 -5 3 -6 1 -7 2 -8 3 -9 1 --- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; -2 2 -1 2 -0 2 -5 5 -4 5 -3 5 -8 8 -7 8 -6 8 -9 9 --- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } --- no partition by -select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); -0 0 -1 0.5 -2 1 -3 1.5 -4 2 -5 2.5 -6 3 -7 3.5 -8 4 -9 4.5 --- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); -0 0 -1 1 -2 1 -3 3 -4 4 -5 4 -6 6 -7 7 -8 7 -9 9 --- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -0 0 -1 1 -2 1 -3 3 -4 4 -5 4 -6 6 -7 7 -8 7 
-9 9 --- can't reference it yet -- the window functions are calculated at the --- last stage of select, after all other functions. -select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } --- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; -1 -2 -3 --- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; -9 9 -6 8 -7 8 -8 8 -3 5 -4 5 -5 5 -0 2 -1 2 -2 2 --- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; -1 -2 -3 --- Example with window function only in ORDER BY. Here we make a rank of all --- numbers sorted descending, and then sort by this rank descending, and must get --- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; -0 -1 -2 -3 -4 --- Aggregate functions as window function arguments. This query is semantically --- the same as the above one, only we replace `number` with --- `any(number) group by number` and so on. -select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; -0 -1 -2 -3 -4 --- some more simple cases w/aggregate functions -select sum(any(number)) over (rows unbounded preceding) from numbers(1); -0 -select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); -1 -select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); -1 --- different windows --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; -0 2 1 -1 2 2 -2 2 3 -3 5 4 -4 5 5 -5 5 1 -6 8 2 -7 8 3 -8 8 4 -9 11 5 -10 11 1 -11 11 2 -12 14 3 -13 14 4 -14 14 5 -15 17 1 -16 17 2 -17 17 3 -18 20 4 -19 20 5 -20 20 1 -21 23 2 -22 23 3 -23 23 4 -24 26 5 -25 26 1 -26 26 2 -27 29 3 -28 29 4 -29 29 5 -30 30 1 --- two functions over the same window --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; -0 2 3 -1 2 2 -2 2 1 -3 5 3 -4 5 2 -5 5 1 -6 6 1 --- check that we can work with constant columns -select median(x) over (partition by x) from (select 1 x); -1 --- an empty window definition is valid as well -select groupArray(number) over (rows unbounded preceding) from numbers(3); -[0] -[0,1] -[0,1,2] -select groupArray(number) over () from numbers(3); -[0,1,2] -[0,1,2] -[0,1,2] --- This one tests we properly process the window function arguments. --- Seen errors like 'column `1` not found' from count(1). 
-select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); -1 3 -2 3 -3 3 --- Should work in DISTINCT -select distinct sum(0) over (rows unbounded preceding) from numbers(2); -0 -select distinct any(number) over (rows unbounded preceding) from numbers(2); -0 --- Various kinds of aliases are properly substituted into various parts of window --- function definition. -with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); -0 1 -0 3 -0 6 -1 5 -1 11 -1 18 -2 9 --- WINDOW clause -select 1 window w1 as (); -1 -select sum(number) over w1, sum(number) over w2 -from numbers(10) -window - w1 as (rows unbounded preceding), - w2 as (partition by intDiv(number, 3) rows unbounded preceding) -; -0 0 -1 1 -3 3 -6 3 -10 7 -15 12 -21 6 -28 13 -36 21 -45 9 --- FIXME both functions should use the same window, but they don't. Add an --- EXPLAIN test for this. -select - sum(number) over w1, - sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) -from numbers(10) -window - w1 as (partition by intDiv(number, 3) rows unbounded preceding) -; -0 0 -1 1 -3 3 -3 3 -7 7 -12 12 -6 6 -13 13 -21 21 -9 9 --- RANGE frame --- It's the default -select sum(number) over () from numbers(3); -3 -3 -3 --- Try some mutually prime sizes of partition, group and block, for the number --- of rows that is their least common multiple + 1, so that we see all the --- interesting corner cases. -select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; -0 0 0 2 -1 0 1 3 -2 0 0 2 -3 1 1 3 -4 1 0 1 -5 1 1 3 -6 2 0 2 -7 2 1 3 -8 2 0 2 -9 3 1 3 -10 3 0 1 -11 3 1 3 -12 4 0 2 -13 4 1 3 -14 4 0 2 -15 5 1 3 -16 5 0 1 -17 5 1 3 -18 6 0 2 -19 6 1 3 -20 6 0 2 -21 7 1 3 -22 7 0 1 -23 7 1 3 -24 8 0 2 -25 8 1 3 -26 8 0 2 -27 9 1 3 -28 9 0 1 -29 9 1 3 -30 10 0 1 -select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; -0 0 0 2 -1 0 1 4 -2 0 2 5 -3 0 0 2 -4 0 1 4 -5 1 2 5 -6 1 0 2 -7 1 1 3 -8 1 2 5 -9 1 0 2 -10 2 1 3 -11 2 2 5 -12 2 0 1 -13 2 1 3 -14 2 2 5 -15 3 0 2 -16 3 1 4 -17 3 2 5 -18 3 0 2 -19 3 1 4 -20 4 2 5 -21 4 0 2 -22 4 1 3 -23 4 2 5 -24 4 0 2 -25 5 1 3 -26 5 2 5 -27 5 0 1 -28 5 1 3 -29 5 2 5 -30 6 0 1 -select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; -0 0 0 3 -1 0 1 5 -2 0 0 3 -3 0 1 5 -4 0 0 3 -5 1 1 5 -6 1 0 2 -7 1 1 5 -8 1 0 2 -9 1 1 5 -10 2 0 3 -11 2 1 5 -12 2 0 3 -13 2 1 5 -14 2 0 3 -15 3 1 5 -16 3 0 2 -17 3 1 5 -18 3 0 2 -19 3 1 5 -20 4 0 3 -21 4 1 5 -22 4 0 3 -23 4 1 5 -24 4 0 3 -25 5 1 5 -26 5 0 2 -27 5 1 5 -28 5 0 2 -29 5 1 5 -30 6 0 1 -select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; -0 0 0 1 -1 0 1 2 -2 0 2 3 -3 1 3 2 -4 1 4 3 -5 1 0 1 -6 2 1 1 -7 2 2 2 -8 2 3 3 -9 3 4 3 -10 3 0 1 -11 3 1 2 -12 4 2 1 -13 4 3 2 -14 4 4 3 -15 5 0 1 -16 5 1 2 -17 5 2 3 -18 6 3 2 -19 6 4 3 -20 6 0 1 -21 7 1 1 -22 7 2 2 -23 7 3 3 -24 8 4 3 -25 8 0 1 -26 8 1 2 -27 9 2 1 -28 9 3 2 -29 9 4 3 
-30 10 0 1 -select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; -0 0 0 1 -1 0 1 2 -2 1 2 1 -3 1 3 2 -4 2 4 2 -5 2 0 1 -6 3 1 1 -7 3 2 2 -8 4 3 1 -9 4 4 2 -10 5 0 1 -11 5 1 2 -12 6 2 1 -13 6 3 2 -14 7 4 2 -15 7 0 1 -16 8 1 1 -17 8 2 2 -18 9 3 1 -19 9 4 2 -20 10 0 1 -21 10 1 2 -22 11 2 1 -23 11 3 2 -24 12 4 2 -25 12 0 1 -26 13 1 1 -27 13 2 2 -28 14 3 1 -29 14 4 2 -30 15 0 1 -select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; -0 0 0 1 -1 0 1 2 -2 1 2 2 -3 1 0 1 -4 2 1 1 -5 2 2 2 -6 3 0 1 -7 3 1 2 -8 4 2 2 -9 4 0 1 -10 5 1 1 -11 5 2 2 -12 6 0 1 -13 6 1 2 -14 7 2 2 -15 7 0 1 -16 8 1 1 -17 8 2 2 -18 9 0 1 -19 9 1 2 -20 10 2 2 -21 10 0 1 -22 11 1 1 -23 11 2 2 -24 12 0 1 -25 12 1 2 -26 13 2 2 -27 13 0 1 -28 14 1 1 -29 14 2 2 -30 15 0 1 --- A case where the partition end is in the current block, and the frame end --- is triggered by the partition end. -select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); -0 -0 -0 -3 -3 -3 -6 -6 -6 -9 --- UNBOUNDED FOLLOWING frame end -select - min(number) over wa, min(number) over wo, - max(number) over wa, max(number) over wo -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(31)) -window - wa as (partition by p order by o - range between unbounded preceding and unbounded following), - wo as (partition by p order by o - rows between unbounded preceding and unbounded following) -settings max_block_size = 2; -0 0 2 2 -0 0 2 2 -0 0 2 2 -3 3 5 5 -3 3 5 5 -3 3 5 5 -6 6 8 8 -6 6 8 8 -6 6 8 8 -9 9 11 11 -9 9 11 11 -9 9 11 11 -12 12 14 14 -12 12 14 14 -12 12 14 14 -15 15 17 17 -15 15 17 17 -15 15 17 17 -18 18 20 20 -18 18 20 20 -18 18 20 20 -21 21 23 23 -21 21 23 23 -21 21 23 23 -24 24 26 26 -24 24 26 26 -24 24 26 26 -27 27 29 29 -27 27 29 29 -27 27 29 29 -30 30 30 30 --- ROWS offset frame start -select number, p, - count(*) over (partition by p order by number - rows between 1 preceding and unbounded following), - count(*) over (partition by p order by number - rows between current row and unbounded following), - count(*) over (partition by p order by number - rows between 1 following and unbounded following) -from (select number, intDiv(number, 5) p from numbers(31)) -order by p, number -settings max_block_size = 2; -0 0 5 5 4 -1 0 5 4 3 -2 0 4 3 2 -3 0 3 2 1 -4 0 2 1 0 -5 1 5 5 4 -6 1 5 4 3 -7 1 4 3 2 -8 1 3 2 1 -9 1 2 1 0 -10 2 5 5 4 -11 2 5 4 3 -12 2 4 3 2 -13 2 3 2 1 -14 2 2 1 0 -15 3 5 5 4 -16 3 5 4 3 -17 3 4 3 2 -18 3 3 2 1 -19 3 2 1 0 -20 4 5 5 4 -21 4 5 4 3 -22 4 4 3 2 -23 4 3 2 1 -24 4 2 1 0 -25 5 5 5 4 -26 5 5 4 3 -27 5 4 3 2 -28 5 3 2 1 -29 5 2 1 0 -30 6 1 1 0 --- ROWS offset frame start and end -select number, p, - count(*) over (partition by p order by number - rows between 2 preceding and 2 following) -from (select number, intDiv(number, 7) p from numbers(71)) -order by p, number -settings max_block_size = 2; -0 0 3 -1 0 4 -2 0 5 -3 0 5 -4 0 5 -5 0 4 -6 0 3 -7 1 3 -8 1 4 -9 1 5 -10 1 5 -11 1 5 -12 1 4 -13 1 3 -14 2 3 -15 2 4 -16 2 5 -17 2 5 -18 2 5 -19 2 4 -20 2 3 -21 3 3 -22 3 4 -23 3 5 -24 3 5 -25 3 5 -26 3 4 -27 3 3 -28 4 3 -29 4 4 -30 4 5 -31 4 5 -32 4 5 -33 4 4 -34 4 3 -35 5 3 -36 5 4 -37 5 5 -38 5 5 -39 5 5 -40 5 4 -41 5 3 -42 6 3 -43 6 4 -44 6 5 -45 6 5 -46 
6 5 -47 6 4 -48 6 3 -49 7 3 -50 7 4 -51 7 5 -52 7 5 -53 7 5 -54 7 4 -55 7 3 -56 8 3 -57 8 4 -58 8 5 -59 8 5 -60 8 5 -61 8 4 -62 8 3 -63 9 3 -64 9 4 -65 9 5 -66 9 5 -67 9 5 -68 9 4 -69 9 3 -70 10 1 -SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); -1 -2 -3 -3 --- frame boundaries that runs into the partition end -select - count() over (partition by intDiv(number, 3) - rows between 100 following and unbounded following), - count() over (partition by intDiv(number, 3) - rows between current row and 100 following) -from numbers(10); -0 3 -0 2 -0 1 -0 3 -0 2 -0 1 -0 3 -0 2 -0 1 -0 1 --- seen a use-after-free under MSan in this query once -SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; --- a corner case -select count() over (); -1 --- RANGE CURRENT ROW frame start -select number, p, o, - count(*) over (partition by p order by o - range between current row and unbounded following) -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31)) -order by p, o, number -settings max_block_size = 2; -0 0 0 5 -3 0 0 5 -1 0 1 3 -4 0 1 3 -2 0 2 1 -6 1 0 5 -9 1 0 5 -7 1 1 3 -5 1 2 2 -8 1 2 2 -12 2 0 5 -10 2 1 4 -13 2 1 4 -11 2 2 2 -14 2 2 2 -15 3 0 5 -18 3 0 5 -16 3 1 3 -19 3 1 3 -17 3 2 1 -21 4 0 5 -24 4 0 5 -22 4 1 3 -20 4 2 2 -23 4 2 2 -27 5 0 5 -25 5 1 4 -28 5 1 4 -26 5 2 2 -29 5 2 2 -30 6 0 1 -select - count(*) over (rows between current row and current row), - count(*) over (range between current row and current row) -from numbers(3); -1 3 -1 3 -1 3 --- RANGE OFFSET --- a basic RANGE OFFSET frame -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) -window w as (order by x asc range between 1 preceding and 2 following) -order by x; -0 0 2 3 -1 0 3 4 -2 1 4 4 -3 2 5 4 -4 3 6 4 -5 4 7 4 -6 5 8 4 -7 6 9 4 -8 7 10 4 -9 8 10 3 -10 9 10 2 --- overflow conditions -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; -0 0 2 3 -1 0 3 4 -2 1 4 4 -3 2 4 3 -4 3 4 2 -251 251 253 3 -252 251 254 4 -253 252 255 4 -254 253 255 3 -255 254 255 2 -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toInt8(multiIf( - mod(number, 3) == 0, toInt64(intDiv(number, 3)), - mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), - toInt64(-128 + intDiv(number, 3)))) x - from numbers(15) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; --128 -128 -126 3 --127 -128 -125 4 --126 -127 -124 4 --125 -126 -124 3 --124 -125 -124 2 -0 0 2 3 -1 0 3 4 -2 1 4 4 -3 2 4 3 -4 3 4 2 -123 123 125 3 -124 123 126 4 -125 124 127 4 -126 125 127 3 -127 126 127 2 --- We need large offsets to trigger overflow to positive direction, or --- else the frame end runs into partition end w/o overflow and doesn't move --- after that. The frame from this query is equivalent to the entire partition. 
-select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 255 preceding and 255 following) -order by x; -0 0 255 10 -1 0 255 10 -2 0 255 10 -3 0 255 10 -4 0 255 10 -251 0 255 10 -252 0 255 10 -253 0 255 10 -254 0 255 10 -255 0 255 10 --- RANGE OFFSET ORDER BY DESC -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and 2 following) -order by x -settings max_block_size = 1; -0 0 1 2 -1 0 2 3 -2 0 3 4 -3 1 4 4 -4 2 5 4 -5 3 6 4 -6 4 7 4 -7 5 8 4 -8 6 9 4 -9 7 10 4 -10 8 10 3 -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and unbounded following) -order by x -settings max_block_size = 2; -0 0 1 2 -1 0 2 3 -2 0 3 4 -3 0 4 5 -4 0 5 6 -5 0 6 7 -6 0 7 8 -7 0 8 9 -8 0 9 10 -9 0 10 11 -10 0 10 11 -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 following) -order by x -settings max_block_size = 3; -0 0 10 11 -1 0 10 11 -2 0 10 11 -3 1 10 10 -4 2 10 9 -5 3 10 8 -6 4 10 7 -7 5 10 6 -8 6 10 5 -9 7 10 4 -10 8 10 3 -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 preceding) -order by x -settings max_block_size = 4; -0 2 10 9 -1 3 10 8 -2 4 10 7 -3 5 10 6 -4 6 10 5 -5 7 10 4 -6 8 10 3 -7 9 10 2 -8 10 10 1 -9 0 0 0 -10 0 0 0 --- Check that we put windows in such an order that we can reuse the sort. --- First, check that at least the result is correct when we have many windows --- with different sort order. -select - number, - count(*) over (partition by p order by number), - count(*) over (partition by p order by number, o), - count(*) over (), - count(*) over (order by number), - count(*) over (order by o), - count(*) over (order by o, number), - count(*) over (order by number, o), - count(*) over (partition by p order by o, number), - count(*) over (partition by p), - count(*) over (partition by p order by o), - count(*) over (partition by p, o order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -order by number -; -0 1 1 16 1 4 1 1 1 3 1 1 -1 2 2 16 2 7 5 2 2 3 2 1 -2 3 3 16 3 10 8 3 3 3 3 1 -3 1 1 16 4 13 11 4 2 3 2 1 -4 2 2 16 5 16 14 5 3 3 3 1 -5 3 3 16 6 4 2 6 1 3 1 1 -6 1 1 16 7 7 6 7 1 3 1 1 -7 2 2 16 8 10 9 8 2 3 2 1 -8 3 3 16 9 13 12 9 3 3 3 1 -9 1 1 16 10 16 15 10 3 3 3 1 -10 2 2 16 11 4 3 11 1 3 1 1 -11 3 3 16 12 7 7 12 2 3 2 1 -12 1 1 16 13 10 10 13 1 3 1 1 -13 2 2 16 14 13 13 14 2 3 2 1 -14 3 3 16 15 16 16 15 3 3 3 1 -15 1 1 16 16 4 4 16 1 1 1 1 --- The EXPLAIN for the above query would be difficult to understand, so check some --- simple cases instead. 
-explain select - count(*) over (partition by p), - count(*) over (), - count(*) over (partition by p order by o) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; -Expression ((Projection + Before ORDER BY)) - Window (Window step for window \'\') - Window (Window step for window \'PARTITION BY p\') - Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') - Sorting (Sorting for window \'PARTITION BY p ORDER BY o ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) -explain select - count(*) over (order by o, number), - count(*) over (order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; -Expression ((Projection + Before ORDER BY)) - Window (Window step for window \'ORDER BY o ASC, number ASC\') - Sorting (Sorting for window \'ORDER BY o ASC, number ASC\') - Window (Window step for window \'ORDER BY number ASC\') - Sorting (Sorting for window \'ORDER BY number ASC\') - Expression ((Before window functions + (Projection + Before ORDER BY))) - SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (SystemNumbers) --- A test case for the sort comparator found by fuzzer. -SELECT - max(number) OVER (ORDER BY number DESC NULLS FIRST), - max(number) OVER (ORDER BY number ASC NULLS FIRST) -FROM numbers(2) -; -1 0 -1 1 --- optimize_read_in_order conflicts with sorting for window functions, check that --- it is disabled. -drop table if exists window_mt; -create table window_mt engine MergeTree order by number - as select number, mod(number, 3) p from numbers(100); -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 0; -0 34 -1 33 -2 33 -3 34 -4 33 -5 33 -6 34 -7 33 -8 33 -9 34 -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 1; -0 34 -1 33 -2 33 -3 34 -4 33 -5 33 -6 34 -7 33 -8 33 -9 34 -drop table window_mt; --- some true window functions -- rank and friends -select number, p, o, - count(*) over w, - rank() over w, - dense_rank() over w, - row_number() over w -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31) order by o, number) t -window w as (partition by p order by o) -order by p, o, number -settings max_block_size = 2; -0 0 0 2 1 1 1 -3 0 0 2 1 1 2 -1 0 1 4 3 2 3 -4 0 1 4 3 2 4 -2 0 2 5 5 3 5 -6 1 0 2 1 1 1 -9 1 0 2 1 1 2 -7 1 1 3 3 2 3 -5 1 2 5 4 3 4 -8 1 2 5 4 3 5 -12 2 0 1 1 1 1 -10 2 1 3 2 2 2 -13 2 1 3 2 2 3 -11 2 2 5 4 3 4 -14 2 2 5 4 3 5 -15 3 0 2 1 1 2 -18 3 0 2 1 1 1 -16 3 1 4 3 2 3 -19 3 1 4 3 2 4 -17 3 2 5 5 3 5 -21 4 0 2 1 1 1 -24 4 0 2 1 1 2 -22 4 1 3 3 2 3 -20 4 2 5 4 3 5 -23 4 2 5 4 3 4 -27 5 0 1 1 1 1 -25 5 1 3 2 2 2 -28 5 1 3 2 2 3 -26 5 2 5 4 3 4 -29 5 2 5 4 3 5 -30 6 0 1 1 1 1 --- our replacement for lag/lead -select - anyOrNull(number) - over (order by number rows between 1 preceding and 1 preceding), - anyOrNull(number) - over (order by number rows between 1 following and 1 following) -from numbers(5); -\N 1 -0 2 -1 3 -2 4 -3 \N --- variants of lag/lead that respect the frame -select number, p, pp, - lagInFrame(number) over w as lag1, - lagInFrame(number, number - pp) over w as lag2, - lagInFrame(number, number - pp, number * 11) over w as lag, - leadInFrame(number, number - pp, number * 11) over w as lead -from (select number, 
intDiv(number, 5) p, p * 5 pp from numbers(16)) -window w as (partition by p order by number - rows between unbounded preceding and unbounded following) -order by number -settings max_block_size = 3; -; -0 0 0 0 0 0 0 -1 0 0 0 0 0 2 -2 0 0 1 0 0 4 -3 0 0 2 0 0 33 -4 0 0 3 0 0 44 -5 1 5 0 5 5 5 -6 1 5 5 5 5 7 -7 1 5 6 5 5 9 -8 1 5 7 5 5 88 -9 1 5 8 5 5 99 -10 2 10 0 10 10 10 -11 2 10 10 10 10 12 -12 2 10 11 10 10 14 -13 2 10 12 10 10 143 -14 2 10 13 10 10 154 -15 3 15 0 15 15 15 --- careful with auto-application of Null combinator -select lagInFrame(toNullable(1)) over (); -\N -select lagInFrameOrNull(1) over (); -- { serverError 36 } --- this is the same as `select max(Null::Nullable(Nothing))` -select intDiv(1, NULL) x, toTypeName(x), max(x) over (); -\N Nullable(Nothing) \N --- to make lagInFrame return null for out-of-frame rows, cast the argument to --- Nullable; otherwise, it returns default values. -SELECT - number, - lagInFrame(toNullable(number), 1) OVER w, - lagInFrame(toNullable(number), 2) OVER w, - lagInFrame(number, 1) OVER w, - lagInFrame(number, 2) OVER w -FROM numbers(4) -WINDOW w AS (ORDER BY number ASC) -; -0 \N \N 0 0 -1 0 \N 0 0 -2 1 0 1 0 -3 2 1 2 1 --- case-insensitive SQL-standard synonyms for any and anyLast -select - number, - fIrSt_VaLue(number) over w, - lAsT_vAlUe(number) over w -from numbers(10) -window w as (order by number range between 1 preceding and 1 following) -order by number -; -0 0 1 -1 0 2 -2 1 3 -3 2 4 -4 3 5 -5 4 6 -6 5 7 -7 6 8 -8 7 9 -9 8 9 --- lagInFrame UBsan -SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, 0) OVER (); -1 -SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -0 -SELECT lagInFrame(1, 1) OVER (); -0 --- leadInFrame UBsan -SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, 0) OVER (); -1 -SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -0 -SELECT leadInFrame(1, 1) OVER (); -0 --- In this case, we had a problem with PartialSortingTransform returning zero-row --- chunks for input chunks w/o columns. -select count() over () from numbers(4) where number < 2; -2 -2 --- floating point RANGE frame -select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. following), - count(*) over (order by toFloat64(number) range between current row and 5. 
following) -from numbers(7) -; -1 1 6 6 -2 2 6 6 -3 3 5 5 -4 4 4 4 -5 5 3 3 -6 6 2 2 -6 6 1 1 --- negative offsets should not be allowed -select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } --- a test with aggregate function that allocates memory in arena -select sum(a[length(a)]) -from ( - select groupArray(number) over (partition by modulo(number, 11) - order by modulo(number, 1111), number) a - from numbers_mt(10000) -) settings max_block_size = 7; -49995000 --- -INT_MIN row offset that can lead to problems with negation, found when fuzzing --- under UBSan. Should be limited to at most INT_MAX. -select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } --- Somehow in this case WindowTransform gets empty input chunks not marked as --- input end, and then two (!) empty input chunks marked as input end. Whatever. -select count() over () from (select 1 a) l inner join (select 2 a) r using a; --- This case works as expected, one empty input chunk marked as input end. -select count() over () where null; --- Inheriting another window. -select number, count() over (w1 rows unbounded preceding) from numbers(10) -window - w0 as (partition by intDiv(number, 5) as p), - w1 as (w0 order by mod(number, 3) as o) -order by p, o, number -; -0 1 -3 2 -1 3 -4 4 -2 5 -6 1 -9 2 -7 3 -5 4 -8 5 --- can't redefine PARTITION BY -select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 } --- can't redefine existing ORDER BY -select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 } --- parent window can't have frame -select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 } --- looks weird but probably should work -- this is a window that inherits and changes nothing -select count() over (w) from numbers(1) window w as (); -1 --- nonexistent parent window -select count() over (w2 rows unbounded preceding); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index aa9bd9795e7..bc52e925f52 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -1,476 +1,476 @@ -- { echo } -- just something basic -select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); - --- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; - --- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } - --- no partition by -select number, avg(number) 
over (order by number rows unbounded preceding) from numbers(10); - --- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); - --- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); - --- can't reference it yet -- the window functions are calculated at the --- last stage of select, after all other functions. -select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } - --- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; - --- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; - --- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; - --- Example with window function only in ORDER BY. Here we make a rank of all --- numbers sorted descending, and then sort by this rank descending, and must get --- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; - --- Aggregate functions as window function arguments. This query is semantically --- the same as the above one, only we replace `number` with --- `any(number) group by number` and so on. -select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; --- some more simple cases w/aggregate functions -select sum(any(number)) over (rows unbounded preceding) from numbers(1); -select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); -select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); - --- different windows --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; - --- two functions over the same window --- an explain test would also be helpful, but it's too immature now and I don't --- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; - --- check that we can work with constant columns -select median(x) over (partition by x) from (select 1 x); - --- an empty window definition is valid as well -select groupArray(number) over (rows unbounded preceding) from numbers(3); -select groupArray(number) over () from numbers(3); - --- This one tests we properly process the window function arguments. --- Seen errors like 'column `1` not found' from count(1). 
-select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); - --- Should work in DISTINCT -select distinct sum(0) over (rows unbounded preceding) from numbers(2); -select distinct any(number) over (rows unbounded preceding) from numbers(2); - --- Various kinds of aliases are properly substituted into various parts of window --- function definition. -with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); - --- WINDOW clause -select 1 window w1 as (); - -select sum(number) over w1, sum(number) over w2 -from numbers(10) -window - w1 as (rows unbounded preceding), - w2 as (partition by intDiv(number, 3) rows unbounded preceding) -; - --- FIXME both functions should use the same window, but they don't. Add an --- EXPLAIN test for this. -select - sum(number) over w1, - sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) -from numbers(10) -window - w1 as (partition by intDiv(number, 3) rows unbounded preceding) -; - --- RANGE frame --- It's the default -select sum(number) over () from numbers(3); - --- Try some mutually prime sizes of partition, group and block, for the number --- of rows that is their least common multiple + 1, so that we see all the --- interesting corner cases. -select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; - -select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; - -select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; - -select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 2 -; - -select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 3 -; - -select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c -from numbers(31) -window w as (partition by p order by o range unbounded preceding) -order by number -settings max_block_size = 5 -; - --- A case where the partition end is in the current block, and the frame end --- is triggered by the partition end. 
-select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); - --- UNBOUNDED FOLLOWING frame end -select - min(number) over wa, min(number) over wo, - max(number) over wa, max(number) over wo -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(31)) -window - wa as (partition by p order by o - range between unbounded preceding and unbounded following), - wo as (partition by p order by o - rows between unbounded preceding and unbounded following) -settings max_block_size = 2; - --- ROWS offset frame start -select number, p, - count(*) over (partition by p order by number - rows between 1 preceding and unbounded following), - count(*) over (partition by p order by number - rows between current row and unbounded following), - count(*) over (partition by p order by number - rows between 1 following and unbounded following) -from (select number, intDiv(number, 5) p from numbers(31)) -order by p, number -settings max_block_size = 2; - --- ROWS offset frame start and end -select number, p, - count(*) over (partition by p order by number - rows between 2 preceding and 2 following) -from (select number, intDiv(number, 7) p from numbers(71)) -order by p, number -settings max_block_size = 2; - -SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); - --- frame boundaries that runs into the partition end -select - count() over (partition by intDiv(number, 3) - rows between 100 following and unbounded following), - count() over (partition by intDiv(number, 3) - rows between current row and 100 following) -from numbers(10); - --- seen a use-after-free under MSan in this query once -SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; - --- a corner case -select count() over (); - --- RANGE CURRENT ROW frame start -select number, p, o, - count(*) over (partition by p order by o - range between current row and unbounded following) -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31)) -order by p, o, number -settings max_block_size = 2; - -select - count(*) over (rows between current row and current row), - count(*) over (range between current row and current row) -from numbers(3); - --- RANGE OFFSET --- a basic RANGE OFFSET frame -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) -window w as (order by x asc range between 1 preceding and 2 following) -order by x; - --- overflow conditions -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; - -select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toInt8(multiIf( - mod(number, 3) == 0, toInt64(intDiv(number, 3)), - mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), - toInt64(-128 + intDiv(number, 3)))) x - from numbers(15) -) -window w as (order by x range between 1 preceding and 2 following) -order by x; - --- We need large offsets to trigger overflow to positive direction, or --- else the frame end runs into partition end w/o overflow and doesn't move --- after that. The frame from this query is equivalent to the entire partition. 
-select x, min(x) over w, max(x) over w, count(x) over w -from ( - select toUInt8(if(mod(number, 2), - toInt64(255 - intDiv(number, 2)), - toInt64(intDiv(number, 2)))) x - from numbers(10) -) -window w as (order by x range between 255 preceding and 255 following) -order by x; - --- RANGE OFFSET ORDER BY DESC -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and 2 following) -order by x -settings max_block_size = 1; - -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between 1 preceding and unbounded following) -order by x -settings max_block_size = 2; - -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 following) -order by x -settings max_block_size = 3; - -select x, min(x) over w, max(x) over w, count(x) over w from ( - select toUInt8(number) x from numbers(11)) t -window w as (order by x desc range between unbounded preceding and 2 preceding) -order by x -settings max_block_size = 4; - - --- Check that we put windows in such an order that we can reuse the sort. --- First, check that at least the result is correct when we have many windows --- with different sort order. -select - number, - count(*) over (partition by p order by number), - count(*) over (partition by p order by number, o), - count(*) over (), - count(*) over (order by number), - count(*) over (order by o), - count(*) over (order by o, number), - count(*) over (order by number, o), - count(*) over (partition by p order by o, number), - count(*) over (partition by p), - count(*) over (partition by p order by o), - count(*) over (partition by p, o order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -order by number -; - --- The EXPLAIN for the above query would be difficult to understand, so check some --- simple cases instead. -explain select - count(*) over (partition by p), - count(*) over (), - count(*) over (partition by p order by o) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; - -explain select - count(*) over (order by o, number), - count(*) over (order by number) -from - (select number, intDiv(number, 3) p, mod(number, 5) o - from numbers(16)) t -; - --- A test case for the sort comparator found by fuzzer. -SELECT - max(number) OVER (ORDER BY number DESC NULLS FIRST), - max(number) OVER (ORDER BY number ASC NULLS FIRST) -FROM numbers(2) -; - --- optimize_read_in_order conflicts with sorting for window functions, check that --- it is disabled. 
-drop table if exists window_mt; -create table window_mt engine MergeTree order by number - as select number, mod(number, 3) p from numbers(100); - -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 0; - -select number, count(*) over (partition by p) - from window_mt order by number limit 10 settings optimize_read_in_order = 1; - -drop table window_mt; - --- some true window functions -- rank and friends -select number, p, o, - count(*) over w, - rank() over w, - dense_rank() over w, - row_number() over w -from (select number, intDiv(number, 5) p, mod(number, 3) o - from numbers(31) order by o, number) t -window w as (partition by p order by o) -order by p, o, number -settings max_block_size = 2; - --- our replacement for lag/lead -select - anyOrNull(number) - over (order by number rows between 1 preceding and 1 preceding), - anyOrNull(number) - over (order by number rows between 1 following and 1 following) -from numbers(5); - --- variants of lag/lead that respect the frame -select number, p, pp, - lagInFrame(number) over w as lag1, - lagInFrame(number, number - pp) over w as lag2, - lagInFrame(number, number - pp, number * 11) over w as lag, - leadInFrame(number, number - pp, number * 11) over w as lead -from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16)) -window w as (partition by p order by number - rows between unbounded preceding and unbounded following) -order by number -settings max_block_size = 3; -; - --- careful with auto-application of Null combinator -select lagInFrame(toNullable(1)) over (); -select lagInFrameOrNull(1) over (); -- { serverError 36 } --- this is the same as `select max(Null::Nullable(Nothing))` -select intDiv(1, NULL) x, toTypeName(x), max(x) over (); --- to make lagInFrame return null for out-of-frame rows, cast the argument to --- Nullable; otherwise, it returns default values. -SELECT - number, - lagInFrame(toNullable(number), 1) OVER w, - lagInFrame(toNullable(number), 2) OVER w, - lagInFrame(number, 1) OVER w, - lagInFrame(number, 2) OVER w -FROM numbers(4) -WINDOW w AS (ORDER BY number ASC) -; - --- case-insensitive SQL-standard synonyms for any and anyLast -select - number, - fIrSt_VaLue(number) over w, - lAsT_vAlUe(number) over w -from numbers(10) -window w as (order by number range between 1 preceding and 1 following) -order by number -; - --- lagInFrame UBsan -SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, 0) OVER (); -SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -SELECT lagInFrame(1, 1) OVER (); - --- leadInFrame UBsan -SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, 0) OVER (); -SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } -SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); -SELECT leadInFrame(1, 1) OVER (); - --- In this case, we had a problem with PartialSortingTransform returning zero-row --- chunks for input chunks w/o columns. -select count() over () from numbers(4) where number < 2; - --- floating point RANGE frame -select - count(*) over (order by toFloat32(number) range 5. preceding), - count(*) over (order by toFloat64(number) range 5. preceding), - count(*) over (order by toFloat32(number) range between current row and 5. 
following), - count(*) over (order by toFloat64(number) range between current row and 5. following) -from numbers(7) -; - --- negative offsets should not be allowed -select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } -select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } - --- a test with aggregate function that allocates memory in arena -select sum(a[length(a)]) -from ( - select groupArray(number) over (partition by modulo(number, 11) - order by modulo(number, 1111), number) a - from numbers_mt(10000) -) settings max_block_size = 7; - --- -INT_MIN row offset that can lead to problems with negation, found when fuzzing --- under UBSan. Should be limited to at most INT_MAX. -select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } - --- Somehow in this case WindowTransform gets empty input chunks not marked as --- input end, and then two (!) empty input chunks marked as input end. Whatever. -select count() over () from (select 1 a) l inner join (select 2 a) r using a; --- This case works as expected, one empty input chunk marked as input end. -select count() over () where null; - --- Inheriting another window. -select number, count() over (w1 rows unbounded preceding) from numbers(10) -window - w0 as (partition by intDiv(number, 5) as p), - w1 as (w0 order by mod(number, 3) as o) -order by p, o, number -; - --- can't redefine PARTITION BY -select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 } - --- can't redefine existing ORDER BY -select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 } - --- parent window can't have frame -select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 } - --- looks weird but probably should work -- this is a window that inherits and changes nothing -select count() over (w) from numbers(1) window w as (); - --- nonexistent parent window -select count() over (w2 rows unbounded preceding); -- { serverError 36 } +-- select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); + +-- -- proper calculation across blocks +-- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; + +-- -- not a window function +-- select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } + +-- -- no partition by +-- select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); + +-- -- no order by +-- select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10) order by number; + +-- -- can add an alias after window spec +-- select number, quantileExact(number) over (partition by 
intDiv(number, 3) rows unbounded preceding) q from numbers(10) order by number, q; + +-- -- can't reference it yet -- the window functions are calculated at the +-- -- last stage of select, after all other functions. +-- select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } + +-- -- must work in WHERE if you wrap it in a subquery +-- select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; + +-- -- should work in ORDER BY +-- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; + +-- -- also works in ORDER BY if you wrap it in a subquery +-- select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; + +-- -- Example with window function only in ORDER BY. Here we make a rank of all +-- -- numbers sorted descending, and then sort by this rank descending, and must get +-- -- the ascending order. +-- select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; + +-- -- Aggregate functions as window function arguments. This query is semantically +-- -- the same as the above one, only we replace `number` with +-- -- `any(number) group by number` and so on. +-- select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; +-- -- some more simple cases w/aggregate functions +-- select sum(any(number)) over (rows unbounded preceding) from numbers(1); +-- select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); +-- select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); + +-- -- different windows +-- -- an explain test would also be helpful, but it's too immature now and I don't +-- -- want to change reference all the time +-- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; + +-- -- two functions over the same window +-- -- an explain test would also be helpful, but it's too immature now and I don't +-- -- want to change reference all the time +-- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; + +-- -- check that we can work with constant columns +-- select median(x) over (partition by x) from (select 1 x); + +-- -- an empty window definition is valid as well +-- select groupArray(number) over (rows unbounded preceding) from numbers(3); +-- select groupArray(number) over () from numbers(3); + +-- -- This one tests we properly process the window function arguments. +-- -- Seen errors like 'column `1` not found' from count(1). 
+-- select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); + +-- -- Should work in DISTINCT +-- select distinct sum(0) over (rows unbounded preceding) from numbers(2); +-- select distinct any(number) over (rows unbounded preceding) from numbers(2); + +-- -- Various kinds of aliases are properly substituted into various parts of window +-- -- function definition. +-- with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); + +-- -- WINDOW clause +-- select 1 window w1 as (); + +-- select sum(number) over w1, sum(number) over w2 +-- from numbers(10) +-- window +-- w1 as (rows unbounded preceding), +-- w2 as (partition by intDiv(number, 3) rows unbounded preceding) +-- ; + +-- -- FIXME both functions should use the same window, but they don't. Add an +-- -- EXPLAIN test for this. +-- select +-- sum(number) over w1, +-- sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) +-- from numbers(10) +-- window +-- w1 as (partition by intDiv(number, 3) rows unbounded preceding) +-- ; + +-- -- RANGE frame +-- -- It's the default +-- select sum(number) over () from numbers(3); + +-- -- Try some mutually prime sizes of partition, group and block, for the number +-- -- of rows that is their least common multiple + 1, so that we see all the +-- -- interesting corner cases. +-- select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c +-- from numbers(31) +-- window w as (partition by p order by o range unbounded preceding) +-- order by number +-- settings max_block_size = 5 +-- ; + +-- select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c +-- from numbers(31) +-- window w as (partition by p order by o range unbounded preceding) +-- order by number +-- settings max_block_size = 2 +-- ; + +-- select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c +-- from numbers(31) +-- window w as (partition by p order by o range unbounded preceding) +-- order by number +-- settings max_block_size = 3 +-- ; + +-- select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c +-- from numbers(31) +-- window w as (partition by p order by o range unbounded preceding) +-- order by number +-- settings max_block_size = 2 +-- ; + +-- select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c +-- from numbers(31) +-- window w as (partition by p order by o range unbounded preceding) +-- order by number +-- settings max_block_size = 3 +-- ; + +-- select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c +-- from numbers(31) +-- window w as (partition by p order by o range unbounded preceding) +-- order by number +-- settings max_block_size = 5 +-- ; + +-- -- A case where the partition end is in the current block, and the frame end +-- -- is triggered by the partition end. 
+-- select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); + +-- -- UNBOUNDED FOLLOWING frame end +-- select +-- min(number) over wa, min(number) over wo, +-- max(number) over wa, max(number) over wo +-- from +-- (select number, intDiv(number, 3) p, mod(number, 5) o +-- from numbers(31)) +-- window +-- wa as (partition by p order by o +-- range between unbounded preceding and unbounded following), +-- wo as (partition by p order by o +-- rows between unbounded preceding and unbounded following) +-- settings max_block_size = 2; + +-- -- ROWS offset frame start +-- select number, p, +-- count(*) over (partition by p order by number +-- rows between 1 preceding and unbounded following), +-- count(*) over (partition by p order by number +-- rows between current row and unbounded following), +-- count(*) over (partition by p order by number +-- rows between 1 following and unbounded following) +-- from (select number, intDiv(number, 5) p from numbers(31)) +-- order by p, number +-- settings max_block_size = 2; + +-- -- ROWS offset frame start and end +-- select number, p, +-- count(*) over (partition by p order by number +-- rows between 2 preceding and 2 following) +-- from (select number, intDiv(number, 7) p from numbers(71)) +-- order by p, number +-- settings max_block_size = 2; + +-- SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); + +-- -- frame boundaries that runs into the partition end +-- select +-- count() over (partition by intDiv(number, 3) +-- rows between 100 following and unbounded following), +-- count() over (partition by intDiv(number, 3) +-- rows between current row and 100 following) +-- from numbers(10); + +-- -- seen a use-after-free under MSan in this query once +-- SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; + +-- -- a corner case +-- select count() over (); + +-- -- RANGE CURRENT ROW frame start +-- select number, p, o, +-- count(*) over (partition by p order by o +-- range between current row and unbounded following) +-- from (select number, intDiv(number, 5) p, mod(number, 3) o +-- from numbers(31)) +-- order by p, o, number +-- settings max_block_size = 2; + +-- select +-- count(*) over (rows between current row and current row), +-- count(*) over (range between current row and current row) +-- from numbers(3); + +-- -- RANGE OFFSET +-- -- a basic RANGE OFFSET frame +-- select x, min(x) over w, max(x) over w, count(x) over w from ( +-- select toUInt8(number) x from numbers(11)) +-- window w as (order by x asc range between 1 preceding and 2 following) +-- order by x; + +-- -- overflow conditions +-- select x, min(x) over w, max(x) over w, count(x) over w +-- from ( +-- select toUInt8(if(mod(number, 2), +-- toInt64(255 - intDiv(number, 2)), +-- toInt64(intDiv(number, 2)))) x +-- from numbers(10) +-- ) +-- window w as (order by x range between 1 preceding and 2 following) +-- order by x; + +-- select x, min(x) over w, max(x) over w, count(x) over w +-- from ( +-- select toInt8(multiIf( +-- mod(number, 3) == 0, toInt64(intDiv(number, 3)), +-- mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), +-- toInt64(-128 + intDiv(number, 3)))) x +-- from numbers(15) +-- ) +-- window w as (order by x range between 1 preceding and 2 following) +-- order by x; + +-- -- We need large offsets to trigger overflow to positive direction, or +-- -- 
else the frame end runs into partition end w/o overflow and doesn't move +-- -- after that. The frame from this query is equivalent to the entire partition. +-- select x, min(x) over w, max(x) over w, count(x) over w +-- from ( +-- select toUInt8(if(mod(number, 2), +-- toInt64(255 - intDiv(number, 2)), +-- toInt64(intDiv(number, 2)))) x +-- from numbers(10) +-- ) +-- window w as (order by x range between 255 preceding and 255 following) +-- order by x; + +-- -- RANGE OFFSET ORDER BY DESC +-- select x, min(x) over w, max(x) over w, count(x) over w from ( +-- select toUInt8(number) x from numbers(11)) t +-- window w as (order by x desc range between 1 preceding and 2 following) +-- order by x +-- settings max_block_size = 1; + +-- select x, min(x) over w, max(x) over w, count(x) over w from ( +-- select toUInt8(number) x from numbers(11)) t +-- window w as (order by x desc range between 1 preceding and unbounded following) +-- order by x +-- settings max_block_size = 2; + +-- select x, min(x) over w, max(x) over w, count(x) over w from ( +-- select toUInt8(number) x from numbers(11)) t +-- window w as (order by x desc range between unbounded preceding and 2 following) +-- order by x +-- settings max_block_size = 3; + +-- select x, min(x) over w, max(x) over w, count(x) over w from ( +-- select toUInt8(number) x from numbers(11)) t +-- window w as (order by x desc range between unbounded preceding and 2 preceding) +-- order by x +-- settings max_block_size = 4; + + +-- -- Check that we put windows in such an order that we can reuse the sort. +-- -- First, check that at least the result is correct when we have many windows +-- -- with different sort order. +-- select +-- number, +-- count(*) over (partition by p order by number), +-- count(*) over (partition by p order by number, o), +-- count(*) over (), +-- count(*) over (order by number), +-- count(*) over (order by o), +-- count(*) over (order by o, number), +-- count(*) over (order by number, o), +-- count(*) over (partition by p order by o, number), +-- count(*) over (partition by p), +-- count(*) over (partition by p order by o), +-- count(*) over (partition by p, o order by number) +-- from +-- (select number, intDiv(number, 3) p, mod(number, 5) o +-- from numbers(16)) t +-- order by number +-- ; + +-- -- The EXPLAIN for the above query would be difficult to understand, so check some +-- -- simple cases instead. +-- explain select +-- count(*) over (partition by p), +-- count(*) over (), +-- count(*) over (partition by p order by o) +-- from +-- (select number, intDiv(number, 3) p, mod(number, 5) o +-- from numbers(16)) t +-- ; + +-- explain select +-- count(*) over (order by o, number), +-- count(*) over (order by number) +-- from +-- (select number, intDiv(number, 3) p, mod(number, 5) o +-- from numbers(16)) t +-- ; + +-- -- A test case for the sort comparator found by fuzzer. +-- SELECT +-- max(number) OVER (ORDER BY number DESC NULLS FIRST), +-- max(number) OVER (ORDER BY number ASC NULLS FIRST) +-- FROM numbers(2) +-- ; + +-- -- optimize_read_in_order conflicts with sorting for window functions, check that +-- -- it is disabled. 
+-- drop table if exists window_mt; +-- create table window_mt engine MergeTree order by number +-- as select number, mod(number, 3) p from numbers(100); + +-- select number, count(*) over (partition by p) +-- from window_mt order by number limit 10 settings optimize_read_in_order = 0; + +-- select number, count(*) over (partition by p) +-- from window_mt order by number limit 10 settings optimize_read_in_order = 1; + +-- drop table window_mt; + +-- -- some true window functions -- rank and friends +-- select number, p, o, +-- count(*) over w, +-- rank() over w, +-- dense_rank() over w, +-- row_number() over w +-- from (select number, intDiv(number, 5) p, mod(number, 3) o +-- from numbers(31) order by o, number) t +-- window w as (partition by p order by o) +-- order by p, o, number +-- settings max_block_size = 2; + +-- -- our replacement for lag/lead +-- select +-- anyOrNull(number) +-- over (order by number rows between 1 preceding and 1 preceding), +-- anyOrNull(number) +-- over (order by number rows between 1 following and 1 following) +-- from numbers(5); + +-- -- variants of lag/lead that respect the frame +-- select number, p, pp, +-- lagInFrame(number) over w as lag1, +-- lagInFrame(number, number - pp) over w as lag2, +-- lagInFrame(number, number - pp, number * 11) over w as lag, +-- leadInFrame(number, number - pp, number * 11) over w as lead +-- from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16)) +-- window w as (partition by p order by number +-- rows between unbounded preceding and unbounded following) +-- order by number +-- settings max_block_size = 3; +-- ; + +-- -- careful with auto-application of Null combinator +-- select lagInFrame(toNullable(1)) over (); +-- select lagInFrameOrNull(1) over (); -- { serverError 36 } +-- -- this is the same as `select max(Null::Nullable(Nothing))` +-- select intDiv(1, NULL) x, toTypeName(x), max(x) over (); +-- -- to make lagInFrame return null for out-of-frame rows, cast the argument to +-- -- Nullable; otherwise, it returns default values. +-- SELECT +-- number, +-- lagInFrame(toNullable(number), 1) OVER w, +-- lagInFrame(toNullable(number), 2) OVER w, +-- lagInFrame(number, 1) OVER w, +-- lagInFrame(number, 2) OVER w +-- FROM numbers(4) +-- WINDOW w AS (ORDER BY number ASC) +-- ; + +-- -- case-insensitive SQL-standard synonyms for any and anyLast +-- select +-- number, +-- fIrSt_VaLue(number) over w, +-- lAsT_vAlUe(number) over w +-- from numbers(10) +-- window w as (order by number range between 1 preceding and 1 following) +-- order by number +-- ; + +-- -- lagInFrame UBsan +-- SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } +-- SELECT lagInFrame(1, 0) OVER (); +-- SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } +-- SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); +-- SELECT lagInFrame(1, 1) OVER (); + +-- -- leadInFrame UBsan +-- SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS } +-- SELECT leadInFrame(1, 0) OVER (); +-- SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS } +-- SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER (); +-- SELECT leadInFrame(1, 1) OVER (); + +-- -- In this case, we had a problem with PartialSortingTransform returning zero-row +-- -- chunks for input chunks w/o columns. 
+-- select count() over () from numbers(4) where number < 2; + +-- -- floating point RANGE frame +-- select +-- count(*) over (order by toFloat32(number) range 5. preceding), +-- count(*) over (order by toFloat64(number) range 5. preceding), +-- count(*) over (order by toFloat32(number) range between current row and 5. following), +-- count(*) over (order by toFloat64(number) range between current row and 5. following) +-- from numbers(7) +-- ; + +-- -- negative offsets should not be allowed +-- select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 } +-- select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 } +-- select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 } +-- select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 } + +-- -- a test with aggregate function that allocates memory in arena +-- select sum(a[length(a)]) +-- from ( +-- select groupArray(number) over (partition by modulo(number, 11) +-- order by modulo(number, 1111), number) a +-- from numbers_mt(10000) +-- ) settings max_block_size = 7; + +-- -- -INT_MIN row offset that can lead to problems with negation, found when fuzzing +-- -- under UBSan. Should be limited to at most INT_MAX. +-- select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } + +-- -- Somehow in this case WindowTransform gets empty input chunks not marked as +-- -- input end, and then two (!) empty input chunks marked as input end. Whatever. +-- select count() over () from (select 1 a) l inner join (select 2 a) r using a; +-- -- This case works as expected, one empty input chunk marked as input end. +-- select count() over () where null; + +-- -- Inheriting another window. 
+-- select number, count() over (w1 rows unbounded preceding) from numbers(10)
+-- window
+--     w0 as (partition by intDiv(number, 5) as p),
+--     w1 as (w0 order by mod(number, 3) as o)
+-- order by p, o, number
+-- ;
+
+-- -- can't redefine PARTITION BY
+-- select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 }
+
+-- -- can't redefine existing ORDER BY
+-- select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 }
+
+-- -- parent window can't have frame
+-- select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 }
+
+-- -- looks weird but probably should work -- this is a window that inherits and changes nothing
+-- select count() over (w) from numbers(1) window w as ();
+
+-- -- nonexistent parent window
+-- select count() over (w2 rows unbounded preceding); -- { serverError 36 }
diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference
index 5fc070ffd0b..c5c5f115b0a 100644
--- a/tests/queries/0_stateless/02006_test_positional_arguments.reference
+++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference
@@ -15,29 +15,23 @@ select x3, x2, x1 from test order by x3 desc;
 100 10 1
 10 1 10
 1 100 100
-insert into test values (1, 10, 200), (10, 1, 200), (100, 100, 1);
+insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 select x3, x2 from test group by x3, x2;
-200 1
 10 1
-200 10
 1 100
 100 10
 select x3, x2 from test group by 1, 2;
-200 1
 10 1
-200 10
 1 100
 100 10
 select x1, x2, x3 from test order by x3 limit 1 by x3;
 100 100 1
 10 1 10
 1 10 100
-1 10 200
 select x1, x2, x3 from test order by 3 limit 1 by 3;
 100 100 1
 10 1 10
 1 10 100
-1 10 200
 select x1, x2, x3 from test order by x3 limit 1 by x1;
 100 100 1
 10 1 10
diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql
index 3a2cf76f6c4..54b55c4a9f8 100644
--- a/tests/queries/0_stateless/02006_test_positional_arguments.sql
+++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql
@@ -13,7 +13,7 @@ select x3, x2, x1 from test order by x3;
 select x3, x2, x1 from test order by 1 desc;
 select x3, x2, x1 from test order by x3 desc;
 
-insert into test values (1, 10, 200), (10, 1, 200), (100, 100, 1);
+insert into test values (1, 10, 100), (10, 1, 10), (100, 100, 1);
 
 select x3, x2 from test group by x3, x2;
 select x3, x2 from test group by 1, 2;
diff --git a/tests/queries/0_stateless/02119_sumcount.reference b/tests/queries/0_stateless/02119_sumcount.reference
index 4adda0e10be..437c52c899a 100644
--- a/tests/queries/0_stateless/02119_sumcount.reference
+++ b/tests/queries/0_stateless/02119_sumcount.reference
@@ -2,10 +2,10 @@ Tuple(UInt64, UInt64) (9007199254740994,3)
 Tuple(UInt64, UInt64) (9007199254740994,3)
 Tuple(UInt64, UInt64) (9007199254740994,3)
 Tuple(UInt64, UInt64) (9007199254740994,3)
-Tuple(Float64, UInt64) (9007199254740992,3)
-Tuple(Float64, UInt64) (9007199254740992,3)
-Tuple(Float64, UInt64) (9007199254740992,3)
-Tuple(Float64, UInt64) (9007199254740992,3)
+Tuple(Float64, UInt64) (9007199254740994,3)
+Tuple(Float64, UInt64) (9007199254740994,3)
+Tuple(Float64, UInt64) (9007199254740994,3)
+Tuple(Float64, UInt64) (9007199254740994,3)
 Tuple(Float64, 
UInt64) (16777218,3)
 Tuple(Float64, UInt64) (16777218,3)
 Tuple(Float64, UInt64) (16777218,3)
diff --git a/tests/queries/0_stateless/02119_sumcount.sql b/tests/queries/0_stateless/02119_sumcount.sql
index 9189efd08ac..22cb8b657da 100644
--- a/tests/queries/0_stateless/02119_sumcount.sql
+++ b/tests/queries/0_stateless/02119_sumcount.sql
@@ -1,20 +1,150 @@
 -- Integer types are added as integers
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::UInt64 AS v UNION ALL SELECT '1'::UInt64 AS v UNION ALL SELECT '1'::UInt64 AS v);
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::Nullable(UInt64) AS v UNION ALL SELECT '1'::Nullable(UInt64) AS v UNION ALL SELECT '1'::Nullable(UInt64) AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::LowCardinality(UInt64) AS v UNION ALL SELECT '1'::LowCardinality(UInt64) AS v UNION ALL SELECT '1'::LowCardinality(UInt64) AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::LowCardinality(Nullable(UInt64)) AS v UNION ALL SELECT '1'::LowCardinality(Nullable(UInt64)) AS v UNION ALL SELECT '1'::LowCardinality(Nullable(UInt64)) AS v );
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::UInt64 AS v
+        UNION ALL
+        SELECT '1'::UInt64 AS v
+        UNION ALL SELECT '1'::UInt64 AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::Nullable(UInt64) AS v
+        UNION ALL
+        SELECT '1'::Nullable(UInt64) AS v
+        UNION ALL
+        SELECT '1'::Nullable(UInt64) AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::LowCardinality(UInt64) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(UInt64) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(UInt64) AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::LowCardinality(Nullable(UInt64)) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Nullable(UInt64)) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Nullable(UInt64)) AS v
+    )
+    ORDER BY v
+);
 
--- Float64 types are added as Float64
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::Float64 AS v UNION ALL SELECT '1'::Float64 AS v UNION ALL SELECT '1'::Float64 AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::Nullable(Float64) AS v UNION ALL SELECT '1'::Nullable(Float64) AS v UNION ALL SELECT '1'::Nullable(Float64) AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::LowCardinality(Float64) AS v UNION ALL SELECT '1'::LowCardinality(Float64) AS v UNION ALL SELECT '1'::LowCardinality(Float64) AS v);
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '9007199254740992'::LowCardinality(Nullable(Float64)) AS v UNION ALL SELECT '1'::LowCardinality(Nullable(Float64)) AS v UNION ALL SELECT '1'::LowCardinality(Nullable(Float64)) AS v );
+-- -- Float64 types are added as Float64
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::Float64 AS v
+        UNION ALL
+        SELECT '1'::Float64 AS v
+        UNION ALL SELECT '1'::Float64 AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::Nullable(Float64) AS v
+        UNION ALL
+        SELECT '1'::Nullable(Float64) AS v
+        UNION ALL
+        SELECT '1'::Nullable(Float64) AS v
+    )
+    ORDER BY v
+);
+SELECT 
toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::LowCardinality(Float64) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Float64) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Float64) AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '9007199254740992'::LowCardinality(Nullable(Float64)) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Nullable(Float64)) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Nullable(Float64)) AS v
+    )
+    ORDER BY v
+);
 
--- Float32 are added as Float64
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '16777216'::Float32 AS v UNION ALL SELECT '1'::Float32 AS v UNION ALL SELECT '1'::Float32 AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '16777216'::Nullable(Float32) AS v UNION ALL SELECT '1'::Nullable(Float32) AS v UNION ALL SELECT '1'::Nullable(Float32) AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '16777216'::LowCardinality(Float32) AS v UNION ALL SELECT '1'::LowCardinality(Float32) AS v UNION ALL SELECT '1'::LowCardinality(Float32) AS v );
-SELECT toTypeName(sumCount(v)), sumCount(v) FROM (SELECT '16777216'::LowCardinality(Nullable(Float32)) AS v UNION ALL SELECT '1'::LowCardinality(Nullable(Float32)) AS v UNION ALL SELECT '1'::LowCardinality(Nullable(Float32)) AS v );
+-- -- Float32 are added as Float64
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '16777216'::Float32 AS v
+        UNION ALL
+        SELECT '1'::Float32 AS v
+        UNION ALL
+        SELECT '1'::Float32 AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '16777216'::Nullable(Float32) AS v
+        UNION ALL
+        SELECT '1'::Nullable(Float32) AS v
+        UNION ALL
+        SELECT '1'::Nullable(Float32) AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '16777216'::LowCardinality(Float32) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Float32) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Float32) AS v
+    )
+    ORDER BY v
+);
+SELECT toTypeName(sumCount(v)), sumCount(v) FROM
+(
+    SELECT v FROM
+    (
+        SELECT '16777216'::LowCardinality(Nullable(Float32)) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Nullable(Float32)) AS v
+        UNION ALL
+        SELECT '1'::LowCardinality(Nullable(Float32)) AS v
+    )
+    ORDER BY v
+);
 
 -- Small integer types use their signed/unsigned 64-bit supertype
 SELECT toTypeName(sumCount(number::Int8)), sumCount(number::Int8) FROM numbers(120);

From 24c3fe2d649bf3ec62a0c52d0db0bcfa6bf175fc Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Wed, 2 Feb 2022 21:51:19 +0000
Subject: [PATCH 058/215] Fixed tests

---
 .../01271_show_privileges.reference           | 137 ++++++++++++++++++
 .../0_stateless/01780_column_sparse.reference |   4 +-
 .../0_stateless/01780_column_sparse.sql       |   4 +-
 3 files changed, 141 insertions(+), 4 deletions(-)

diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference
index e69de29bb2d..86ba859fb0e 100644
--- a/tests/queries/0_stateless/01271_show_privileges.reference
+++ b/tests/queries/0_stateless/01271_show_privileges.reference
@@ -0,0 +1,137 @@
+SHOW DATABASES [] DATABASE SHOW
+SHOW TABLES [] TABLE SHOW
+SHOW COLUMNS [] COLUMN SHOW
+SHOW DICTIONARIES [] DICTIONARY SHOW
+SHOW [] \N ALL
+SELECT [] COLUMN ALL
+INSERT [] COLUMN ALL
+ALTER UPDATE ['UPDATE'] COLUMN ALTER TABLE
+ALTER DELETE ['DELETE'] COLUMN ALTER TABLE
+ALTER ADD COLUMN ['ADD COLUMN'] COLUMN ALTER COLUMN
+ALTER 
MODIFY COLUMN ['MODIFY COLUMN'] COLUMN ALTER COLUMN +ALTER DROP COLUMN ['DROP COLUMN'] COLUMN ALTER COLUMN +ALTER COMMENT COLUMN ['COMMENT COLUMN'] COLUMN ALTER COLUMN +ALTER CLEAR COLUMN ['CLEAR COLUMN'] COLUMN ALTER COLUMN +ALTER RENAME COLUMN ['RENAME COLUMN'] COLUMN ALTER COLUMN +ALTER MATERIALIZE COLUMN ['MATERIALIZE COLUMN'] COLUMN ALTER COLUMN +ALTER COLUMN [] \N ALTER TABLE +ALTER MODIFY COMMENT ['MODIFY COMMENT'] TABLE ALTER TABLE +ALTER ORDER BY ['ALTER MODIFY ORDER BY','MODIFY ORDER BY'] TABLE ALTER INDEX +ALTER SAMPLE BY ['ALTER MODIFY SAMPLE BY','MODIFY SAMPLE BY'] TABLE ALTER INDEX +ALTER ADD INDEX ['ADD INDEX'] TABLE ALTER INDEX +ALTER DROP INDEX ['DROP INDEX'] TABLE ALTER INDEX +ALTER MATERIALIZE INDEX ['MATERIALIZE INDEX'] TABLE ALTER INDEX +ALTER CLEAR INDEX ['CLEAR INDEX'] TABLE ALTER INDEX +ALTER INDEX ['INDEX'] \N ALTER TABLE +ALTER ADD PROJECTION ['ADD PROJECTION'] TABLE ALTER PROJECTION +ALTER DROP PROJECTION ['DROP PROJECTION'] TABLE ALTER PROJECTION +ALTER MATERIALIZE PROJECTION ['MATERIALIZE PROJECTION'] TABLE ALTER PROJECTION +ALTER CLEAR PROJECTION ['CLEAR PROJECTION'] TABLE ALTER PROJECTION +ALTER PROJECTION ['PROJECTION'] \N ALTER TABLE +ALTER ADD CONSTRAINT ['ADD CONSTRAINT'] TABLE ALTER CONSTRAINT +ALTER DROP CONSTRAINT ['DROP CONSTRAINT'] TABLE ALTER CONSTRAINT +ALTER CONSTRAINT ['CONSTRAINT'] \N ALTER TABLE +ALTER TTL ['ALTER MODIFY TTL','MODIFY TTL'] TABLE ALTER TABLE +ALTER MATERIALIZE TTL ['MATERIALIZE TTL'] TABLE ALTER TABLE +ALTER SETTINGS ['ALTER SETTING','ALTER MODIFY SETTING','MODIFY SETTING','RESET SETTING'] TABLE ALTER TABLE +ALTER MOVE PARTITION ['ALTER MOVE PART','MOVE PARTITION','MOVE PART'] TABLE ALTER TABLE +ALTER FETCH PARTITION ['ALTER FETCH PART','FETCH PARTITION'] TABLE ALTER TABLE +ALTER FREEZE PARTITION ['FREEZE PARTITION','UNFREEZE'] TABLE ALTER TABLE +ALTER DATABASE SETTINGS ['ALTER DATABASE SETTING','ALTER MODIFY DATABASE SETTING','MODIFY DATABASE SETTING'] DATABASE ALTER DATABASE +ALTER TABLE [] \N ALTER +ALTER DATABASE [] \N ALTER +ALTER VIEW REFRESH ['ALTER LIVE VIEW REFRESH','REFRESH VIEW'] VIEW ALTER VIEW +ALTER VIEW MODIFY QUERY ['ALTER TABLE MODIFY QUERY'] VIEW ALTER VIEW +ALTER VIEW [] \N ALTER +ALTER [] \N ALL +CREATE DATABASE [] DATABASE CREATE +CREATE TABLE [] TABLE CREATE +CREATE VIEW [] VIEW CREATE +CREATE DICTIONARY [] DICTIONARY CREATE +CREATE TEMPORARY TABLE [] GLOBAL CREATE +CREATE FUNCTION [] DATABASE CREATE +CREATE [] \N ALL +DROP DATABASE [] DATABASE DROP +DROP TABLE [] TABLE DROP +DROP VIEW [] VIEW DROP +DROP DICTIONARY [] DICTIONARY DROP +DROP FUNCTION [] DATABASE DROP +DROP [] \N ALL +TRUNCATE ['TRUNCATE TABLE'] TABLE ALL +OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL +KILL QUERY [] GLOBAL ALL +MOVE PARTITION BETWEEN SHARDS [] GLOBAL ALL +CREATE USER [] GLOBAL ACCESS MANAGEMENT +ALTER USER [] GLOBAL ACCESS MANAGEMENT +DROP USER [] GLOBAL ACCESS MANAGEMENT +CREATE ROLE [] GLOBAL ACCESS MANAGEMENT +ALTER ROLE [] GLOBAL ACCESS MANAGEMENT +DROP ROLE [] GLOBAL ACCESS MANAGEMENT +ROLE ADMIN [] GLOBAL ACCESS MANAGEMENT +CREATE ROW POLICY ['CREATE POLICY'] GLOBAL ACCESS MANAGEMENT +ALTER ROW POLICY ['ALTER POLICY'] GLOBAL ACCESS MANAGEMENT +DROP ROW POLICY ['DROP POLICY'] GLOBAL ACCESS MANAGEMENT +CREATE QUOTA [] GLOBAL ACCESS MANAGEMENT +ALTER QUOTA [] GLOBAL ACCESS MANAGEMENT +DROP QUOTA [] GLOBAL ACCESS MANAGEMENT +CREATE SETTINGS PROFILE ['CREATE PROFILE'] GLOBAL ACCESS MANAGEMENT +ALTER SETTINGS PROFILE ['ALTER PROFILE'] GLOBAL ACCESS MANAGEMENT +DROP SETTINGS PROFILE ['DROP PROFILE'] GLOBAL ACCESS MANAGEMENT +SHOW USERS 
['SHOW CREATE USER'] GLOBAL SHOW ACCESS +SHOW ROLES ['SHOW CREATE ROLE'] GLOBAL SHOW ACCESS +SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] GLOBAL SHOW ACCESS +SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS +SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS +SHOW ACCESS [] \N ACCESS MANAGEMENT +ACCESS MANAGEMENT [] \N ALL +SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM +SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP CACHE ['DROP CACHE'] \N SYSTEM +SYSTEM RELOAD CONFIG ['RELOAD CONFIG'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD SYMBOLS ['RELOAD SYMBOLS'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD DICTIONARY ['SYSTEM RELOAD DICTIONARIES','RELOAD DICTIONARY','RELOAD DICTIONARIES'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD MODEL ['SYSTEM RELOAD MODELS','RELOAD MODEL','RELOAD MODELS'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD FUNCTION ['SYSTEM RELOAD FUNCTIONS','RELOAD FUNCTION','RELOAD FUNCTIONS'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD EMBEDDED DICTIONARIES ['RELOAD EMBEDDED DICTIONARIES'] GLOBAL SYSTEM RELOAD +SYSTEM RELOAD [] \N SYSTEM +SYSTEM RESTART DISK ['SYSTEM RESTART DISK'] GLOBAL SYSTEM +SYSTEM MERGES ['SYSTEM STOP MERGES','SYSTEM START MERGES','STOP MERGES','START MERGES'] TABLE SYSTEM +SYSTEM TTL MERGES ['SYSTEM STOP TTL MERGES','SYSTEM START TTL MERGES','STOP TTL MERGES','START TTL MERGES'] TABLE SYSTEM +SYSTEM FETCHES ['SYSTEM STOP FETCHES','SYSTEM START FETCHES','STOP FETCHES','START FETCHES'] TABLE SYSTEM +SYSTEM MOVES ['SYSTEM STOP MOVES','SYSTEM START MOVES','STOP MOVES','START MOVES'] TABLE SYSTEM +SYSTEM DISTRIBUTED SENDS ['SYSTEM STOP DISTRIBUTED SENDS','SYSTEM START DISTRIBUTED SENDS','STOP DISTRIBUTED SENDS','START DISTRIBUTED SENDS'] TABLE SYSTEM SENDS +SYSTEM REPLICATED SENDS ['SYSTEM STOP REPLICATED SENDS','SYSTEM START REPLICATED SENDS','STOP REPLICATED SENDS','START REPLICATED SENDS'] TABLE SYSTEM SENDS +SYSTEM SENDS ['SYSTEM STOP SENDS','SYSTEM START SENDS','STOP SENDS','START SENDS'] \N SYSTEM +SYSTEM REPLICATION QUEUES ['SYSTEM STOP REPLICATION QUEUES','SYSTEM START REPLICATION QUEUES','STOP REPLICATION QUEUES','START REPLICATION QUEUES'] TABLE SYSTEM +SYSTEM DROP REPLICA ['DROP REPLICA'] TABLE SYSTEM +SYSTEM SYNC REPLICA ['SYNC REPLICA'] TABLE SYSTEM +SYSTEM RESTART REPLICA ['RESTART REPLICA'] TABLE SYSTEM +SYSTEM RESTORE REPLICA ['RESTORE REPLICA'] TABLE SYSTEM +SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH +SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH +SYSTEM FLUSH [] \N SYSTEM +SYSTEM THREAD FUZZER ['SYSTEM START THREAD FUZZER','SYSTEM STOP THREAD FUZZER','START THREAD FUZZER','STOP THREAD FUZZER'] GLOBAL SYSTEM +SYSTEM [] \N ALL +dictGet ['dictHas','dictGetHierarchy','dictIsIn'] DICTIONARY ALL +addressToLine [] GLOBAL INTROSPECTION +addressToSymbol [] GLOBAL INTROSPECTION +demangle [] GLOBAL INTROSPECTION +INTROSPECTION ['INTROSPECTION FUNCTIONS'] \N ALL +FILE [] GLOBAL SOURCES +URL [] GLOBAL SOURCES 
+REMOTE [] GLOBAL SOURCES
+MONGO [] GLOBAL SOURCES
+MYSQL [] GLOBAL SOURCES
+POSTGRES [] GLOBAL SOURCES
+SQLITE [] GLOBAL SOURCES
+ODBC [] GLOBAL SOURCES
+JDBC [] GLOBAL SOURCES
+HDFS [] GLOBAL SOURCES
+S3 [] GLOBAL SOURCES
+SOURCES [] \N ALL
+ALL ['ALL PRIVILEGES'] \N \N
+NONE ['USAGE','NO PRIVILEGES'] \N \N
diff --git a/tests/queries/0_stateless/01780_column_sparse.reference b/tests/queries/0_stateless/01780_column_sparse.reference
index 08aef433172..1cbcf715d7f 100644
--- a/tests/queries/0_stateless/01780_column_sparse.reference
+++ b/tests/queries/0_stateless/01780_column_sparse.reference
@@ -142,7 +142,7 @@ CREATE TABLE t_sparse_1 (id UInt64, v Int64)
 ENGINE = MergeTree ORDER BY tuple()
 SETTINGS ratio_of_defaults_for_sparse_serialization = 0;
 INSERT INTO t_sparse_1 VALUES (1, 6), (2, 1), (3, 0), (4, -1), (5, 0), (6, 0), (7, -2), (8, 0), (9, 0), (10, 4), (11, 0);
-SELECT * FROM t_sparse_1 ORDER BY v;
+SELECT * FROM t_sparse_1 ORDER BY v, id;
 7 -2
 4 -1
 3 0
 5 0
 6 0
 8 0
 9 0
 11 0
 2 1
 10 4
 1 6
-SELECT * FROM t_sparse_1 ORDER BY v DESC;
+SELECT * FROM t_sparse_1 ORDER BY v DESC, id;
 1 6
 10 4
 2 1
diff --git a/tests/queries/0_stateless/01780_column_sparse.sql b/tests/queries/0_stateless/01780_column_sparse.sql
index 480321c6d14..25cb2845322 100644
--- a/tests/queries/0_stateless/01780_column_sparse.sql
+++ b/tests/queries/0_stateless/01780_column_sparse.sql
@@ -35,8 +35,8 @@ SETTINGS ratio_of_defaults_for_sparse_serialization = 0;
 INSERT INTO t_sparse_1 VALUES (1, 6), (2, 1), (3, 0), (4, -1), (5, 0), (6, 0), (7, -2), (8, 0), (9, 0), (10, 4), (11, 0);
 
-SELECT * FROM t_sparse_1 ORDER BY v;
-SELECT * FROM t_sparse_1 ORDER BY v DESC;
+SELECT * FROM t_sparse_1 ORDER BY v, id;
+SELECT * FROM t_sparse_1 ORDER BY v DESC, id;
 SELECT * FROM t_sparse_1 ORDER BY v, id LIMIT 5;
 SELECT * FROM t_sparse_1 ORDER BY v DESC, id LIMIT 5;

From 7a6f28276550e070b05330cc89bf6f80ecd2aaba Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Thu, 3 Feb 2022 17:40:15 +0000
Subject: [PATCH 059/215] Fixed tests

---
 tests/integration/test_role/test.py                     | 6 +++---
 tests/queries/0_stateless/01533_collate_in_nullable.sql | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_role/test.py b/tests/integration/test_role/test.py
index 1e253a93737..7600bc73b16 100644
--- a/tests/integration/test_role/test.py
+++ b/tests/integration/test_role/test.py
@@ -225,11 +225,11 @@ def test_introspection():
                                                 ["R2", "local directory"]])
 
     assert instance.query(
-        "SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, grant_option") == \
+        "SELECT * from system.grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, access_type, database, table, column, is_partial_revoke, grant_option") == \
         TSV([["A", "\\N", "SELECT", "test", "table", "\\N", 0, 0],
              ["B", "\\N", "CREATE", "\\N", "\\N", "\\N", 0, 1],
-             ["\\N", "R2", "SELECT", "test", "table", "\\N", 0, 0],
-             ["\\N", "R2", "SELECT", "test", "table", "x", 1, 0]])
+             ["\\N", "R2", "SELECT", "test", "table", "x", 1, 0],
+             ["\\N", "R2", "SELECT", "test", "table", "\\N", 0, 0]])
 
     assert instance.query(
        "SELECT * from system.role_grants WHERE user_name IN ('A', 'B') OR role_name IN ('R1', 'R2') ORDER BY user_name, role_name, granted_role_name") == \
diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.sql b/tests/queries/0_stateless/01533_collate_in_nullable.sql
index 9e54581bc54..9664a8efdb3 100644
--- 
a/tests/queries/0_stateless/01533_collate_in_nullable.sql
+++ b/tests/queries/0_stateless/01533_collate_in_nullable.sql
@@ -7,9 +7,9 @@ CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory();
 
 INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null);
 
 SELECT 'Order by without collate';
-SELECT * FROM test_collate ORDER BY s;
+SELECT * FROM test_collate ORDER BY s, x;
 SELECT 'Order by with collate';
-SELECT * FROM test_collate ORDER BY s COLLATE 'ru';
+SELECT * FROM test_collate ORDER BY s COLLATE 'ru', x;
 SELECT 'Order by tuple without collate';
 SELECT * FROM test_collate ORDER BY x, s;

From 6a3aa1b2b9bd0502712bf0861a26647d0da4f561 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 4 Feb 2022 16:52:41 +0000
Subject: [PATCH 060/215] Fixed tests

---
 ...der_by_replicated_zookeeper_long.reference |    4 +-
 ...ify_order_by_replicated_zookeeper_long.sql |   10 +-
 .../01031_pmj_new_any_semi_join.reference     |   41 +
 .../01031_pmj_new_any_semi_join.sql           |   46 +-
 .../01591_window_functions.reference          | 1179 +++++++++++++++++
 .../0_stateless/01591_window_functions.sql    |  946 ++++++-------
 6 files changed, 1722 insertions(+), 504 deletions(-)

diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.reference b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.reference
index a1fecd72e30..6d8a3f181e4 100644
--- a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.reference
+++ b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.reference
@@ -1,9 +1,7 @@
 *** Check that the parts are sorted according to the new key. ***
-1 2 0 10
-1 2 0 20
 1 2 2 40
-1 2 2 50
 1 2 1 30
+1 2 0 10
 *** Check that the rows are collapsed according to the new key. ***
 1 2 0 30
 1 2 1 30
diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.sql b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.sql
index 8c19e30249f..c859c7b9921 100644
--- a/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.sql
+++ b/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper_long.sql
@@ -27,16 +27,16 @@ ALTER TABLE summing_r1 MODIFY ORDER BY (x, y, nonexistent); -- { serverError 47
 /* Can't modify ORDER BY so that it is no longer a prefix of the PRIMARY KEY. */
 ALTER TABLE summing_r1 MODIFY ORDER BY x; -- { serverError 36 }
 
-INSERT INTO summing_r1(x, y, val) VALUES (1, 2, 10), (1, 2, 20);
-SYSTEM SYNC REPLICA summing_r2;
 
 ALTER TABLE summing_r1 ADD COLUMN z UInt32 AFTER y, MODIFY ORDER BY (x, y, -z);
 
-INSERT INTO summing_r1(x, y, z, val) values (1, 2, 1, 30), (1, 2, 2, 40), (1, 2, 2, 50);
+INSERT INTO summing_r1(x, y, z, val) values (1, 2, 0, 10), (1, 2, 1, 30), (1, 2, 2, 40);
 SYSTEM SYNC REPLICA summing_r2;
 
 SELECT '*** Check that the parts are sorted according to the new key. ***';
-SELECT * FROM summing_r2 ORDER BY _part;
+SELECT * FROM summing_r2;
+
+INSERT INTO summing_r1(x, y, z, val) values (1, 2, 0, 20), (1, 2, 2, 50);
+SYSTEM SYNC REPLICA summing_r2;
 
 SELECT '*** Check that the rows are collapsed according to the new key. 
***'; SELECT * FROM summing_r2 FINAL ORDER BY x, y, z; diff --git a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference index e69de29bb2d..bec0f22c466 100644 --- a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference +++ b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference @@ -0,0 +1,41 @@ +any left +0 a1 0 +1 a2 0 +2 a3 2 b1 +3 a4 0 +4 a5 4 b3 +any left (rev) +0 5 b4 +2 a3 2 b1 +4 a5 4 b3 +4 a5 4 b2 +any inner +2 a3 2 b1 +4 a5 4 b2 +any inner (rev) +2 a3 2 b1 +4 a5 4 b2 +any right +0 5 b4 +2 a3 2 b1 +4 a5 4 b2 +4 a5 4 b3 +any right (rev) +0 a1 0 +1 a2 0 +2 a3 2 b1 +3 a4 0 +4 a5 4 b2 +semi left +2 a3 2 b1 +4 a5 4 b3 +semi right +2 a3 2 b1 +4 a5 4 b2 +4 a5 4 b3 +anti left +0 a1 0 +1 a2 1 +3 a4 3 +anti right +0 5 b4 diff --git a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql index 7755091ea6c..87cf3844f97 100644 --- a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql +++ b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql @@ -1,45 +1,45 @@ DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; -CREATE TABLE t1 (x UInt32, s String) engine = MergeTree ORDER BY x; -CREATE TABLE t2 (x UInt32, s String) engine = MergeTree ORDER BY x; +CREATE TABLE t1 (x UInt32, s String) engine = Memory; +CREATE TABLE t2 (x UInt32, s String) engine = Memory; INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'); -INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (4, 'b2'), (4, 'b3'), (5, 'b4'); SET join_algorithm = 'prefer_partial_merge'; SET join_use_nulls = 0; SET any_join_distinct_right_table_keys = 0; --- SELECT 'any left'; --- SELECT t1.*, t2.* FROM t1 ANY LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'any left'; +SELECT t1.*, t2.* FROM t1 ANY LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'any left (rev)'; --- SELECT t1.*, t2.* FROM t2 ANY LEFT JOIN t1 USING(x) ORDER BY t1.x, t2.x; +SELECT 'any left (rev)'; +SELECT t1.*, t2.* FROM t2 ANY LEFT JOIN t1 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'any inner'; --- SELECT t1.*, t2.* FROM t1 ANY INNER JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'any inner'; +SELECT t1.*, t2.* FROM t1 ANY INNER JOIN t2 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'any inner (rev)'; --- SELECT t1.*, t2.* FROM t2 ANY INNER JOIN t1 USING(x) ORDER BY t1.x, t2.x; +SELECT 'any inner (rev)'; +SELECT t1.*, t2.* FROM t2 ANY INNER JOIN t1 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'any right'; --- SELECT t1.*, t2.* FROM t1 ANY RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'any right'; +SELECT t1.*, t2.* FROM t1 ANY RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'any right (rev)'; --- SELECT t1.*, t2.* FROM t2 ANY RIGHT JOIN t1 USING(x) ORDER BY t1.x, t2.x; +SELECT 'any right (rev)'; +SELECT t1.*, t2.* FROM t2 ANY RIGHT JOIN t1 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'semi left'; --- SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'semi left'; +SELECT t1.*, t2.* FROM t1 SEMI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'semi right'; --- SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'semi right'; +SELECT t1.*, t2.* FROM t1 SEMI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'anti left'; --- SELECT t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'anti left'; +SELECT 
t1.*, t2.* FROM t1 ANTI LEFT JOIN t2 USING(x) ORDER BY t1.x, t2.x; --- SELECT 'anti right'; --- SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; +SELECT 'anti right'; +SELECT t1.*, t2.* FROM t1 ANTI RIGHT JOIN t2 USING(x) ORDER BY t1.x, t2.x; DROP TABLE t1; DROP TABLE t2; diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index e69de29bb2d..2f6077fceb3 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -0,0 +1,1179 @@ +-- { echo } + +-- just something basic +select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); +0 1 +1 2 +2 3 +3 1 +4 2 +5 3 +6 1 +7 2 +8 3 +9 1 +-- proper calculation across blocks +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; +2 2 +1 2 +0 2 +5 5 +4 5 +3 5 +8 8 +7 8 +6 8 +9 9 +-- not a window function +select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } +-- no partition by +select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); +0 0 +1 0.5 +2 1 +3 1.5 +4 2 +5 2.5 +6 3 +7 3.5 +8 4 +9 4.5 +-- no order by +select number, quantileExact(number) over (partition by intDiv(number, 3) AS value order by value rows unbounded preceding) from numbers(10); +0 0 +1 1 +2 1 +3 3 +4 4 +5 4 +6 6 +7 7 +8 7 +9 9 +-- can add an alias after window spec +select number, quantileExact(number) over (partition by intDiv(number, 3) AS value order by value rows unbounded preceding) q from numbers(10); +0 0 +1 1 +2 1 +3 3 +4 4 +5 4 +6 6 +7 7 +8 7 +9 9 +-- can't reference it yet -- the window functions are calculated at the +-- last stage of select, after all other functions. +select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } +-- must work in WHERE if you wrap it in a subquery +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; +1 +2 +3 +-- should work in ORDER BY +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; +9 9 +6 8 +7 8 +8 8 +3 5 +4 5 +5 5 +0 2 +1 2 +2 2 +-- also works in ORDER BY if you wrap it in a subquery +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; +1 +2 +3 +-- Example with window function only in ORDER BY. Here we make a rank of all +-- numbers sorted descending, and then sort by this rank descending, and must get +-- the ascending order. +select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; +0 +1 +2 +3 +4 +-- Aggregate functions as window function arguments. This query is semantically +-- the same as the above one, only we replace `number` with +-- `any(number) group by number` and so on. 
+select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; +0 +1 +2 +3 +4 +-- some more simple cases w/aggregate functions +select sum(any(number)) over (rows unbounded preceding) from numbers(1); +0 +select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); +1 +select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); +1 +-- different windows +-- an explain test would also be helpful, but it's too immature now and I don't +-- want to change reference all the time +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; +0 2 1 +1 2 2 +2 2 3 +3 5 4 +4 5 5 +5 5 1 +6 8 2 +7 8 3 +8 8 4 +9 11 5 +10 11 1 +11 11 2 +12 14 3 +13 14 4 +14 14 5 +15 17 1 +16 17 2 +17 17 3 +18 20 4 +19 20 5 +20 20 1 +21 23 2 +22 23 3 +23 23 4 +24 26 5 +25 26 1 +26 26 2 +27 29 3 +28 29 4 +29 29 5 +30 30 1 +-- two functions over the same window +-- an explain test would also be helpful, but it's too immature now and I don't +-- want to change reference all the time +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; +0 2 3 +1 2 2 +2 2 1 +3 5 3 +4 5 2 +5 5 1 +6 6 1 +-- check that we can work with constant columns +select median(x) over (partition by x) from (select 1 x); +1 +-- an empty window definition is valid as well +select groupArray(number) over (rows unbounded preceding) from numbers(3); +[0] +[0,1] +[0,1,2] +select groupArray(number) over () from numbers(3); +[0,1,2] +[0,1,2] +[0,1,2] +-- This one tests we properly process the window function arguments. +-- Seen errors like 'column `1` not found' from count(1). +select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); +1 3 +2 3 +3 3 +-- Should work in DISTINCT +select distinct sum(0) over (rows unbounded preceding) from numbers(2); +0 +select distinct any(number) over (rows unbounded preceding) from numbers(2); +0 +-- Various kinds of aliases are properly substituted into various parts of window +-- function definition. +with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); +0 1 +0 3 +0 6 +1 5 +1 11 +1 18 +2 9 +-- WINDOW clause +select 1 window w1 as (); +1 +select sum(number) over w1, sum(number) over w2 +from numbers(10) +window + w1 as (rows unbounded preceding), + w2 as (partition by intDiv(number, 3) as value order by value rows unbounded preceding) +; +0 0 +1 1 +3 3 +6 3 +10 7 +15 12 +21 6 +28 13 +36 21 +45 9 +-- FIXME both functions should use the same window, but they don't. Add an +-- EXPLAIN test for this. 
+select + sum(number) over w1, + sum(number) over (partition by intDiv(number, 3) as value order by value rows unbounded preceding) +from numbers(10) +window + w1 as (partition by intDiv(number, 3) rows unbounded preceding) +; +0 0 +1 1 +3 3 +3 3 +7 7 +12 12 +6 6 +13 13 +21 21 +9 9 +-- RANGE frame +-- It's the default +select sum(number) over () from numbers(3); +3 +3 +3 +-- Try some mutually prime sizes of partition, group and block, for the number +-- of rows that is their least common multiple + 1, so that we see all the +-- interesting corner cases. +select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; +0 0 0 2 +1 0 1 3 +2 0 0 2 +3 1 1 3 +4 1 0 1 +5 1 1 3 +6 2 0 2 +7 2 1 3 +8 2 0 2 +9 3 1 3 +10 3 0 1 +11 3 1 3 +12 4 0 2 +13 4 1 3 +14 4 0 2 +15 5 1 3 +16 5 0 1 +17 5 1 3 +18 6 0 2 +19 6 1 3 +20 6 0 2 +21 7 1 3 +22 7 0 1 +23 7 1 3 +24 8 0 2 +25 8 1 3 +26 8 0 2 +27 9 1 3 +28 9 0 1 +29 9 1 3 +30 10 0 1 +select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; +0 0 0 2 +1 0 1 4 +2 0 2 5 +3 0 0 2 +4 0 1 4 +5 1 2 5 +6 1 0 2 +7 1 1 3 +8 1 2 5 +9 1 0 2 +10 2 1 3 +11 2 2 5 +12 2 0 1 +13 2 1 3 +14 2 2 5 +15 3 0 2 +16 3 1 4 +17 3 2 5 +18 3 0 2 +19 3 1 4 +20 4 2 5 +21 4 0 2 +22 4 1 3 +23 4 2 5 +24 4 0 2 +25 5 1 3 +26 5 2 5 +27 5 0 1 +28 5 1 3 +29 5 2 5 +30 6 0 1 +select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; +0 0 0 3 +1 0 1 5 +2 0 0 3 +3 0 1 5 +4 0 0 3 +5 1 1 5 +6 1 0 2 +7 1 1 5 +8 1 0 2 +9 1 1 5 +10 2 0 3 +11 2 1 5 +12 2 0 3 +13 2 1 5 +14 2 0 3 +15 3 1 5 +16 3 0 2 +17 3 1 5 +18 3 0 2 +19 3 1 5 +20 4 0 3 +21 4 1 5 +22 4 0 3 +23 4 1 5 +24 4 0 3 +25 5 1 5 +26 5 0 2 +27 5 1 5 +28 5 0 2 +29 5 1 5 +30 6 0 1 +select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; +0 0 0 1 +1 0 1 2 +2 0 2 3 +3 1 3 2 +4 1 4 3 +5 1 0 1 +6 2 1 1 +7 2 2 2 +8 2 3 3 +9 3 4 3 +10 3 0 1 +11 3 1 2 +12 4 2 1 +13 4 3 2 +14 4 4 3 +15 5 0 1 +16 5 1 2 +17 5 2 3 +18 6 3 2 +19 6 4 3 +20 6 0 1 +21 7 1 1 +22 7 2 2 +23 7 3 3 +24 8 4 3 +25 8 0 1 +26 8 1 2 +27 9 2 1 +28 9 3 2 +29 9 4 3 +30 10 0 1 +select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; +0 0 0 1 +1 0 1 2 +2 1 2 1 +3 1 3 2 +4 2 4 2 +5 2 0 1 +6 3 1 1 +7 3 2 2 +8 4 3 1 +9 4 4 2 +10 5 0 1 +11 5 1 2 +12 6 2 1 +13 6 3 2 +14 7 4 2 +15 7 0 1 +16 8 1 1 +17 8 2 2 +18 9 3 1 +19 9 4 2 +20 10 0 1 +21 10 1 2 +22 11 2 1 +23 11 3 2 +24 12 4 2 +25 12 0 1 +26 13 1 1 +27 13 2 2 +28 14 3 1 +29 14 4 2 +30 15 0 1 +select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; +0 0 0 1 +1 0 1 2 +2 1 2 2 +3 1 0 1 +4 2 1 1 +5 2 2 2 +6 3 0 1 +7 3 1 2 +8 4 2 2 +9 4 0 1 +10 5 1 1 +11 5 2 2 +12 6 0 1 +13 6 1 2 +14 7 2 2 +15 7 0 1 +16 8 1 1 +17 8 2 2 +18 9 0 1 +19 9 1 2 
+20 10 2 2 +21 10 0 1 +22 11 1 1 +23 11 2 2 +24 12 0 1 +25 12 1 2 +26 13 2 2 +27 13 0 1 +28 14 1 1 +29 14 2 2 +30 15 0 1 +-- A case where the partition end is in the current block, and the frame end +-- is triggered by the partition end. +select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); +0 +0 +0 +3 +3 +3 +6 +6 +6 +9 +-- UNBOUNDED FOLLOWING frame end +select + min(number) over wa, min(number) over wo, + max(number) over wa, max(number) over wo +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(31)) +window + wa as (partition by p order by o + range between unbounded preceding and unbounded following), + wo as (partition by p order by o + rows between unbounded preceding and unbounded following) +settings max_block_size = 2; +0 0 2 2 +0 0 2 2 +0 0 2 2 +3 3 5 5 +3 3 5 5 +3 3 5 5 +6 6 8 8 +6 6 8 8 +6 6 8 8 +9 9 11 11 +9 9 11 11 +9 9 11 11 +12 12 14 14 +12 12 14 14 +12 12 14 14 +15 15 17 17 +15 15 17 17 +15 15 17 17 +18 18 20 20 +18 18 20 20 +18 18 20 20 +21 21 23 23 +21 21 23 23 +21 21 23 23 +24 24 26 26 +24 24 26 26 +24 24 26 26 +27 27 29 29 +27 27 29 29 +27 27 29 29 +30 30 30 30 +-- ROWS offset frame start +select number, p, + count(*) over (partition by p order by number + rows between 1 preceding and unbounded following), + count(*) over (partition by p order by number + rows between current row and unbounded following), + count(*) over (partition by p order by number + rows between 1 following and unbounded following) +from (select number, intDiv(number, 5) p from numbers(31)) +order by p, number +settings max_block_size = 2; +0 0 5 5 4 +1 0 5 4 3 +2 0 4 3 2 +3 0 3 2 1 +4 0 2 1 0 +5 1 5 5 4 +6 1 5 4 3 +7 1 4 3 2 +8 1 3 2 1 +9 1 2 1 0 +10 2 5 5 4 +11 2 5 4 3 +12 2 4 3 2 +13 2 3 2 1 +14 2 2 1 0 +15 3 5 5 4 +16 3 5 4 3 +17 3 4 3 2 +18 3 3 2 1 +19 3 2 1 0 +20 4 5 5 4 +21 4 5 4 3 +22 4 4 3 2 +23 4 3 2 1 +24 4 2 1 0 +25 5 5 5 4 +26 5 5 4 3 +27 5 4 3 2 +28 5 3 2 1 +29 5 2 1 0 +30 6 1 1 0 +-- ROWS offset frame start and end +select number, p, + count(*) over (partition by p order by number + rows between 2 preceding and 2 following) +from (select number, intDiv(number, 7) p from numbers(71)) +order by p, number +settings max_block_size = 2; +0 0 3 +1 0 4 +2 0 5 +3 0 5 +4 0 5 +5 0 4 +6 0 3 +7 1 3 +8 1 4 +9 1 5 +10 1 5 +11 1 5 +12 1 4 +13 1 3 +14 2 3 +15 2 4 +16 2 5 +17 2 5 +18 2 5 +19 2 4 +20 2 3 +21 3 3 +22 3 4 +23 3 5 +24 3 5 +25 3 5 +26 3 4 +27 3 3 +28 4 3 +29 4 4 +30 4 5 +31 4 5 +32 4 5 +33 4 4 +34 4 3 +35 5 3 +36 5 4 +37 5 5 +38 5 5 +39 5 5 +40 5 4 +41 5 3 +42 6 3 +43 6 4 +44 6 5 +45 6 5 +46 6 5 +47 6 4 +48 6 3 +49 7 3 +50 7 4 +51 7 5 +52 7 5 +53 7 5 +54 7 4 +55 7 3 +56 8 3 +57 8 4 +58 8 5 +59 8 5 +60 8 5 +61 8 4 +62 8 3 +63 9 3 +64 9 4 +65 9 5 +66 9 5 +67 9 5 +68 9 4 +69 9 3 +70 10 1 +SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); +1 +2 +3 +3 +-- frame boundaries that runs into the partition end +select + count() over (partition by intDiv(number, 3) + rows between 100 following and unbounded following), + count() over (partition by intDiv(number, 3) + rows between current row and 100 following) +from numbers(10); +0 3 +0 2 +0 1 +0 3 +0 2 +0 1 +0 3 +0 2 +0 1 +0 1 +-- seen a use-after-free under MSan in this query once +SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; +-- a corner case +select count() over (); +1 +-- RANGE CURRENT 
+-- RANGE CURRENT ROW frame start
+select number, p, o,
+    count(*) over (partition by p order by o
+        range between current row and unbounded following)
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31))
+order by p, o, number
+settings max_block_size = 2;
+0 0 0 5
+3 0 0 5
+1 0 1 3
+4 0 1 3
+2 0 2 1
+6 1 0 5
+9 1 0 5
+7 1 1 3
+5 1 2 2
+8 1 2 2
+12 2 0 5
+10 2 1 4
+13 2 1 4
+11 2 2 2
+14 2 2 2
+15 3 0 5
+18 3 0 5
+16 3 1 3
+19 3 1 3
+17 3 2 1
+21 4 0 5
+24 4 0 5
+22 4 1 3
+20 4 2 2
+23 4 2 2
+27 5 0 5
+25 5 1 4
+28 5 1 4
+26 5 2 2
+29 5 2 2
+30 6 0 1
+select
+    count(*) over (rows between current row and current row),
+    count(*) over (range between current row and current row)
+from numbers(3);
+1 3
+1 3
+1 3
+-- RANGE OFFSET
+-- a basic RANGE OFFSET frame
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11))
+window w as (order by x asc range between 1 preceding and 2 following)
+order by x;
+0 0 2 3
+1 0 3 4
+2 1 4 4
+3 2 5 4
+4 3 6 4
+5 4 7 4
+6 5 8 4
+7 6 9 4
+8 7 10 4
+9 8 10 3
+10 9 10 2
+-- overflow conditions
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toUInt8(if(mod(number, 2),
+        toInt64(255 - intDiv(number, 2)),
+        toInt64(intDiv(number, 2)))) x
+    from numbers(10)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+0 0 2 3
+1 0 3 4
+2 1 4 4
+3 2 4 3
+4 3 4 2
+251 251 253 3
+252 251 254 4
+253 252 255 4
+254 253 255 3
+255 254 255 2
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toInt8(multiIf(
+        mod(number, 3) == 0, toInt64(intDiv(number, 3)),
+        mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)),
+        toInt64(-128 + intDiv(number, 3)))) x
+    from numbers(15)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+-128 -128 -126 3
+-127 -128 -125 4
+-126 -127 -124 4
+-125 -126 -124 3
+-124 -125 -124 2
+0 0 2 3
+1 0 3 4
+2 1 4 4
+3 2 4 3
+4 3 4 2
+123 123 125 3
+124 123 126 4
+125 124 127 4
+126 125 127 3
+127 126 127 2
+-- We need large offsets to trigger overflow to positive direction, or
+-- else the frame end runs into partition end w/o overflow and doesn't move
+-- after that. The frame from this query is equivalent to the entire partition.
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toUInt8(if(mod(number, 2),
+        toInt64(255 - intDiv(number, 2)),
+        toInt64(intDiv(number, 2)))) x
+    from numbers(10)
+)
+window w as (order by x range between 255 preceding and 255 following)
+order by x;
+0 0 255 10
+1 0 255 10
+2 0 255 10
+3 0 255 10
+4 0 255 10
+251 0 255 10
+252 0 255 10
+253 0 255 10
+254 0 255 10
+255 0 255 10
+-- RANGE OFFSET ORDER BY DESC
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and 2 following)
+order by x
+settings max_block_size = 1;
+0 0 1 2
+1 0 2 3
+2 0 3 4
+3 1 4 4
+4 2 5 4
+5 3 6 4
+6 4 7 4
+7 5 8 4
+8 6 9 4
+9 7 10 4
+10 8 10 3
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and unbounded following)
+order by x
+settings max_block_size = 2;
+0 0 1 2
+1 0 2 3
+2 0 3 4
+3 0 4 5
+4 0 5 6
+5 0 6 7
+6 0 7 8
+7 0 8 9
+8 0 9 10
+9 0 10 11
+10 0 10 11
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 following)
+order by x
+settings max_block_size = 3;
+0 0 10 11
+1 0 10 11
+2 0 10 11
+3 1 10 10
+4 2 10 9
+5 3 10 8
+6 4 10 7
+7 5 10 6
+8 6 10 5
+9 7 10 4
+10 8 10 3
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 preceding)
+order by x
+settings max_block_size = 4;
+0 2 10 9
+1 3 10 8
+2 4 10 7
+3 5 10 6
+4 6 10 5
+5 7 10 4
+6 8 10 3
+7 9 10 2
+8 10 10 1
+9 0 0 0
+10 0 0 0
+-- Check that we put windows in such an order that we can reuse the sort.
+-- First, check that at least the result is correct when we have many windows
+-- with different sort order.
+select
+    number,
+    count(*) over (partition by p order by number),
+    count(*) over (partition by p order by number, o),
+    count(*) over (),
+    count(*) over (order by number),
+    count(*) over (order by o),
+    count(*) over (order by o, number),
+    count(*) over (order by number, o),
+    count(*) over (partition by p order by o, number),
+    count(*) over (partition by p),
+    count(*) over (partition by p order by o),
+    count(*) over (partition by p, o order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(16)) t
+order by number
+;
+0 1 1 16 1 4 1 1 1 3 1 1
+1 2 2 16 2 7 5 2 2 3 2 1
+2 3 3 16 3 10 8 3 3 3 3 1
+3 1 1 16 4 13 11 4 2 3 2 1
+4 2 2 16 5 16 14 5 3 3 3 1
+5 3 3 16 6 4 2 6 1 3 1 1
+6 1 1 16 7 7 6 7 1 3 1 1
+7 2 2 16 8 10 9 8 2 3 2 1
+8 3 3 16 9 13 12 9 3 3 3 1
+9 1 1 16 10 16 15 10 3 3 3 1
+10 2 2 16 11 4 3 11 1 3 1 1
+11 3 3 16 12 7 7 12 2 3 2 1
+12 1 1 16 13 10 10 13 1 3 1 1
+13 2 2 16 14 13 13 14 2 3 2 1
+14 3 3 16 15 16 16 15 3 3 3 1
+15 1 1 16 16 4 4 16 1 1 1 1
+-- The EXPLAIN for the above query would be difficult to understand, so check some
+-- simple cases instead.
+explain select
+    count(*) over (partition by p),
+    count(*) over (),
+    count(*) over (partition by p order by o)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(16)) t
+;
+Expression ((Projection + Before ORDER BY))
+  Window (Window step for window \'\')
+    Window (Window step for window \'PARTITION BY p\')
+      Window (Window step for window \'PARTITION BY p ORDER BY o ASC\')
+        Sorting (Sorting for window \'PARTITION BY p ORDER BY o ASC\')
+          Expression ((Before window functions + (Projection + Before ORDER BY)))
+            SettingQuotaAndLimits (Set limits and quota after reading from storage)
+              ReadFromStorage (SystemNumbers)
+explain select
+    count(*) over (order by o, number),
+    count(*) over (order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(16)) t
+;
+Expression ((Projection + Before ORDER BY))
+  Window (Window step for window \'ORDER BY o ASC, number ASC\')
+    Sorting (Sorting for window \'ORDER BY o ASC, number ASC\')
+      Window (Window step for window \'ORDER BY number ASC\')
+        Sorting (Sorting for window \'ORDER BY number ASC\')
+          Expression ((Before window functions + (Projection + Before ORDER BY)))
+            SettingQuotaAndLimits (Set limits and quota after reading from storage)
+              ReadFromStorage (SystemNumbers)
+-- A test case for the sort comparator found by fuzzer.
+SELECT
+    max(number) OVER (ORDER BY number DESC NULLS FIRST),
+    max(number) OVER (ORDER BY number ASC NULLS FIRST)
+FROM numbers(2)
+;
+1 0
+1 1
+-- optimize_read_in_order conflicts with sorting for window functions, check that
+-- it is disabled.
+drop table if exists window_mt;
+create table window_mt engine MergeTree order by number
+    as select number, mod(number, 3) p from numbers(100);
+select number, count(*) over (partition by p)
+    from window_mt order by number limit 10 settings optimize_read_in_order = 0;
+0 34
+1 33
+2 33
+3 34
+4 33
+5 33
+6 34
+7 33
+8 33
+9 34
+select number, count(*) over (partition by p)
+    from window_mt order by number limit 10 settings optimize_read_in_order = 1;
+0 34
+1 33
+2 33
+3 34
+4 33
+5 33
+6 34
+7 33
+8 33
+9 34
+drop table window_mt;
+-- some true window functions -- rank and friends
+select number, p, o,
+    count(*) over w,
+    rank() over w,
+    dense_rank() over w,
+    row_number() over w
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31) order by o, number) t
+window w as (partition by p order by o)
+order by p, o, number
+settings max_block_size = 2;
+0 0 0 2 1 1 1
+3 0 0 2 1 1 2
+1 0 1 4 3 2 3
+4 0 1 4 3 2 4
+2 0 2 5 5 3 5
+6 1 0 2 1 1 1
+9 1 0 2 1 1 2
+7 1 1 3 3 2 3
+5 1 2 5 4 3 4
+8 1 2 5 4 3 5
+12 2 0 1 1 1 1
+10 2 1 3 2 2 2
+13 2 1 3 2 2 3
+11 2 2 5 4 3 4
+14 2 2 5 4 3 5
+15 3 0 2 1 1 2
+18 3 0 2 1 1 1
+16 3 1 4 3 2 3
+19 3 1 4 3 2 4
+17 3 2 5 5 3 5
+21 4 0 2 1 1 1
+24 4 0 2 1 1 2
+22 4 1 3 3 2 3
+20 4 2 5 4 3 5
+23 4 2 5 4 3 4
+27 5 0 1 1 1 1
+25 5 1 3 2 2 2
+28 5 1 3 2 2 3
+26 5 2 5 4 3 4
+29 5 2 5 4 3 5
+30 6 0 1 1 1 1
+-- our replacement for lag/lead
+select
+    anyOrNull(number)
+    over (order by number rows between 1 preceding and 1 preceding),
+    anyOrNull(number)
+    over (order by number rows between 1 following and 1 following)
+from numbers(5);
+\N 1
+0 2
+1 3
+2 4
+3 \N
+-- variants of lag/lead that respect the frame
+select number, p, pp,
+    lagInFrame(number) over w as lag1,
+    lagInFrame(number, number - pp) over w as lag2,
+    lagInFrame(number, number - pp, number * 11) over w as lag,
+    leadInFrame(number, number - pp, number * 11) over w as lead
+from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
+window w as (partition by p order by number
+    rows between unbounded preceding and unbounded following)
+order by number
+settings max_block_size = 3;
+;
+0 0 0 0 0 0 0
+1 0 0 0 0 0 2
+2 0 0 1 0 0 4
+3 0 0 2 0 0 33
+4 0 0 3 0 0 44
+5 1 5 0 5 5 5
+6 1 5 5 5 5 7
+7 1 5 6 5 5 9
+8 1 5 7 5 5 88
+9 1 5 8 5 5 99
+10 2 10 0 10 10 10
+11 2 10 10 10 10 12
+12 2 10 11 10 10 14
+13 2 10 12 10 10 143
+14 2 10 13 10 10 154
+15 3 15 0 15 15 15
+-- careful with auto-application of Null combinator
+select lagInFrame(toNullable(1)) over ();
+\N
+select lagInFrameOrNull(1) over (); -- { serverError 36 }
+-- this is the same as `select max(Null::Nullable(Nothing))`
+select intDiv(1, NULL) x, toTypeName(x), max(x) over ();
+\N Nullable(Nothing) \N
+-- to make lagInFrame return null for out-of-frame rows, cast the argument to
+-- Nullable; otherwise, it returns default values.
+SELECT
+    number,
+    lagInFrame(toNullable(number), 1) OVER w,
+    lagInFrame(toNullable(number), 2) OVER w,
+    lagInFrame(number, 1) OVER w,
+    lagInFrame(number, 2) OVER w
+FROM numbers(4)
+WINDOW w AS (ORDER BY number ASC)
+;
+0 \N \N 0 0
+1 0 \N 0 0
+2 1 0 1 0
+3 2 1 2 1
+-- case-insensitive SQL-standard synonyms for any and anyLast
+select
+    number,
+    fIrSt_VaLue(number) over w,
+    lAsT_vAlUe(number) over w
+from numbers(10)
+window w as (order by number range between 1 preceding and 1 following)
+order by number
+;
+0 0 1
+1 0 2
+2 1 3
+3 2 4
+4 3 5
+5 4 6
+6 5 7
+7 6 8
+8 7 9
+9 8 9
+-- lagInFrame UBsan
+SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT lagInFrame(1, 0) OVER ();
+1
+SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER ();
+0
+SELECT lagInFrame(1, 1) OVER ();
+0
+-- leadInFrame UBsan
+SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT leadInFrame(1, 0) OVER ();
+1
+SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER ();
+0
+SELECT leadInFrame(1, 1) OVER ();
+0
+-- In this case, we had a problem with PartialSortingTransform returning zero-row
+-- chunks for input chunks w/o columns.
+select count() over () from numbers(4) where number < 2;
+2
+2
+-- floating point RANGE frame
+select
+    count(*) over (order by toFloat32(number) range 5. preceding),
+    count(*) over (order by toFloat64(number) range 5. preceding),
+    count(*) over (order by toFloat32(number) range between current row and 5. following),
+    count(*) over (order by toFloat64(number) range between current row and 5. following)
+from numbers(7)
+;
+1 1 6 6
+2 2 6 6
+3 3 5 5
+4 4 4 4
+5 5 3 3
+6 6 2 2
+6 6 1 1
+-- negative offsets should not be allowed
+select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
+select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
+select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
+select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
+-- a test with an aggregate function that allocates memory in arena
+select sum(a[length(a)])
+from (
+    select groupArray(number) over (partition by modulo(number, 11)
+        order by modulo(number, 1111), number) a
+    from numbers_mt(10000)
+) settings max_block_size = 7;
+49995000
+-- -INT_MIN row offset that can lead to problems with negation, found when fuzzing
+-- under UBSan. Should be limited to at most INT_MAX.
+select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 }
+-- Somehow in this case WindowTransform gets empty input chunks not marked as
+-- input end, and then two (!) empty input chunks marked as input end. Whatever.
+select count() over () from (select 1 a) l inner join (select 2 a) r using a;
+-- This case works as expected, one empty input chunk marked as input end.
+select count() over () where null;
+-- Inheriting another window.
+select number, count() over (w1 rows unbounded preceding) from numbers(10)
+window
+    w0 as (partition by intDiv(number, 5) as p),
+    w1 as (w0 order by mod(number, 3) as o)
+order by p, o, number
+;
+0 1
+3 2
+1 3
+4 4
+2 5
+6 1
+9 2
+7 3
+5 4
+8 5
+-- can't redefine PARTITION BY
+select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 }
+-- can't redefine existing ORDER BY
+select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 }
+-- parent window can't have frame
+select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 }
+-- looks weird but probably should work -- this is a window that inherits and changes nothing
+select count() over (w) from numbers(1) window w as ();
+1
+-- nonexistent parent window
+select count() over (w2 rows unbounded preceding); -- { serverError 36 }
diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql
index bc52e925f52..eb8c28de719 100644
--- a/tests/queries/0_stateless/01591_window_functions.sql
+++ b/tests/queries/0_stateless/01591_window_functions.sql
@@ -1,476 +1,476 @@
 -- { echo }
 -- just something basic
--- select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10);
-
--- -- proper calculation across blocks
--- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2;
-
--- -- not a window function
--- select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 }
-
--- -- no partition by
--- select number, avg(number) over (order by number rows unbounded preceding) from numbers(10);
-
--- -- no order by
--- select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10) order by number;
-
--- -- can add an alias after window spec
--- select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10) order by number, q;
-
--- -- can't reference it yet -- the window functions are calculated at the
--- -- last stage of select, after all other functions.
--- select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 }
-
--- -- must work in WHERE if you wrap it in a subquery
--- select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0;
-
--- -- should work in ORDER BY
--- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number;
-
--- -- also works in ORDER BY if you wrap it in a subquery
--- select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c;
-
--- -- Example with window function only in ORDER BY. Here we make a rank of all
--- -- numbers sorted descending, and then sort by this rank descending, and must get
--- -- the ascending order.
--- select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc;
-
--- -- Aggregate functions as window function arguments. This query is semantically
--- -- the same as the above one, only we replace `number` with
--- -- `any(number) group by number` and so on.
--- select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc;
--- -- some more simple cases w/aggregate functions
--- select sum(any(number)) over (rows unbounded preceding) from numbers(1);
--- select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1);
--- select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1);
-
--- -- different windows
--- -- an explain test would also be helpful, but it's too immature now and I don't
--- -- want to change reference all the time
--- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2;
-
--- -- two functions over the same window
--- -- an explain test would also be helpful, but it's too immature now and I don't
--- -- want to change reference all the time
--- select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2;
-
--- -- check that we can work with constant columns
--- select median(x) over (partition by x) from (select 1 x);
-
--- -- an empty window definition is valid as well
--- select groupArray(number) over (rows unbounded preceding) from numbers(3);
--- select groupArray(number) over () from numbers(3);
-
--- -- This one tests we properly process the window function arguments.
--- -- Seen errors like 'column `1` not found' from count(1).
--- select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3);
-
--- -- Should work in DISTINCT
--- select distinct sum(0) over (rows unbounded preceding) from numbers(2);
--- select distinct any(number) over (rows unbounded preceding) from numbers(2);
-
--- -- Various kinds of aliases are properly substituted into various parts of window
--- -- function definition.
--- with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7);
-
--- -- WINDOW clause
--- select 1 window w1 as ();
-
--- select sum(number) over w1, sum(number) over w2
--- from numbers(10)
--- window
--- w1 as (rows unbounded preceding),
--- w2 as (partition by intDiv(number, 3) rows unbounded preceding)
--- ;
-
--- -- FIXME both functions should use the same window, but they don't. Add an
--- -- EXPLAIN test for this.
--- select
--- sum(number) over w1,
--- sum(number) over (partition by intDiv(number, 3) rows unbounded preceding)
--- from numbers(10)
--- window
--- w1 as (partition by intDiv(number, 3) rows unbounded preceding)
--- ;
-
--- -- RANGE frame
--- -- It's the default
--- select sum(number) over () from numbers(3);
-
--- -- Try some mutually prime sizes of partition, group and block, for the number
--- -- of rows that is their least common multiple + 1, so that we see all the
--- -- interesting corner cases.
--- select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
--- from numbers(31)
--- window w as (partition by p order by o range unbounded preceding)
--- order by number
--- settings max_block_size = 5
--- ;
-
--- select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
--- from numbers(31)
--- window w as (partition by p order by o range unbounded preceding)
--- order by number
--- settings max_block_size = 2
--- ;
-
--- select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
--- from numbers(31)
--- window w as (partition by p order by o range unbounded preceding)
--- order by number
--- settings max_block_size = 3
--- ;
-
--- select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
--- from numbers(31)
--- window w as (partition by p order by o range unbounded preceding)
--- order by number
--- settings max_block_size = 2
--- ;
-
--- select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
--- from numbers(31)
--- window w as (partition by p order by o range unbounded preceding)
--- order by number
--- settings max_block_size = 3
--- ;
-
--- select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
--- from numbers(31)
--- window w as (partition by p order by o range unbounded preceding)
--- order by number
--- settings max_block_size = 5
--- ;
-
--- -- A case where the partition end is in the current block, and the frame end
--- -- is triggered by the partition end.
--- select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10));
-
--- -- UNBOUNDED FOLLOWING frame end
--- select
--- min(number) over wa, min(number) over wo,
--- max(number) over wa, max(number) over wo
--- from
--- (select number, intDiv(number, 3) p, mod(number, 5) o
--- from numbers(31))
--- window
--- wa as (partition by p order by o
--- range between unbounded preceding and unbounded following),
--- wo as (partition by p order by o
--- rows between unbounded preceding and unbounded following)
--- settings max_block_size = 2;
-
--- -- ROWS offset frame start
--- select number, p,
--- count(*) over (partition by p order by number
--- rows between 1 preceding and unbounded following),
--- count(*) over (partition by p order by number
--- rows between current row and unbounded following),
--- count(*) over (partition by p order by number
--- rows between 1 following and unbounded following)
--- from (select number, intDiv(number, 5) p from numbers(31))
--- order by p, number
--- settings max_block_size = 2;
-
--- -- ROWS offset frame start and end
--- select number, p,
--- count(*) over (partition by p order by number
--- rows between 2 preceding and 2 following)
--- from (select number, intDiv(number, 7) p from numbers(71))
--- order by p, number
--- settings max_block_size = 2;
-
--- SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
-
--- -- frame boundaries that runs into the partition end
--- select
--- count() over (partition by intDiv(number, 3)
--- rows between 100 following and unbounded following),
--- count() over (partition by intDiv(number, 3)
--- rows between current row and 100 following)
--- from numbers(10);
-
--- -- seen a use-after-free under MSan in this query once
--- SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
-
--- -- a corner case
--- select count() over ();
-
--- -- RANGE CURRENT ROW frame start
--- select number, p, o,
--- count(*) over (partition by p order by o
--- range between current row and unbounded following)
--- from (select number, intDiv(number, 5) p, mod(number, 3) o
--- from numbers(31))
--- order by p, o, number
--- settings max_block_size = 2;
-
--- select
--- count(*) over (rows between current row and current row),
--- count(*) over (range between current row and current row)
--- from numbers(3);
-
--- -- RANGE OFFSET
--- -- a basic RANGE OFFSET frame
--- select x, min(x) over w, max(x) over w, count(x) over w from (
--- select toUInt8(number) x from numbers(11))
--- window w as (order by x asc range between 1 preceding and 2 following)
--- order by x;
-
--- -- overflow conditions
--- select x, min(x) over w, max(x) over w, count(x) over w
--- from (
--- select toUInt8(if(mod(number, 2),
--- toInt64(255 - intDiv(number, 2)),
--- toInt64(intDiv(number, 2)))) x
--- from numbers(10)
--- )
--- window w as (order by x range between 1 preceding and 2 following)
--- order by x;
-
--- select x, min(x) over w, max(x) over w, count(x) over w
--- from (
--- select toInt8(multiIf(
--- mod(number, 3) == 0, toInt64(intDiv(number, 3)),
--- mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)),
--- toInt64(-128 + intDiv(number, 3)))) x
--- from numbers(15)
--- )
--- window w as (order by x range between 1 preceding and 2 following)
--- order by x;
-
--- -- We need large offsets to trigger overflow to positive direction, or
--- -- else the frame end runs into partition end w/o overflow and doesn't move
--- -- after that. The frame from this query is equivalent to the entire partition.
--- select x, min(x) over w, max(x) over w, count(x) over w
--- from (
--- select toUInt8(if(mod(number, 2),
--- toInt64(255 - intDiv(number, 2)),
--- toInt64(intDiv(number, 2)))) x
--- from numbers(10)
--- )
--- window w as (order by x range between 255 preceding and 255 following)
--- order by x;
-
--- -- RANGE OFFSET ORDER BY DESC
--- select x, min(x) over w, max(x) over w, count(x) over w from (
--- select toUInt8(number) x from numbers(11)) t
--- window w as (order by x desc range between 1 preceding and 2 following)
--- order by x
--- settings max_block_size = 1;
-
--- select x, min(x) over w, max(x) over w, count(x) over w from (
--- select toUInt8(number) x from numbers(11)) t
--- window w as (order by x desc range between 1 preceding and unbounded following)
--- order by x
--- settings max_block_size = 2;
-
--- select x, min(x) over w, max(x) over w, count(x) over w from (
--- select toUInt8(number) x from numbers(11)) t
--- window w as (order by x desc range between unbounded preceding and 2 following)
--- order by x
--- settings max_block_size = 3;
-
--- select x, min(x) over w, max(x) over w, count(x) over w from (
--- select toUInt8(number) x from numbers(11)) t
--- window w as (order by x desc range between unbounded preceding and 2 preceding)
--- order by x
--- settings max_block_size = 4;
-
-
--- -- Check that we put windows in such an order that we can reuse the sort.
--- -- First, check that at least the result is correct when we have many windows
--- -- with different sort order.
--- select
--- number,
--- count(*) over (partition by p order by number),
--- count(*) over (partition by p order by number, o),
--- count(*) over (),
--- count(*) over (order by number),
--- count(*) over (order by o),
--- count(*) over (order by o, number),
--- count(*) over (order by number, o),
--- count(*) over (partition by p order by o, number),
--- count(*) over (partition by p),
--- count(*) over (partition by p order by o),
--- count(*) over (partition by p, o order by number)
--- from
--- (select number, intDiv(number, 3) p, mod(number, 5) o
--- from numbers(16)) t
--- order by number
--- ;
-
--- -- The EXPLAIN for the above query would be difficult to understand, so check some
--- -- simple cases instead.
--- explain select
--- count(*) over (partition by p),
--- count(*) over (),
--- count(*) over (partition by p order by o)
--- from
--- (select number, intDiv(number, 3) p, mod(number, 5) o
--- from numbers(16)) t
--- ;
-
--- explain select
--- count(*) over (order by o, number),
--- count(*) over (order by number)
--- from
--- (select number, intDiv(number, 3) p, mod(number, 5) o
--- from numbers(16)) t
--- ;
-
--- -- A test case for the sort comparator found by fuzzer.
--- SELECT
--- max(number) OVER (ORDER BY number DESC NULLS FIRST),
--- max(number) OVER (ORDER BY number ASC NULLS FIRST)
--- FROM numbers(2)
--- ;
-
--- -- optimize_read_in_order conflicts with sorting for window functions, check that
--- -- it is disabled.
--- drop table if exists window_mt;
--- create table window_mt engine MergeTree order by number
--- as select number, mod(number, 3) p from numbers(100);
-
--- select number, count(*) over (partition by p)
--- from window_mt order by number limit 10 settings optimize_read_in_order = 0;
-
--- select number, count(*) over (partition by p)
--- from window_mt order by number limit 10 settings optimize_read_in_order = 1;
-
--- drop table window_mt;
-
--- -- some true window functions -- rank and friends
--- select number, p, o,
--- count(*) over w,
--- rank() over w,
--- dense_rank() over w,
--- row_number() over w
--- from (select number, intDiv(number, 5) p, mod(number, 3) o
--- from numbers(31) order by o, number) t
--- window w as (partition by p order by o)
--- order by p, o, number
--- settings max_block_size = 2;
-
--- -- our replacement for lag/lead
--- select
--- anyOrNull(number)
--- over (order by number rows between 1 preceding and 1 preceding),
--- anyOrNull(number)
--- over (order by number rows between 1 following and 1 following)
--- from numbers(5);
-
--- -- variants of lag/lead that respect the frame
--- select number, p, pp,
--- lagInFrame(number) over w as lag1,
--- lagInFrame(number, number - pp) over w as lag2,
--- lagInFrame(number, number - pp, number * 11) over w as lag,
--- leadInFrame(number, number - pp, number * 11) over w as lead
--- from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
--- window w as (partition by p order by number
--- rows between unbounded preceding and unbounded following)
--- order by number
--- settings max_block_size = 3;
--- ;
-
--- -- careful with auto-application of Null combinator
--- select lagInFrame(toNullable(1)) over ();
--- select lagInFrameOrNull(1) over (); -- { serverError 36 }
--- -- this is the same as `select max(Null::Nullable(Nothing))`
--- select intDiv(1, NULL) x, toTypeName(x), max(x) over ();
--- -- to make lagInFrame return null for out-of-frame rows, cast the argument to
--- -- Nullable; otherwise, it returns default values.
--- SELECT
--- number,
--- lagInFrame(toNullable(number), 1) OVER w,
--- lagInFrame(toNullable(number), 2) OVER w,
--- lagInFrame(number, 1) OVER w,
--- lagInFrame(number, 2) OVER w
--- FROM numbers(4)
--- WINDOW w AS (ORDER BY number ASC)
--- ;
-
--- -- case-insensitive SQL-standard synonyms for any and anyLast
--- select
--- number,
--- fIrSt_VaLue(number) over w,
--- lAsT_vAlUe(number) over w
--- from numbers(10)
--- window w as (order by number range between 1 preceding and 1 following)
--- order by number
--- ;
-
--- -- lagInFrame UBsan
--- SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS }
--- SELECT lagInFrame(1, 0) OVER ();
--- SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS }
--- SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER ();
--- SELECT lagInFrame(1, 1) OVER ();
-
--- -- leadInFrame UBsan
--- SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS }
--- SELECT leadInFrame(1, 0) OVER ();
--- SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS }
--- SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER ();
--- SELECT leadInFrame(1, 1) OVER ();
-
--- -- In this case, we had a problem with PartialSortingTransform returning zero-row
--- -- chunks for input chunks w/o columns.
--- select count() over () from numbers(4) where number < 2;
-
--- -- floating point RANGE frame
--- select
--- count(*) over (order by toFloat32(number) range 5. preceding),
--- count(*) over (order by toFloat64(number) range 5. preceding),
--- count(*) over (order by toFloat32(number) range between current row and 5. following),
--- count(*) over (order by toFloat64(number) range between current row and 5. following)
--- from numbers(7)
--- ;
-
--- -- negative offsets should not be allowed
--- select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
--- select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
--- select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
--- select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
-
--- -- a test with aggregate function that allocates memory in arena
--- select sum(a[length(a)])
--- from (
--- select groupArray(number) over (partition by modulo(number, 11)
--- order by modulo(number, 1111), number) a
--- from numbers_mt(10000)
--- ) settings max_block_size = 7;
-
--- -- -INT_MIN row offset that can lead to problems with negation, found when fuzzing
--- -- under UBSan. Should be limited to at most INT_MAX.
--- select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 }
-
--- -- Somehow in this case WindowTransform gets empty input chunks not marked as
--- -- input end, and then two (!) empty input chunks marked as input end. Whatever.
--- select count() over () from (select 1 a) l inner join (select 2 a) r using a;
--- -- This case works as expected, one empty input chunk marked as input end.
--- select count() over () where null;
-
--- -- Inheriting another window.
--- select number, count() over (w1 rows unbounded preceding) from numbers(10)
--- window
--- w0 as (partition by intDiv(number, 5) as p),
--- w1 as (w0 order by mod(number, 3) as o)
--- order by p, o, number
--- ;
-
--- -- can't redefine PARTITION BY
--- select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 }
-
--- -- can't redefine existing ORDER BY
--- select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 }
-
--- -- parent window can't have frame
--- select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 }
-
--- -- looks weird but probably should work -- this is a window that inherits and changes nothing
--- select count() over (w) from numbers(1) window w as ();
-
--- -- nonexistent parent window
--- select count() over (w2 rows unbounded preceding); -- { serverError 36 }
+select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10);
+
+-- proper calculation across blocks
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2;
+
+-- not a window function
+select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 }
+
+-- no partition by
+select number, avg(number) over (order by number rows unbounded preceding) from numbers(10);
+
+-- no order by
+select number, quantileExact(number) over (partition by intDiv(number, 3) AS value order by value rows unbounded preceding) from numbers(10);
+
+-- can add an alias after window spec
+select number, quantileExact(number) over (partition by intDiv(number, 3) AS value order by value rows unbounded preceding) q from numbers(10);
+
+-- can't reference it yet -- the window functions are calculated at the
+-- last stage of select, after all other functions.
+select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 }
+
+-- must work in WHERE if you wrap it in a subquery
+select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0;
+
+-- should work in ORDER BY
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number;
+
+-- also works in ORDER BY if you wrap it in a subquery
+select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c;
+
+-- Example with window function only in ORDER BY. Here we make a rank of all
+-- numbers sorted descending, and then sort by this rank descending, and must get
+-- the ascending order.
+select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc;
+
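+-- A sketch of the same rank trick with an explicit alias, illustrative only
+-- and deliberately commented out (it is not covered by the reference file);
+-- it assumes nothing beyond the numbers() table function used above.
+-- select number from (
+--     select number, count() over (order by number desc rows unbounded preceding) r
+--     from numbers(5)
+-- ) order by r desc;
+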
+-- Aggregate functions as window function arguments. This query is semantically
+-- the same as the above one, only we replace `number` with
+-- `any(number) group by number` and so on.
+select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc;
+-- some more simple cases w/aggregate functions
+select sum(any(number)) over (rows unbounded preceding) from numbers(1);
+select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1);
+select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1);
+
+-- different windows
+-- an explain test would also be helpful, but it's too immature now and I don't
+-- want to change reference all the time
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2;
+
+-- two functions over the same window
+-- an explain test would also be helpful, but it's too immature now and I don't
+-- want to change reference all the time
+select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2;
+
+-- check that we can work with constant columns
+select median(x) over (partition by x) from (select 1 x);
+
+-- an empty window definition is valid as well
+select groupArray(number) over (rows unbounded preceding) from numbers(3);
+select groupArray(number) over () from numbers(3);
+
+-- This one tests we properly process the window function arguments.
+-- Seen errors like 'column `1` not found' from count(1).
+select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3);
+
+-- Should work in DISTINCT
+select distinct sum(0) over (rows unbounded preceding) from numbers(2);
+select distinct any(number) over (rows unbounded preceding) from numbers(2);
+
+-- Various kinds of aliases are properly substituted into various parts of window
+-- function definition.
+with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7);
+
+-- WINDOW clause
+select 1 window w1 as ();
+
+select sum(number) over w1, sum(number) over w2
+from numbers(10)
+window
+    w1 as (rows unbounded preceding),
+    w2 as (partition by intDiv(number, 3) as value order by value rows unbounded preceding)
+;
+
+-- FIXME both functions should use the same window, but they don't. Add an
+-- EXPLAIN test for this.
+select
+    sum(number) over w1,
+    sum(number) over (partition by intDiv(number, 3) as value order by value rows unbounded preceding)
+from numbers(10)
+window
+    w1 as (partition by intDiv(number, 3) rows unbounded preceding)
+;
+
+-- RANGE frame
+-- It's the default
+select sum(number) over () from numbers(3);
+
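+-- Illustrative only (commented out, not covered by the reference file): with
+-- an ORDER BY the default frame is RANGE BETWEEN UNBOUNDED PRECEDING AND
+-- CURRENT ROW, so the two expressions below should agree.
+-- select sum(number) over (order by number),
+--     sum(number) over (order by number range unbounded preceding)
+-- from numbers(3);
+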
+-- Try some mutually prime sizes of partition, group and block, for the number
+-- of rows that is their least common multiple + 1, so that we see all the
+-- interesting corner cases.
+select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c
+from numbers(31)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 5
+;
+
+select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c
+from numbers(31)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 2
+;
+
+select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c
+from numbers(31)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 3
+;
+
+select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c
+from numbers(31)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 2
+;
+
+select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c
+from numbers(31)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 3
+;
+
+select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c
+from numbers(31)
+window w as (partition by p order by o range unbounded preceding)
+order by number
+settings max_block_size = 5
+;
+
+-- A case where the partition end is in the current block, and the frame end
+-- is triggered by the partition end.
+select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10));
+
+-- UNBOUNDED FOLLOWING frame end
+select
+    min(number) over wa, min(number) over wo,
+    max(number) over wa, max(number) over wo
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(31))
+window
+    wa as (partition by p order by o
+        range between unbounded preceding and unbounded following),
+    wo as (partition by p order by o
+        rows between unbounded preceding and unbounded following)
+settings max_block_size = 2;
+
+-- ROWS offset frame start
+select number, p,
+    count(*) over (partition by p order by number
+        rows between 1 preceding and unbounded following),
+    count(*) over (partition by p order by number
+        rows between current row and unbounded following),
+    count(*) over (partition by p order by number
+        rows between 1 following and unbounded following)
+from (select number, intDiv(number, 5) p from numbers(31))
+order by p, number
+settings max_block_size = 2;
+
+-- ROWS offset frame start and end
+select number, p,
+    count(*) over (partition by p order by number
+        rows between 2 preceding and 2 following)
+from (select number, intDiv(number, 7) p from numbers(71))
+order by p, number
+settings max_block_size = 2;
+
+SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4);
+
+-- frame boundaries that run into the partition end
+select
+    count() over (partition by intDiv(number, 3)
+        rows between 100 following and unbounded following),
+    count() over (partition by intDiv(number, 3)
+        rows between current row and 100 following)
+from numbers(10);
+
+-- seen a use-after-free under MSan in this query once
+SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null;
+
+-- a corner case
+select count() over ();
+
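+-- Illustrative only (commented out, not covered by the reference file): a
+-- plain 3-row moving count; near the partition edges the ROWS frame is
+-- simply clipped to the rows that exist.
+-- select number,
+--     count(*) over (partition by intDiv(number, 5) order by number
+--         rows between 1 preceding and 1 following)
+-- from numbers(10) order by number;
+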
+-- RANGE CURRENT ROW frame start
+select number, p, o,
+    count(*) over (partition by p order by o
+        range between current row and unbounded following)
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31))
+order by p, o, number
+settings max_block_size = 2;
+
+select
+    count(*) over (rows between current row and current row),
+    count(*) over (range between current row and current row)
+from numbers(3);
+
+-- RANGE OFFSET
+-- a basic RANGE OFFSET frame
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11))
+window w as (order by x asc range between 1 preceding and 2 following)
+order by x;
+
+-- overflow conditions
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toUInt8(if(mod(number, 2),
+        toInt64(255 - intDiv(number, 2)),
+        toInt64(intDiv(number, 2)))) x
+    from numbers(10)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toInt8(multiIf(
+        mod(number, 3) == 0, toInt64(intDiv(number, 3)),
+        mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)),
+        toInt64(-128 + intDiv(number, 3)))) x
+    from numbers(15)
+)
+window w as (order by x range between 1 preceding and 2 following)
+order by x;
+
+-- We need large offsets to trigger overflow to positive direction, or
+-- else the frame end runs into partition end w/o overflow and doesn't move
+-- after that. The frame from this query is equivalent to the entire partition.
+select x, min(x) over w, max(x) over w, count(x) over w
+from (
+    select toUInt8(if(mod(number, 2),
+        toInt64(255 - intDiv(number, 2)),
+        toInt64(intDiv(number, 2)))) x
+    from numbers(10)
+)
+window w as (order by x range between 255 preceding and 255 following)
+order by x;
+
+-- RANGE OFFSET ORDER BY DESC
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and 2 following)
+order by x
+settings max_block_size = 1;
+
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between 1 preceding and unbounded following)
+order by x
+settings max_block_size = 2;
+
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 following)
+order by x
+settings max_block_size = 3;
+
+select x, min(x) over w, max(x) over w, count(x) over w from (
+    select toUInt8(number) x from numbers(11)) t
+window w as (order by x desc range between unbounded preceding and 2 preceding)
+order by x
+settings max_block_size = 4;
+
+
+-- Check that we put windows in such an order that we can reuse the sort.
+-- First, check that at least the result is correct when we have many windows
+-- with different sort order.
+select
+    number,
+    count(*) over (partition by p order by number),
+    count(*) over (partition by p order by number, o),
+    count(*) over (),
+    count(*) over (order by number),
+    count(*) over (order by o),
+    count(*) over (order by o, number),
+    count(*) over (order by number, o),
+    count(*) over (partition by p order by o, number),
+    count(*) over (partition by p),
+    count(*) over (partition by p order by o),
+    count(*) over (partition by p, o order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(16)) t
+order by number
+;
+
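+-- Illustrative only (commented out, not covered by the reference file): with
+-- ORDER BY ... DESC the RANGE offsets are applied along the sort direction,
+-- i.e. "1 preceding" reaches the key greater by 1, mirroring the ascending
+-- examples above.
+-- select x, count(x) over (order by x desc range between 1 preceding and 1 following)
+-- from (select toUInt8(number) x from numbers(5)) order by x;
+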
+-- The EXPLAIN for the above query would be difficult to understand, so check some
+-- simple cases instead.
+explain select
+    count(*) over (partition by p),
+    count(*) over (),
+    count(*) over (partition by p order by o)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(16)) t
+;
+
+explain select
+    count(*) over (order by o, number),
+    count(*) over (order by number)
+from
+    (select number, intDiv(number, 3) p, mod(number, 5) o
+    from numbers(16)) t
+;
+
+-- A test case for the sort comparator found by fuzzer.
+SELECT
+    max(number) OVER (ORDER BY number DESC NULLS FIRST),
+    max(number) OVER (ORDER BY number ASC NULLS FIRST)
+FROM numbers(2)
+;
+
+-- optimize_read_in_order conflicts with sorting for window functions, check that
+-- it is disabled.
+drop table if exists window_mt;
+create table window_mt engine MergeTree order by number
+    as select number, mod(number, 3) p from numbers(100);
+
+select number, count(*) over (partition by p)
+    from window_mt order by number limit 10 settings optimize_read_in_order = 0;
+
+select number, count(*) over (partition by p)
+    from window_mt order by number limit 10 settings optimize_read_in_order = 1;
+
+drop table window_mt;
+
+-- some true window functions -- rank and friends
+select number, p, o,
+    count(*) over w,
+    rank() over w,
+    dense_rank() over w,
+    row_number() over w
+from (select number, intDiv(number, 5) p, mod(number, 3) o
+    from numbers(31) order by o, number) t
+window w as (partition by p order by o)
+order by p, o, number
+settings max_block_size = 2;
+
+-- our replacement for lag/lead
+select
+    anyOrNull(number)
+    over (order by number rows between 1 preceding and 1 preceding),
+    anyOrNull(number)
+    over (order by number rows between 1 following and 1 following)
+from numbers(5);
+
+-- variants of lag/lead that respect the frame
+select number, p, pp,
+    lagInFrame(number) over w as lag1,
+    lagInFrame(number, number - pp) over w as lag2,
+    lagInFrame(number, number - pp, number * 11) over w as lag,
+    leadInFrame(number, number - pp, number * 11) over w as lead
+from (select number, intDiv(number, 5) p, p * 5 pp from numbers(16))
+window w as (partition by p order by number
+    rows between unbounded preceding and unbounded following)
+order by number
+settings max_block_size = 3;
+;
+
+-- careful with auto-application of Null combinator
+select lagInFrame(toNullable(1)) over ();
+select lagInFrameOrNull(1) over (); -- { serverError 36 }
+-- this is the same as `select max(Null::Nullable(Nothing))`
+select intDiv(1, NULL) x, toTypeName(x), max(x) over ();
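+-- Illustrative only (commented out, not covered by the reference file): over
+-- a full-partition ROWS frame, lagInFrame(number, 1) should match the
+-- anyOrNull() emulation above, except that a non-Nullable argument yields the
+-- type's default value instead of NULL for out-of-frame rows.
+-- select
+--     lagInFrame(number, 1) over (order by number
+--         rows between unbounded preceding and unbounded following),
+--     anyOrNull(number) over (order by number
+--         rows between 1 preceding and 1 preceding)
+-- from numbers(5);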
+-- to make lagInFrame return null for out-of-frame rows, cast the argument to
+-- Nullable; otherwise, it returns default values.
+SELECT
+    number,
+    lagInFrame(toNullable(number), 1) OVER w,
+    lagInFrame(toNullable(number), 2) OVER w,
+    lagInFrame(number, 1) OVER w,
+    lagInFrame(number, 2) OVER w
+FROM numbers(4)
+WINDOW w AS (ORDER BY number ASC)
+;
+
+-- case-insensitive SQL-standard synonyms for any and anyLast
+select
+    number,
+    fIrSt_VaLue(number) over w,
+    lAsT_vAlUe(number) over w
+from numbers(10)
+window w as (order by number range between 1 preceding and 1 following)
+order by number
+;
+
+-- lagInFrame UBsan
+SELECT lagInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT lagInFrame(1, 0) OVER ();
+SELECT lagInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT lagInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER ();
+SELECT lagInFrame(1, 1) OVER ();
+
+-- leadInFrame UBsan
+SELECT leadInFrame(1, -1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT leadInFrame(1, 0) OVER ();
+SELECT leadInFrame(1, /* INT64_MAX+1 */ 0x7fffffffffffffff+1) OVER (); -- { serverError BAD_ARGUMENTS }
+SELECT leadInFrame(1, /* INT64_MAX */ 0x7fffffffffffffff) OVER ();
+SELECT leadInFrame(1, 1) OVER ();
+
+-- In this case, we had a problem with PartialSortingTransform returning zero-row
+-- chunks for input chunks w/o columns.
+select count() over () from numbers(4) where number < 2;
+
+-- floating point RANGE frame
+select
+    count(*) over (order by toFloat32(number) range 5. preceding),
+    count(*) over (order by toFloat64(number) range 5. preceding),
+    count(*) over (order by toFloat32(number) range between current row and 5. following),
+    count(*) over (order by toFloat64(number) range between current row and 5. following)
+from numbers(7)
+;
+
+-- negative offsets should not be allowed
+select count() over (order by toInt64(number) range between -1 preceding and unbounded following) from numbers(1); -- { serverError 36 }
+select count() over (order by toInt64(number) range between -1 following and unbounded following) from numbers(1); -- { serverError 36 }
+select count() over (order by toInt64(number) range between unbounded preceding and -1 preceding) from numbers(1); -- { serverError 36 }
+select count() over (order by toInt64(number) range between unbounded preceding and -1 following) from numbers(1); -- { serverError 36 }
+
+-- a test with an aggregate function that allocates memory in arena
+select sum(a[length(a)])
+from (
+    select groupArray(number) over (partition by modulo(number, 11)
+        order by modulo(number, 1111), number) a
+    from numbers_mt(10000)
+) settings max_block_size = 7;
+
+-- -INT_MIN row offset that can lead to problems with negation, found when fuzzing
+-- under UBSan. Should be limited to at most INT_MAX.
+select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 }
+
+-- Somehow in this case WindowTransform gets empty input chunks not marked as
+-- input end, and then two (!) empty input chunks marked as input end. Whatever.
+select count() over () from (select 1 a) l inner join (select 2 a) r using a;
+-- This case works as expected, one empty input chunk marked as input end.
+select count() over () where null;
+
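+-- Illustrative only (commented out, not covered by the reference file): the
+-- boundary value INT_MAX itself is presumably still accepted as a row offset,
+-- since only offsets above INT_MAX are rejected.
+-- select count() over (rows between 2147483647 preceding and current row) from numbers(2);
+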
+-- Inheriting another window.
+select number, count() over (w1 rows unbounded preceding) from numbers(10)
+window
+    w0 as (partition by intDiv(number, 5) as p),
+    w1 as (w0 order by mod(number, 3) as o)
+order by p, o, number
+;
+
+-- can't redefine PARTITION BY
+select count() over (w partition by number) from numbers(1) window w as (partition by intDiv(number, 5)); -- { serverError 36 }
+
+-- can't redefine existing ORDER BY
+select count() over (w order by number) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3)); -- { serverError 36 }
+
+-- parent window can't have frame
+select count() over (w range unbounded preceding) from numbers(1) window w as (partition by intDiv(number, 5) order by mod(number, 3) rows unbounded preceding); -- { serverError 36 }
+
+-- looks weird but probably should work -- this is a window that inherits and changes nothing
+select count() over (w) from numbers(1) window w as ();
+
+-- nonexistent parent window
+select count() over (w2 rows unbounded preceding); -- { serverError 36 }

From 1202fec92774e06865742e846431a1124a8dfabd Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 4 Feb 2022 20:25:30 +0000
Subject: [PATCH 061/215] Fixed tests

---
 src/Functions/array/mapPopulateSeries.cpp | 47 +++++++++++++++++++----
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp
index ce33a7b8634..97513bc26e7 100644
--- a/src/Functions/array/mapPopulateSeries.cpp
+++ b/src/Functions/array/mapPopulateSeries.cpp
@@ -230,14 +230,42 @@ private:
             }
         }
 
-        size_t length = static_cast<size_t>(max_key - min_key);
-
-        static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
-        if (length > MAX_ARRAY_SIZE)
-            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
-                "Function {} too large array size {} in the result",
-                getName(),
-                length);
+        using KeyTypeUnsigned = ::make_unsigned_t<KeyType>;
+        KeyTypeUnsigned max_min_key_difference = 0;
+
+        if constexpr (::is_unsigned_v<KeyType>)
+        {
+            max_min_key_difference = max_key - min_key;
+        }
+        else
+        {
+            bool is_max_key_positive = max_key >= 0;
+            bool is_min_key_positive = max_key >= 0;
+
+            if (is_max_key_positive && is_min_key_positive)
+            {
+                max_min_key_difference = static_cast<KeyTypeUnsigned>(max_key - min_key);
+            }
+            else if (is_max_key_positive && !is_min_key_positive)
+            {
+                KeyTypeUnsigned min_key_unsigned = -static_cast<KeyTypeUnsigned>(min_key);
+                max_min_key_difference = static_cast<KeyTypeUnsigned>(max_key) + min_key_unsigned;
+            }
+            else
+            {
+                KeyTypeUnsigned min_key_unsigned = -static_cast<KeyTypeUnsigned>(min_key);
+                KeyTypeUnsigned max_key_unsigned = -static_cast<KeyTypeUnsigned>(min_key_unsigned);
+                max_min_key_difference = min_key_unsigned - max_key_unsigned;
+            }
+        }
+
+        static constexpr size_t MAX_ARRAY_SIZE = 1ULL << 30;
+        if (max_min_key_difference > MAX_ARRAY_SIZE)
+            throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
+                "Function {} too large array size in the result",
+                getName());
+
+        size_t length = static_cast<size_t>(max_min_key_difference);
 
         size_t result_key_data_size = result_key_data.size();
         size_t result_value_data_size = result_value_data.size();
         size_t sorted_keys_values_size = sorted_keys_values.size();
@@ -416,7 +444,10 @@ private:
         using KeyType = typename Types::LeftType;
         using ValueType = typename Types::RightType;
 
-        if constexpr (IsDataTypeNumber<KeyType> && IsDataTypeNumber<ValueType>)
+        static constexpr bool key_and_value_are_numbers = IsDataTypeNumber<KeyType> && IsDataTypeNumber<ValueType>;
+        static constexpr bool key_is_float = std::is_same_v<KeyType, DataTypeFloat32> || std::is_same_v<KeyType, DataTypeFloat64>;
+
+        if constexpr (key_and_value_are_numbers && !key_is_float)
        {
             using KeyFieldType = typename KeyType::FieldType;
             using ValueFieldType = typename ValueType::FieldType;
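A note on the first hunk above: the branchy unsigned arithmetic exists to
compute max_key - min_key without signed overflow. The following is an
editor's self-contained sketch of the same idea, not code from the patch (the
function name is made up, and it uses the correct min/max test that patch 063
below arrives at):

#include <type_traits>

template <typename T>
std::make_unsigned_t<T> absDifference(T max_key, T min_key)
{
    // Precondition: max_key >= min_key (the keys are sorted beforehand).
    using U = std::make_unsigned_t<T>;
    if constexpr (std::is_unsigned_v<T>)
        return max_key - min_key;                                    // cannot wrap: max >= min
    else if (min_key >= 0)
        return static_cast<U>(max_key) - static_cast<U>(min_key);    // both non-negative
    else if (max_key >= 0)
        return static_cast<U>(max_key) + (-static_cast<U>(min_key)); // mixed signs
    else
        return (-static_cast<U>(min_key)) - (-static_cast<U>(max_key)); // both negative
}

Here -static_cast<U>(x) is well defined for unsigned U and yields |x| modulo
2^N even for the most negative value, which is what makes this safe where a
naive signed subtraction would be undefined behaviour.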
From bc6328e5734679c94de0df49739914f7a5a4a320 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Fri, 4 Feb 2022 20:56:49 +0000
Subject: [PATCH 062/215] Fixed tests

---
 src/Access/AccessRights.cpp                            |  4 ++--
 src/Access/RolesOrUsersSet.cpp                         | 10 +++++-----
 src/Interpreters/Access/InterpreterShowAccessQuery.cpp |  4 ++--
 .../Access/InterpreterShowCreateAccessEntityQuery.cpp  |  4 ++--
 src/Interpreters/Access/InterpreterShowGrantsQuery.cpp |  4 ++--
 tests/integration/test_multiple_disks/test.py          |  6 ++++++
 6 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/src/Access/AccessRights.cpp b/src/Access/AccessRights.cpp
index 19b069546ee..ca8609f3984 100644
--- a/src/Access/AccessRights.cpp
+++ b/src/Access/AccessRights.cpp
@@ -1,8 +1,8 @@
 #include
 #include
+#include <base/sort.h>
 #include
 #include
-#include <boost/range/algorithm/sort.hpp>
 #include
 
 namespace DB
@@ -101,7 +101,7 @@ namespace
         AccessRightsElements getResult() const
         {
             ProtoElements sorted = *this;
-            boost::range::sort(sorted);
+            ::sort(sorted.begin(), sorted.end());
 
             AccessRightsElements res;
             res.reserve(sorted.size());
diff --git a/src/Access/RolesOrUsersSet.cpp b/src/Access/RolesOrUsersSet.cpp
index 810198eeb98..2c302fde229 100644
--- a/src/Access/RolesOrUsersSet.cpp
+++ b/src/Access/RolesOrUsersSet.cpp
@@ -7,8 +7,8 @@
 #include
 #include
 #include
 #include
-#include <boost/range/algorithm/sort.hpp>
 #include
+#include <base/sort.h>
 
 namespace DB
@@ -132,7 +132,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toAST() const
         ast->names.reserve(ids.size());
         for (const UUID & id : ids)
             ast->names.emplace_back(::DB::toString(id));
-        boost::range::sort(ast->names);
+        ::sort(ast->names.begin(), ast->names.end());
     }
 
     if (!except_ids.empty())
@@ -140,7 +140,7 @@
         ast->except_names.reserve(except_ids.size());
         for (const UUID & except_id : except_ids)
             ast->except_names.emplace_back(::DB::toString(except_id));
-        boost::range::sort(ast->except_names);
+        ::sort(ast->except_names.begin(), ast->except_names.end());
     }
 
     return ast;
@@ -161,7 +161,7 @@ std::shared_ptr<ASTRolesOrUsersSet> RolesOrUsersSet::toASTWithNames(const AccessControl & access_control) const
             if (name)
                 ast->names.emplace_back(std::move(*name));
         }
-        boost::range::sort(ast->names);
+        ::sort(ast->names.begin(), ast->names.end());
     }
 
     if (!except_ids.empty())
@@ -173,7 +173,7 @@
             if (except_name)
                 ast->except_names.emplace_back(std::move(*except_name));
         }
-        boost::range::sort(ast->except_names);
+        ::sort(ast->except_names.begin(), ast->except_names.end());
     }
 
     return ast;
diff --git a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp
index 26c47507ce2..e16ee03c711 100644
--- a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp
+++ b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp
@@ -10,7 +10,7 @@
 #include
 #include
 #include
-#include <boost/range/algorithm/sort.hpp>
+#include <base/sort.h>
 #include
 
@@ -61,7 +61,7 @@ std::vector<AccessEntityPtr> InterpreterShowAccessQuery::getEntities() const
         }
     }
 
-    boost::range::sort(entities, IAccessEntity::LessByTypeAndName{});
+    ::sort(entities.begin(), entities.end(), IAccessEntity::LessByTypeAndName{});
     return entities;
 }
diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp
index 284b3cd1b48..163cb57cab5 100644
--- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp
+++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp
@@ -27,7 +27,7 @@
 #include
 #include
 #include
-#include <boost/range/algorithm/sort.hpp>
+#include <base/sort.h>
 
 namespace DB
@@ -341,7 +341,7 @@ std::vector<AccessEntityPtr> InterpreterShowCreateAccessEntityQuery::getEntities
std::vector InterpreterShowCreateAccessEntityQuery::getEntities entities.push_back(access_control.read(access_control.getID(show_query.type, name))); } - boost::range::sort(entities, IAccessEntity::LessByName{}); + ::sort(entities.begin(), entities.end(), IAccessEntity::LessByName{}); return entities; } diff --git a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp index 17d9f321b56..c82088847d3 100644 --- a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include namespace DB @@ -159,7 +159,7 @@ std::vector InterpreterShowGrantsQuery::getEntities() const entities.push_back(entity); } - boost::range::sort(entities, IAccessEntity::LessByTypeAndName{}); + ::sort(entities.begin(), entities.end(), IAccessEntity::LessByTypeAndName{}); return entities; } diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index adf5d15ef31..e2b30b8f90e 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -382,12 +382,18 @@ def test_round_robin(start_cluster, name, engine): used_disk = get_used_disks_for_table(node1, name) assert len(used_disk) == 1, 'More than one disk used for single insert' + # sleep is required because we order disks by their modification time, and if insert will be fast + # modification time of two disks will be equal, then sort will not provide deterministic results + time.sleep(5) + node1.query_with_retry("insert into {} select * from numbers(10000, 10000)".format(name)) used_disks = get_used_disks_for_table(node1, name) assert len(used_disks) == 2, 'Two disks should be used for two parts' assert used_disks[0] != used_disks[1], "Should write to different disks" + time.sleep(5) + node1.query_with_retry("insert into {} select * from numbers(20000, 10000)".format(name)) used_disks = get_used_disks_for_table(node1, name) From b81d9d20b70036a2c66598589295af84bca4d6d2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 4 Feb 2022 21:25:06 +0000 Subject: [PATCH 063/215] Fixed tests --- src/Functions/array/mapPopulateSeries.cpp | 116 ++++++++++++++-------- 1 file changed, 73 insertions(+), 43 deletions(-) diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 97513bc26e7..1c5dce5d586 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -1,13 +1,15 @@ +#include + #include #include #include #include #include +#include #include #include #include -#include "DataTypes/IDataType.h" -#include "DataTypes/DataTypeMap.h" +#include #include #include #include @@ -202,7 +204,7 @@ private: continue; } - std::sort(sorted_keys_values.begin(), sorted_keys_values.end()); + ::sort(sorted_keys_values.begin(), sorted_keys_values.end()); KeyType min_key = sorted_keys_values.front().first; KeyType max_key = sorted_keys_values.back().first; @@ -240,7 +242,7 @@ private: else { bool is_max_key_positive = max_key >= 0; - bool is_min_key_positive = max_key >= 0; + bool is_min_key_positive = min_key >= 0; if (is_max_key_positive && is_min_key_positive) { @@ -253,8 +255,9 @@ private: } else { + /// Both max and min key are negative KeyTypeUnsigned min_key_unsigned = -static_cast(min_key); - KeyTypeUnsigned max_key_unsigned = -static_cast(min_key_unsigned); + KeyTypeUnsigned max_key_unsigned = -static_cast(max_key); 
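+                /// Editor's note, not in the original patch: here
+                /// min_key <= max_key < 0 (both branches above handled a
+                /// non-negative max_key), so |min_key| >= |max_key| and the
+                /// unsigned subtraction below cannot wrap. Example:
+                /// min_key = -7, max_key = -2 gives min_key_unsigned = 7,
+                /// max_key_unsigned = 2, difference = 5 == (-2) - (-7).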
max_min_key_difference = min_key_unsigned - max_key_unsigned; } } @@ -307,7 +310,7 @@ private: } } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override + struct KeyAndValueInput { DataTypePtr key_series_type; DataTypePtr value_series_type; @@ -316,6 +319,14 @@ private: ColumnPtr value_column; ColumnPtr offsets_column; + /// Optional max key column + ColumnPtr max_key_column; + }; + + KeyAndValueInput extractKeyAndValueInput(const ColumnsWithTypeAndName & arguments) const + { + KeyAndValueInput input; + size_t max_key_argument_index = 0; auto first_argument_column = arguments[0].column->convertToFullColumnIfConst(); @@ -336,12 +347,12 @@ private: "Function {} if array argument is passed as key, additional array argument as value must be passed", getName()); - key_series_type = assert_cast(*arguments[0].type).getNestedType(); - key_column = key_argument_array_column->getDataPtr(); + input.key_series_type = assert_cast(*arguments[0].type).getNestedType(); + input.key_column = key_argument_array_column->getDataPtr(); const auto & key_offsets = key_argument_array_column->getOffsets(); - value_series_type = assert_cast(*arguments[1].type).getNestedType(); - value_column = value_argument_array_column->getDataPtr(); + input.value_series_type = assert_cast(*arguments[1].type).getNestedType(); + input.value_column = value_argument_array_column->getDataPtr(); const auto & value_offsets = value_argument_array_column->getOffsets(); if (key_offsets != value_offsets) @@ -350,7 +361,7 @@ private: "Function {} key and value array should have same amount of elements", getName()); - offsets_column = key_argument_array_column->getOffsetsPtr(); + input.offsets_column = key_argument_array_column->getOffsetsPtr(); max_key_argument_index = 2; } else if (const auto * key_argument_map_column = typeid_cast(first_argument_column.get())) @@ -359,12 +370,12 @@ private: const auto & nested_data_column = key_argument_map_column->getNestedData(); const auto & map_argument_type = assert_cast(*arguments[0].type); - key_series_type = map_argument_type.getKeyType(); - value_series_type = map_argument_type.getValueType(); + input.key_series_type = map_argument_type.getKeyType(); + input.value_series_type = map_argument_type.getValueType(); - key_column = nested_data_column.getColumnPtr(0); - value_column = nested_data_column.getColumnPtr(1); - offsets_column = nested_array.getOffsetsPtr(); + input.key_column = nested_data_column.getColumnPtr(0); + input.value_column = nested_data_column.getColumnPtr(1); + input.offsets_column = nested_array.getOffsetsPtr(); max_key_argument_index = 1; } @@ -382,22 +393,32 @@ private: max_key_column = arguments[max_key_argument_index].column; auto max_key_column_type = arguments[max_key_argument_index].type; - if (!max_key_column_type->equals(*key_series_type)) + if (!max_key_column_type->equals(*input.key_series_type)) { ColumnWithTypeAndName column_to_cast = {max_key_column, max_key_column_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), key_series_type); + auto casted_column = castColumnAccurate(std::move(column_to_cast), input.key_series_type); max_key_column = std::move(casted_column); } } - auto result_column = result_type->createColumn(); - WhichDataType result_data_type(result_type); + input.max_key_column = std::move(max_key_column); + return input; + } + + struct ResultColumns + { MutableColumnPtr result_key_column; MutableColumnPtr result_value_column; MutableColumnPtr 
result_offset_column; IColumn * result_offset_column_raw; + /// If we return tuple of two arrays, this offset need to be the same as result_offset_column MutableColumnPtr result_array_additional_offset_column; + }; + + ResultColumns extractResultColumns(MutableColumnPtr & result_column, const DataTypePtr & result_type) const + { + ResultColumns result; auto * tuple_column = typeid_cast(result_column.get()); @@ -415,20 +436,19 @@ private: getName(), result_type->getName()); - result_key_column = key_array_column_typed->getDataPtr()->assumeMutable(); - result_value_column = value_array_column_typed->getDataPtr()->assumeMutable(); - - result_offset_column = key_array_column_typed->getOffsetsPtr()->assumeMutable(); - result_offset_column_raw = result_offset_column.get(); - - result_array_additional_offset_column = value_array_column_typed->getOffsetsPtr()->assumeMutable(); + result.result_key_column = key_array_column_typed->getDataPtr()->assumeMutable(); + result.result_value_column = value_array_column_typed->getDataPtr()->assumeMutable(); + result.result_offset_column = key_array_column_typed->getOffsetsPtr()->assumeMutable(); + result.result_offset_column_raw = result.result_offset_column.get(); + result.result_array_additional_offset_column = value_array_column_typed->getOffsetsPtr()->assumeMutable(); } else if (const auto * map_column = typeid_cast(result_column.get())) { - result_key_column = map_column->getNestedData().getColumnPtr(0)->assumeMutable(); - result_value_column = map_column->getNestedData().getColumnPtr(1)->assumeMutable(); - result_offset_column = map_column->getNestedColumn().getOffsetsPtr()->assumeMutable(); - result_offset_column_raw = result_offset_column.get(); + result.result_key_column = map_column->getNestedData().getColumnPtr(0)->assumeMutable(); + result.result_value_column = map_column->getNestedData().getColumnPtr(1)->assumeMutable(); + result.result_offset_column = map_column->getNestedColumn().getOffsetsPtr()->assumeMutable(); + result.result_offset_column_raw = result.result_offset_column.get(); + result.result_array_additional_offset_column = nullptr; } else { @@ -438,6 +458,16 @@ private: result_type->getName()); } + return result; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override + { + auto input = extractKeyAndValueInput(arguments); + + auto result_column = result_type->createColumn(); + auto result_columns = extractResultColumns(result_column, result_type); + auto call = [&](const auto & types) { using Types = std::decay_t; @@ -453,13 +483,13 @@ private: using ValueFieldType = typename ValueType::FieldType; executeImplTyped( - key_column, - value_column, - offsets_column, - max_key_column, - std::move(result_key_column), - std::move(result_value_column), - std::move(result_offset_column)); + input.key_column, + input.value_column, + input.offsets_column, + input.max_key_column, + std::move(result_columns.result_key_column), + std::move(result_columns.result_value_column), + std::move(result_columns.result_offset_column)); return true; } @@ -467,17 +497,17 @@ private: return false; }; - if (!callOnTwoTypeIndexes(key_series_type->getTypeId(), value_series_type->getTypeId(), call)) + if (!callOnTwoTypeIndexes(input.key_series_type->getTypeId(), input.value_series_type->getTypeId(), call)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Function {} illegal columns passed as arguments", getName()); - if (result_array_additional_offset_column) + if 
(result_columns.result_array_additional_offset_column) { - result_array_additional_offset_column->insertRangeFrom( - *result_offset_column_raw, + result_columns.result_array_additional_offset_column->insertRangeFrom( + *result_columns.result_offset_column_raw, 0, - result_offset_column_raw->size()); + result_columns.result_offset_column_raw->size()); } return result_column; From 4646cac07596ce82ebddc4cebd1ab501bfb92975 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 4 Feb 2022 21:49:39 +0000 Subject: [PATCH 064/215] Fixed tests --- src/Functions/array/mapPopulateSeries.cpp | 27 +++++------------------ 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 1c5dce5d586..17269f8dfe1 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -39,7 +39,7 @@ private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } - bool useDefaultImplementationForConstants() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } void checkTypes(const DataTypePtr & key_type, const DataTypePtr & value_type, const DataTypePtr & max_key_type) const @@ -175,12 +175,7 @@ private: auto & result_offsets_column_typed = assert_cast &>(*result_offset_column); auto & result_offsets_data = result_offsets_column_typed.getData(); - std::optional max_key_column_const; - if (max_key_column) - { - if (auto * const_max_key_column = checkAndGetColumnConst>(max_key_column.get())) - max_key_column_const = const_max_key_column->template getValue(); - } + const PaddedPODArray * max_key_data = max_key_column ? 
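+        /// Editor's note, not in the original patch: indexing getData() per row
+        /// is safe here because the max-key column is materialized with
+        /// convertToFullColumnIfConst() in extractKeyAndValueInput(), which is
+        /// what lets this patch drop the old ColumnConst special case.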
&assert_cast &>(*max_key_column).getData() : nullptr; PaddedPODArray> sorted_keys_values; @@ -209,21 +204,9 @@ private: KeyType min_key = sorted_keys_values.front().first; KeyType max_key = sorted_keys_values.back().first; - if (max_key_column) + if (max_key_data) { - KeyType max_key_column_value {}; - - if (max_key_column_const) - { - max_key_column_value = *max_key_column_const; - } - else - { - const auto & max_key_column_typed = assert_cast &>(*max_key_column); - max_key_column_value = max_key_column_typed.getData()[offset_index]; - } - - max_key = max_key_column_value; + max_key = (*max_key_data)[offset_index]; if (unlikely(max_key < min_key)) { @@ -390,7 +373,7 @@ private: if (max_key_argument_index < arguments.size()) { - max_key_column = arguments[max_key_argument_index].column; + max_key_column = arguments[max_key_argument_index].column->convertToFullColumnIfConst(); auto max_key_column_type = arguments[max_key_argument_index].type; if (!max_key_column_type->equals(*input.key_series_type)) From 861ce6ae9450c45857b8c7ab44912c463ee2b5cf Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 5 Feb 2022 02:07:45 +0300 Subject: [PATCH 065/215] Update order-by.md --- docs/en/sql-reference/statements/select/order-by.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 823594c8ec4..b24f0213e4e 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -285,7 +285,7 @@ ORDER BY expr [WITH FILL] [FROM const_expr] [TO const_expr] [STEP const_numeric_ `WITH FILL` can be applied for fields with Numeric (all kinds of float, decimal, int) or Date/DateTime types. When applied for `String` fields, missed values are filled with empty strings. When `FROM const_expr` not defined sequence of filling use minimal `expr` field value from `ORDER BY`. When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`. -When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals. +When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types, as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals. When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type. 
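As a quick illustration of the `INTERVAL` step described above (example added in this review, not part of the upstream docs change; table `t` and column `d` are hypothetical):

``` sql
SELECT d, count() AS c FROM t GROUP BY d
ORDER BY d WITH FILL STEP INTERVAL 1 DAY;
```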
Example of a query without `WITH FILL`: From b9c42effb48d7ef15a5f8f269bbe67145d2c8cd1 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 5 Feb 2022 19:30:40 +0800 Subject: [PATCH 066/215] change as requested --- src/Functions/FunctionsStringArray.h | 39 +++++++++---------- .../0_stateless/02185_split_by_char.sql | 1 - 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h index dcd9198ef33..b1de017120c 100644 --- a/src/Functions/FunctionsStringArray.h +++ b/src/Functions/FunctionsStringArray.h @@ -244,8 +244,8 @@ private: Pos end; char sep; - Int64 max_split = -1; - Int64 curr_split = 0; + std::optional max_split; + UInt64 curr_split = 0; public: static constexpr auto name = "splitByChar"; @@ -295,15 +295,14 @@ public: if (arguments.size() > 2) { - std::optional max_split_opt = std::nullopt; - if (!((max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])) - || (max_split_opt = getMaxSplit(arguments[2])))) + if (!((max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])) + || (max_split = getMaxSplit(arguments[2])))) { throw Exception( ErrorCodes::ILLEGAL_COLUMN, @@ -311,23 +310,21 @@ public: arguments[2].column->getName(), getName()); } - max_split = *max_split_opt; } } template - std::optional getMaxSplit(const ColumnWithTypeAndName & argument) + std::optional getMaxSplit(const ColumnWithTypeAndName & argument) { const auto * col = checkAndGetColumnConst>(argument.column.get()); if (!col) return std::nullopt; - Int64 result= static_cast(col->template getValue()); - if (result < 0 && result != -1) - throw Exception("Illegal column " + argument.column->getName() - + " of third argument of function " + getName() + ". 
Must be non-negative number or -1", - ErrorCodes::ILLEGAL_COLUMN); - return result; + auto value = col->template getValue(); + if (value < 0) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of third argument of function {}", argument.column->getName(), getName()); + return value; } /// Returns the position of the argument, that is the column of strings @@ -348,14 +345,14 @@ public: return false; token_begin = pos; - if (unlikely(max_split >= 0 && curr_split >= max_split)) + if (unlikely(max_split && curr_split >= *max_split)) { token_end = end; pos = nullptr; return true; } - pos = reinterpret_cast(memchr(pos, sep, end - pos)); + pos = reinterpret_cast(memchr(pos, sep, end - pos)); if (pos) { token_end = pos; diff --git a/tests/queries/0_stateless/02185_split_by_char.sql b/tests/queries/0_stateless/02185_split_by_char.sql index 6b843c05144..6c490654813 100644 --- a/tests/queries/0_stateless/02185_split_by_char.sql +++ b/tests/queries/0_stateless/02185_split_by_char.sql @@ -1,5 +1,4 @@ select splitByChar(',', '1,2,3'); -select splitByChar(',', '1,2,3', -1); select splitByChar(',', '1,2,3', 0); select splitByChar(',', '1,2,3', 1); select splitByChar(',', '1,2,3', 2); From 829a9a4d83b2e99f34077040a578bd5d774e4c3d Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 5 Feb 2022 19:44:21 +0800 Subject: [PATCH 067/215] as alias functions --- src/Functions/match.cpp | 1 + src/Functions/replaceAll.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index 31d36577445..7561ffc93c0 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -20,6 +20,7 @@ using FunctionMatch = FunctionsStringSearch>; void registerFunctionMatch(FunctionFactory & factory) { factory.registerFunction(); + factory.registerAlias("REGEXP_MATCHES", NameMatch::name); } } diff --git a/src/Functions/replaceAll.cpp b/src/Functions/replaceAll.cpp index cc29e57ea69..1cd58124125 100644 --- a/src/Functions/replaceAll.cpp +++ b/src/Functions/replaceAll.cpp @@ -21,6 +21,7 @@ void registerFunctionReplaceAll(FunctionFactory & factory) { factory.registerFunction(); factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("REGEXP_REPLACE", NameReplaceAll::name); } } From 2e8adc797c1be512bd7a30dce3c493673c629e58 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 5 Feb 2022 22:05:05 +0800 Subject: [PATCH 068/215] Better handle pre-inputs before client start --- contrib/replxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/replxx b/contrib/replxx index c745b3fb012..9460e5e0fc1 160000 --- a/contrib/replxx +++ b/contrib/replxx @@ -1 +1 @@ -Subproject commit c745b3fb012ee5ae762fbc8cd7a40c4dc3fe15df +Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d From 35235d2d7f2dae3775cf9ea0b206fafb13208e04 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 5 Feb 2022 16:11:36 +0000 Subject: [PATCH 069/215] Added additional performance test --- tests/performance/map_populate_series.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/performance/map_populate_series.xml b/tests/performance/map_populate_series.xml index a050be6f3a8..db40cf09455 100644 --- a/tests/performance/map_populate_series.xml +++ b/tests/performance/map_populate_series.xml @@ -1,4 +1,6 @@ SELECT mapPopulateSeries(range(number), range(number)) FROM numbers(5000) FORMAT Null; SELECT mapPopulateSeries(range(number), range(number), 2500) FROM numbers(5000) FORMAT Null; + SELECT 
mapPopulateSeries(map(0, 0, number, 5)) FROM numbers(5000) FORMAT Null; + SELECT mapPopulateSeries(map(0, 0, number, 5), 2500) FROM numbers(5000) FORMAT Null; From f8ef1cd23d02b270bc6b18f34a63730eeb4ac767 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 29 Jan 2022 00:31:50 +0700 Subject: [PATCH 070/215] Add submodule minizip-ng --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/cassandra-cmake/CMakeLists.txt | 13 +- contrib/minizip-ng | 1 + contrib/minizip-ng-cmake/CMakeLists.txt | 168 ++++++++++++++++++++++++ contrib/minizip-ng-cmake/unzip.h | 13 ++ contrib/minizip-ng-cmake/zip.h | 13 ++ src/CMakeLists.txt | 4 + src/Common/config.h.in | 1 + src/configure_config.cmake | 3 + 10 files changed, 209 insertions(+), 11 deletions(-) create mode 160000 contrib/minizip-ng create mode 100644 contrib/minizip-ng-cmake/CMakeLists.txt create mode 100644 contrib/minizip-ng-cmake/unzip.h create mode 100644 contrib/minizip-ng-cmake/zip.h diff --git a/.gitmodules b/.gitmodules index ed023ab348b..91f4ddb2007 100644 --- a/.gitmodules +++ b/.gitmodules @@ -259,3 +259,6 @@ [submodule "contrib/azure"] path = contrib/azure url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git +[submodule "contrib/minizip-ng"] + path = contrib/minizip-ng + url = https://github.com/zlib-ng/minizip-ng diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 6172f231b6e..9cf307c473e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -78,6 +78,7 @@ add_contrib (croaring-cmake croaring) add_contrib (zstd-cmake zstd) add_contrib (zlib-ng-cmake zlib-ng) add_contrib (bzip2-cmake bzip2) +add_contrib (minizip-ng-cmake minizip-ng) add_contrib (snappy-cmake snappy) add_contrib (rocksdb-cmake rocksdb) add_contrib (thrift-cmake thrift) diff --git a/contrib/cassandra-cmake/CMakeLists.txt b/contrib/cassandra-cmake/CMakeLists.txt index 416dca6f2bc..81c1fab3882 100644 --- a/contrib/cassandra-cmake/CMakeLists.txt +++ b/contrib/cassandra-cmake/CMakeLists.txt @@ -56,19 +56,11 @@ list(APPEND SOURCES ${CASS_SRC_DIR}/atomic/atomic_std.hpp) add_library(_curl_hostcheck OBJECT ${CASS_SRC_DIR}/third_party/curl/hostcheck.cpp) add_library(_hdr_histogram OBJECT ${CASS_SRC_DIR}/third_party/hdr_histogram/hdr_histogram.cpp) add_library(_http-parser OBJECT ${CASS_SRC_DIR}/third_party/http-parser/http_parser.c) -add_library(_minizip OBJECT - ${CASS_SRC_DIR}/third_party/minizip/ioapi.c - ${CASS_SRC_DIR}/third_party/minizip/zip.c - ${CASS_SRC_DIR}/third_party/minizip/unzip.c) - -target_link_libraries(_minizip ch_contrib::zlib) -target_compile_definitions(_minizip PRIVATE "-Dz_crc_t=unsigned long") list(APPEND INCLUDE_DIRS ${CASS_SRC_DIR}/third_party/curl ${CASS_SRC_DIR}/third_party/hdr_histogram ${CASS_SRC_DIR}/third_party/http-parser - ${CASS_SRC_DIR}/third_party/minizip ${CASS_SRC_DIR}/third_party/mt19937_64 ${CASS_SRC_DIR}/third_party/rapidjson/rapidjson ${CASS_SRC_DIR}/third_party/sparsehash/src) @@ -123,10 +115,9 @@ add_library(_cassandra ${SOURCES} $ $ - $ - $) + $) -target_link_libraries(_cassandra ch_contrib::zlib) +target_link_libraries(_cassandra ch_contrib::zlib ch_contrib::minizip) target_include_directories(_cassandra PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${INCLUDE_DIRS}) target_include_directories(_cassandra SYSTEM BEFORE PUBLIC ${CASS_INCLUDE_DIR}) target_compile_definitions(_cassandra PRIVATE CASS_BUILDING) diff --git a/contrib/minizip-ng b/contrib/minizip-ng new file mode 160000 index 00000000000..6cffc951851 --- /dev/null +++ b/contrib/minizip-ng @@ -0,0 +1 @@ +Subproject commit 
6cffc951851620e0fac1993be75e4713c334de03 diff --git a/contrib/minizip-ng-cmake/CMakeLists.txt b/contrib/minizip-ng-cmake/CMakeLists.txt new file mode 100644 index 00000000000..4aabbd3c9fb --- /dev/null +++ b/contrib/minizip-ng-cmake/CMakeLists.txt @@ -0,0 +1,168 @@ +option(ENABLE_MINIZIP "Enable minizip-ng the zip manipulation library" ${ENABLE_LIBRARIES}) +if (NOT ENABLE_MINIZIP) + message (STATUS "minizip-ng disabled") + return() +endif() + +set(_MINIZIP_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/minizip-ng") + +# Initial source files +set(MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_crypt.c + ${_MINIZIP_SOURCE_DIR}/mz_os.c + ${_MINIZIP_SOURCE_DIR}/mz_strm.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_split.c + ${_MINIZIP_SOURCE_DIR}/mz_zip.c + ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.c) + +# Initial header files +set(MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz.h + ${_MINIZIP_SOURCE_DIR}/mz_os.h + ${_MINIZIP_SOURCE_DIR}/mz_crypt.h + ${_MINIZIP_SOURCE_DIR}/mz_strm.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_buf.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_mem.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_split.h + ${_MINIZIP_SOURCE_DIR}/mz_strm_os.h + ${_MINIZIP_SOURCE_DIR}/mz_zip.h + ${_MINIZIP_SOURCE_DIR}/mz_zip_rw.h) + +set(MINIZIP_INC ${_MINIZIP_SOURCE_DIR}) + +set(MINIZIP_DEF) +set(MINIZIP_PUBLIC_DEF) +set(MINIZIP_LIB) + +# Check if zlib is present +set(MZ_ZLIB ON) +if(MZ_ZLIB) + # Use zlib from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::zlib) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_zlib.h) + + list(APPEND MINIZIP_DEF "-DHAVE_ZLIB") +endif() + +# Check if bzip2 is present +set(MZ_BZIP2 ${ENABLE_BZIP2}) +if(MZ_BZIP2) + # Use bzip2 from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::bzip2) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_bzip.h) + + list(APPEND MINIZIP_DEF "-DHAVE_BZIP2") +endif() + +# Check if liblzma is present +set(MZ_LZMA ON) +if(MZ_LZMA) + # Use liblzma from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::xz) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_lzma.h) + + list(APPEND MINIZIP_DEF "-DHAVE_LZMA") +endif() + +# Check if zstd is present +set(MZ_ZSTD ON) +if(MZ_ZSTD) + # Use zstd from ClickHouse contrib + list(APPEND MINIZIP_LIB ch_contrib::zstd) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_zstd.h) + + list(APPEND MINIZIP_DEF "-DHAVE_ZSTD") +endif() + +if(NOT MZ_ZLIB AND NOT MZ_ZSTD AND NOT MZ_BZIP2 AND NOT MZ_LZMA) + message(STATUS "Compression not supported due to missing libraries") + + list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_DECOMPRESSION) + list(APPEND MINIZIP_DEF -DMZ_ZIP_NO_COMPRESSION) +endif() + +# Check to see if openssl installation is present +set(MZ_OPENSSL ${ENABLE_SSL}) +if(MZ_OPENSSL) + # Use openssl from ClickHouse contrib + list(APPEND MINIZIP_LIB OpenSSL::SSL OpenSSL::Crypto) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_crypt_openssl.c) +endif() + +# Include WinZIP AES encryption +set(MZ_WZAES ${ENABLE_SSL}) +if(MZ_WZAES) + list(APPEND MINIZIP_DEF -DHAVE_WZAES) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_wzaes.h) +endif() + +# Include traditional PKWare 
encryption +set(MZ_PKCRYPT ON) +if(MZ_PKCRYPT) + list(APPEND MINIZIP_DEF -DHAVE_PKCRYPT) + + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_strm_pkcrypt.h) +endif() + +# Unix specific +if(UNIX) + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_os_posix.c + ${_MINIZIP_SOURCE_DIR}/mz_strm_os_posix.c) +endif() + +# Include compatibility layer +set(MZ_COMPAT ON) +if(MZ_COMPAT) + list(APPEND MINIZIP_SRC + ${_MINIZIP_SOURCE_DIR}/mz_compat.c) + + list(APPEND MINIZIP_HDR + ${_MINIZIP_SOURCE_DIR}/mz_compat.h + zip.h + unzip.h) + + list(APPEND MINIZIP_INC "${CMAKE_CURRENT_SOURCE_DIR}") + list(APPEND MINIZIP_PUBLIC_DEF "-DMZ_COMPAT_VERSION=110") +endif() + +add_library(_minizip ${MINIZIP_SRC} ${MINIZIP_HDR}) +target_include_directories(_minizip PUBLIC ${MINIZIP_INC}) +target_compile_definitions(_minizip PUBLIC ${MINIZIP_PUBLIC_DEF}) +target_compile_definitions(_minizip PRIVATE ${MINIZIP_DEF}) +target_link_libraries(_minizip PRIVATE ${MINIZIP_LIB}) + +add_library(ch_contrib::minizip ALIAS _minizip) diff --git a/contrib/minizip-ng-cmake/unzip.h b/contrib/minizip-ng-cmake/unzip.h new file mode 100644 index 00000000000..61cbd974e31 --- /dev/null +++ b/contrib/minizip-ng-cmake/unzip.h @@ -0,0 +1,13 @@ +/* unzip.h -- Compatibility layer shim + part of the minizip-ng project + + This program is distributed under the terms of the same license as zlib. + See the accompanying LICENSE file for the full text of the license. +*/ + +#ifndef MZ_COMPAT_UNZIP +#define MZ_COMPAT_UNZIP + +#include "mz_compat.h" + +#endif diff --git a/contrib/minizip-ng-cmake/zip.h b/contrib/minizip-ng-cmake/zip.h new file mode 100644 index 00000000000..cf38ac91a04 --- /dev/null +++ b/contrib/minizip-ng-cmake/zip.h @@ -0,0 +1,13 @@ +/* zip.h -- Compatibility layer shim + part of the minizip-ng project + + This program is distributed under the terms of the same license as zlib. + See the accompanying LICENSE file for the full text of the license. +*/ + +#ifndef MZ_COMPAT_ZIP +#define MZ_COMPAT_ZIP + +#include "mz_compat.h" + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 57d4bf29491..a3f9e771e0b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -513,6 +513,10 @@ if (TARGET ch_contrib::bzip2) target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::bzip2) endif() +if (TARGET ch_contrib::minizip) + target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::minizip) +endif () + if (TARGET ch_contrib::simdjson) dbms_target_link_libraries(PRIVATE ch_contrib::simdjson) endif() diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 3d785e0d0fb..edade4ce2be 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -17,6 +17,7 @@ #cmakedefine01 USE_YAML_CPP #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BZIP2 +#cmakedefine01 USE_MINIZIP #cmakedefine01 USE_SNAPPY #cmakedefine01 USE_HIVE #cmakedefine01 USE_ODBC diff --git a/src/configure_config.cmake b/src/configure_config.cmake index ce50ab87afc..519307ba28a 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -4,6 +4,9 @@ endif() if (TARGET ch_contrib::bzip2) set(USE_BZIP2 1) endif() +if (TARGET ch_contrib::minizip) + set(USE_MINIZIP 1) +endif() if (TARGET ch_contrib::snappy) set(USE_SNAPPY 1) endif() From 23fac284ea57da86e69d1154a1045c0d785c788c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 29 Jan 2022 00:32:35 +0700 Subject: [PATCH 071/215] Add utility classes ZipArchiveReader and ZipArchiveWriter. 
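A rough usage sketch of the new interface (editor's illustration; the
createArchiveReader(path) factory signature is assumed from the file list
below and is not shown in this excerpt):

    #include <IO/Archives/createArchiveReader.h>
    #include <IO/WriteBufferFromFileDescriptor.h>
    #include <IO/copyData.h>
    #include <unistd.h>

    using namespace DB;

    /// Extract one entry of a zip archive to stdout.
    void dumpEntry(const String & path_to_archive, const String & entry)
    {
        auto reader = createArchiveReader(path_to_archive);
        reader->setPassword("secret");      /// only needed for encrypted archives
        if (!reader->fileExists(entry))
            return;
        auto in = reader->readFile(entry);  /// decompressing ReadBufferFromFileBase
        WriteBufferFromFileDescriptor out(STDOUT_FILENO);
        copyData(*in, out);
        out.finalize();
    }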
--- src/CMakeLists.txt | 1 + src/Common/ErrorCodes.cpp | 2 + src/IO/Archives/IArchiveReader.h | 60 ++ src/IO/Archives/IArchiveWriter.h | 38 ++ src/IO/Archives/ZipArchiveReader.cpp | 563 ++++++++++++++++++ src/IO/Archives/ZipArchiveReader.h | 86 +++ src/IO/Archives/ZipArchiveWriter.cpp | 385 ++++++++++++ src/IO/Archives/ZipArchiveWriter.h | 97 +++ src/IO/Archives/createArchiveReader.cpp | 38 ++ src/IO/Archives/createArchiveReader.h | 22 + src/IO/Archives/createArchiveWriter.cpp | 38 ++ src/IO/Archives/createArchiveWriter.h | 19 + .../tests/gtest_archive_reader_and_writer.cpp | 341 +++++++++++ 13 files changed, 1690 insertions(+) create mode 100644 src/IO/Archives/IArchiveReader.h create mode 100644 src/IO/Archives/IArchiveWriter.h create mode 100644 src/IO/Archives/ZipArchiveReader.cpp create mode 100644 src/IO/Archives/ZipArchiveReader.h create mode 100644 src/IO/Archives/ZipArchiveWriter.cpp create mode 100644 src/IO/Archives/ZipArchiveWriter.h create mode 100644 src/IO/Archives/createArchiveReader.cpp create mode 100644 src/IO/Archives/createArchiveReader.h create mode 100644 src/IO/Archives/createArchiveWriter.cpp create mode 100644 src/IO/Archives/createArchiveWriter.h create mode 100644 src/IO/tests/gtest_archive_reader_and_writer.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3f9e771e0b..f04f18a4639 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -79,6 +79,7 @@ set(dbms_sources) add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) add_headers_and_sources(clickhouse_common_io IO) +add_headers_and_sources(clickhouse_common_io IO/Archives) add_headers_and_sources(clickhouse_common_io IO/S3) list (REMOVE_ITEM clickhouse_common_io_sources Common/malloc.cpp Common/new_delete.cpp) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 82714de3470..e991daf3209 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -610,6 +610,8 @@ M(639, SNAPPY_COMPRESS_FAILED) \ M(640, NO_HIVEMETASTORE) \ M(641, CANNOT_APPEND_TO_FILE) \ + M(642, CANNOT_PACK_ARCHIVE) \ + M(643, CANNOT_UNPACK_ARCHIVE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h new file mode 100644 index 00000000000..584e80a7d09 --- /dev/null +++ b/src/IO/Archives/IArchiveReader.h @@ -0,0 +1,60 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class ReadBuffer; +class ReadBufferFromFileBase; +class SeekableReadBuffer; + +/// Interface for reading an archive. +class IArchiveReader : public std::enable_shared_from_this +{ +public: + virtual ~IArchiveReader() = default; + + /// Returns true if there is a specified file in the archive. + virtual bool fileExists(const String & filename) = 0; + + struct FileInfo + { + UInt64 uncompressed_size; + UInt64 compressed_size; + int compression_method; + bool is_encrypted; + }; + + /// Returns the information about a file stored in the archive. + virtual FileInfo getFileInfo(const String & filename) = 0; + + class FileEnumerator + { + public: + virtual ~FileEnumerator() = default; + virtual const String & getFileName() const = 0; + virtual const FileInfo & getFileInfo() const = 0; + virtual bool nextFile() = 0; + }; + + /// Starts enumerating files in the archive. + virtual std::unique_ptr firstFile() = 0; + + /// Starts reading a file from the archive. The function returns a read buffer, + /// you can read that buffer to extract uncompressed data from the archive. 
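+    /// (Editor's note, not in the original patch: in the zip implementation this
+    /// decompresses lazily, one internal buffer refill at a time.)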
+ /// Several read buffers can be used at the same time in parallel. + virtual std::unique_ptr readFile(const String & filename) = 0; + + /// It's possible to convert a file enumerator to a read buffer and vice versa. + virtual std::unique_ptr readFile(std::unique_ptr enumerator) = 0; + virtual std::unique_ptr nextFile(std::unique_ptr read_buffer) = 0; + + /// Sets password used to decrypt files in the archive. + virtual void setPassword(const String & /* password */) {} + + using ReadArchiveFunction = std::function()>; +}; + +} diff --git a/src/IO/Archives/IArchiveWriter.h b/src/IO/Archives/IArchiveWriter.h new file mode 100644 index 00000000000..6879d470b62 --- /dev/null +++ b/src/IO/Archives/IArchiveWriter.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class WriteBufferFromFileBase; + +/// Interface for writing an archive. +class IArchiveWriter : public std::enable_shared_from_this +{ +public: + /// Destructors finalizes writing the archive. + virtual ~IArchiveWriter() = default; + + /// Starts writing a file to the archive. The function returns a write buffer, + /// any data written to that buffer will be compressed and then put to the archive. + /// You can keep only one such buffer at a time, a buffer returned by previous call + /// of the function `writeFile()` should be destroyed before next call of `writeFile()`. + virtual std::unique_ptr writeFile(const String & filename) = 0; + + /// Returns true if there is an active instance of WriteBuffer returned by writeFile(). + /// This function should be used mostly for debugging purposes. + virtual bool isWritingFile() const = 0; + + static constexpr const int kDefaultCompressionLevel = -1; + + /// Sets compression method and level. + /// Changing them will affect next file in the archive. + virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} + + /// Sets password. If the password is not empty it will enable encryption in the archive. + virtual void setPassword(const String & /* password */) {} +}; + +} diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp new file mode 100644 index 00000000000..16604da62dc --- /dev/null +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -0,0 +1,563 @@ +#include + +#if USE_MINIZIP +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_UNPACK_ARCHIVE; + extern const int LOGICAL_ERROR; + extern const int SEEK_POSITION_OUT_OF_BOUND; +} + +using RawHandle = unzFile; + + +/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor. +class ZipArchiveReader::HandleHolder +{ +public: + HandleHolder() = default; + + explicit HandleHolder(const std::shared_ptr & reader_) : reader(reader_), raw_handle(reader->acquireRawHandle()) { } + + ~HandleHolder() + { + if (raw_handle) + { + try + { + closeFile(); + } + catch (...) 
+ { + tryLogCurrentException("ZipArchiveReader"); + } + reader->releaseRawHandle(raw_handle); + } + } + + HandleHolder(HandleHolder && src) + { + *this = std::move(src); + } + + HandleHolder & operator =(HandleHolder && src) + { + reader = std::exchange(src.reader, nullptr); + raw_handle = std::exchange(src.raw_handle, nullptr); + file_name = std::exchange(src.file_name, {}); + file_info = std::exchange(src.file_info, {}); + return *this; + } + + RawHandle getRawHandle() const { return raw_handle; } + std::shared_ptr getReader() const { return reader; } + + void locateFile(const String & file_name_) + { + resetFileInfo(); + bool case_sensitive = true; + int err = unzLocateFile(raw_handle, file_name_.c_str(), reinterpret_cast(static_cast(case_sensitive))); + if (err == UNZ_END_OF_LIST_OF_FILE) + showError("File " + quoteString(file_name_) + " not found"); + file_name = file_name_; + } + + bool tryLocateFile(const String & file_name_) + { + resetFileInfo(); + bool case_sensitive = true; + int err = unzLocateFile(raw_handle, file_name_.c_str(), reinterpret_cast(static_cast(case_sensitive))); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + checkResult(err); + file_name = file_name_; + return true; + } + + bool firstFile() + { + resetFileInfo(); + int err = unzGoToFirstFile(raw_handle); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + checkResult(err); + return true; + } + + bool nextFile() + { + resetFileInfo(); + int err = unzGoToNextFile(raw_handle); + if (err == UNZ_END_OF_LIST_OF_FILE) + return false; + checkResult(err); + return true; + } + + const String & getFileName() const + { + if (!file_name) + retrieveFileInfo(); + return *file_name; + } + + const FileInfo & getFileInfo() const + { + if (!file_info) + retrieveFileInfo(); + return *file_info; + } + + void closeFile() + { + int err = unzCloseCurrentFile(raw_handle); + /// If err == UNZ_PARAMERROR the file is already closed. + if (err != UNZ_PARAMERROR) + checkResult(err); + } + + void checkResult(int code) const { reader->checkResult(code); } + [[noreturn]] void showError(const String & message) const { reader->showError(message); } + +private: + void retrieveFileInfo() const + { + if (file_name && file_info) + return; + unz_file_info64 finfo; + int err = unzGetCurrentFileInfo64(raw_handle, &finfo, nullptr, 0, nullptr, 0, nullptr, 0); + if (err == UNZ_PARAMERROR) + showError("No current file"); + checkResult(err); + if (!file_info) + { + file_info.emplace(); + file_info->uncompressed_size = finfo.uncompressed_size; + file_info->compressed_size = finfo.compressed_size; + file_info->compression_method = finfo.compression_method; + file_info->is_encrypted = (finfo.flag & MZ_ZIP_FLAG_ENCRYPTED); + } + if (!file_name) + { + file_name.emplace(); + file_name->resize(finfo.size_filename); + checkResult(unzGetCurrentFileInfo64(raw_handle, nullptr, file_name->data(), finfo.size_filename, nullptr, 0, nullptr, 0)); + } + } + + void resetFileInfo() + { + file_info.reset(); + file_name.reset(); + } + + std::shared_ptr reader; + RawHandle raw_handle = nullptr; + mutable std::optional file_name; + mutable std::optional file_info; +}; + + +/// This class represents a ReadBuffer actually returned by readFile(). 
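+/// Editor's note, not in the original patch: each instance owns a HandleHolder,
+/// i.e. a pooled raw unzFile handle checked out of the reader's free_handles
+/// list; destroying the buffer closes the current file and returns the handle
+/// to the pool, which is what makes several parallel read buffers over a single
+/// archive cheap.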
+class ZipArchiveReader::ReadBufferFromZipArchive : public ReadBufferFromFileBase +{ +public: + explicit ReadBufferFromZipArchive(HandleHolder && handle_) + : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + , handle(std::move(handle_)) + { + const auto & file_info = handle.getFileInfo(); + checkCompressionMethodIsEnabled(static_cast(file_info.compression_method)); + + const char * password_cstr = nullptr; + if (file_info.is_encrypted) + { + const auto & password_str = handle.getReader()->password; + if (password_str.empty()) + showError("Password is required"); + password_cstr = password_str.c_str(); + checkEncryptionIsEnabled(); + } + + RawHandle raw_handle = handle.getRawHandle(); + int err = unzOpenCurrentFilePassword(raw_handle, password_cstr); + if (err == MZ_PASSWORD_ERROR) + showError("Wrong password"); + checkResult(err); + } + + off_t seek(off_t off, int whence) override + { + off_t current_pos = getPosition(); + off_t new_pos; + if (whence == SEEK_SET) + new_pos = off; + else if (whence == SEEK_CUR) + new_pos = off + current_pos; + else + throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + if (new_pos == current_pos) + return current_pos; /// The position is the same. + + if (new_pos < 0) + throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + off_t working_buffer_start_pos = current_pos - offset(); + off_t working_buffer_end_pos = current_pos + available(); + + if ((working_buffer_start_pos <= new_pos) && (new_pos <= working_buffer_end_pos)) + { + /// The new position is still inside the buffer. + position() += new_pos - current_pos; + return new_pos; + } + + RawHandle raw_handle = handle.getRawHandle(); + + /// Check that the new position is now beyond the end of the file. + const auto & file_info = handle.getFileInfo(); + if (new_pos > static_cast(file_info.uncompressed_size)) + throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + if (file_info.compression_method == static_cast(CompressionMethod::kStore)) + { + /// unzSeek64() works only for non-compressed files. + checkResult(unzSeek64(raw_handle, off, whence)); + return unzTell64(raw_handle); + } + + /// As a last try we go slow way, we're going to simply ignore all data before the new position. + if (new_pos < current_pos) + { + checkResult(unzCloseCurrentFile(raw_handle)); + checkResult(unzOpenCurrentFile(raw_handle)); + current_pos = 0; + } + + ignore(new_pos - current_pos); + return new_pos; + } + + off_t getPosition() override + { + RawHandle raw_handle = handle.getRawHandle(); + return unzTell64(raw_handle) - available(); + } + + String getFileName() const override { return handle.getFileName(); } + + /// Releases owned handle to pass it to an enumerator. 
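+    /// Editor's note, not in the original patch: the `&&` ref-qualifier means
+    /// this is only callable on an rvalue, e.g.
+    /// std::move(*read_buffer_from_zip).releaseHandle() as done in nextFile(),
+    /// making it explicit that the buffer must not be used afterwards.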
+ HandleHolder releaseHandle() && + { + handle.closeFile(); + return std::move(handle); + } + +private: + bool nextImpl() override + { + RawHandle raw_handle = handle.getRawHandle(); + auto bytes_read = unzReadCurrentFile(raw_handle, internal_buffer.begin(), internal_buffer.size()); + + if (bytes_read < 0) + checkResult(bytes_read); + + if (!bytes_read) + return false; + + working_buffer = internal_buffer; + working_buffer.resize(bytes_read); + return true; + } + + void checkResult(int code) const { handle.checkResult(code); } + [[noreturn]] void showError(const String & message) const { handle.showError(message); } + + HandleHolder handle; +}; + + +class ZipArchiveReader::FileEnumeratorImpl : public FileEnumerator +{ +public: + explicit FileEnumeratorImpl(HandleHolder && handle_) : handle(std::move(handle_)) {} + + const String & getFileName() const override { return handle.getFileName(); } + const FileInfo & getFileInfo() const override { return handle.getFileInfo(); } + bool nextFile() override { return handle.nextFile(); } + + /// Releases owned handle to pass it to a read buffer. + HandleHolder releaseHandle() && { return std::move(handle); } + +private: + HandleHolder handle; +}; + + +namespace +{ + /// Provides a set of functions allowing the minizip library to read its input + /// from a SeekableReadBuffer instead of an ordinary file in the local filesystem. + class StreamFromReadBuffer + { + public: + static RawHandle open(std::unique_ptr archive_read_buffer, UInt64 archive_size) + { + StreamFromReadBuffer::Opaque opaque{std::move(archive_read_buffer), archive_size}; + + zlib_filefunc64_def func_def; + func_def.zopen64_file = &StreamFromReadBuffer::openFileFunc; + func_def.zclose_file = &StreamFromReadBuffer::closeFileFunc; + func_def.zread_file = &StreamFromReadBuffer::readFileFunc; + func_def.zwrite_file = &StreamFromReadBuffer::writeFileFunc; + func_def.zseek64_file = &StreamFromReadBuffer::seekFunc; + func_def.ztell64_file = &StreamFromReadBuffer::tellFunc; + func_def.zerror_file = &StreamFromReadBuffer::testErrorFunc; + func_def.opaque = &opaque; + + return unzOpen2_64(/* path= */ nullptr, + &func_def); + } + + private: + std::unique_ptr read_buffer; + UInt64 start_offset = 0; + UInt64 total_size = 0; + bool at_end = false; + + struct Opaque + { + std::unique_ptr read_buffer; + UInt64 total_size = 0; + }; + + static void * openFileFunc(void * opaque, const void *, int) + { + auto & opq = *reinterpret_cast(opaque); + return new StreamFromReadBuffer(std::move(opq.read_buffer), opq.total_size); + } + + StreamFromReadBuffer(std::unique_ptr read_buffer_, UInt64 total_size_) + : read_buffer(std::move(read_buffer_)), start_offset(read_buffer->getPosition()), total_size(total_size_) {} + + static int closeFileFunc(void *, void * stream) + { + delete reinterpret_cast(stream); + return ZIP_OK; + } + + static StreamFromReadBuffer & get(void * ptr) + { + return *reinterpret_cast(ptr); + } + + static int testErrorFunc(void *, void *) + { + return ZIP_OK; + } + + static unsigned long readFileFunc(void *, void * stream, void * buf, unsigned long size) // NOLINT(google-runtime-int) + { + auto & strm = get(stream); + if (strm.at_end) + return 0; + auto read_bytes = strm.read_buffer->read(reinterpret_cast(buf), size); + return read_bytes; + } + + static ZPOS64_T tellFunc(void *, void * stream) + { + auto & strm = get(stream); + if (strm.at_end) + return strm.total_size; + auto pos = strm.read_buffer->getPosition() - strm.start_offset; + return pos; + } + + static long seekFunc(void *, void * 
stream, ZPOS64_T offset, int origin) // NOLINT(google-runtime-int) + { + auto & strm = get(stream); + if (origin == SEEK_END) + { + /// Our implementations of SeekableReadBuffer don't support SEEK_END, + /// but the minizip library needs it, so we have to simulate it here. + strm.at_end = true; + return ZIP_OK; + } + strm.at_end = false; + if (origin == SEEK_SET) + offset += strm.start_offset; + strm.read_buffer->seek(offset, origin); + return ZIP_OK; + } + + static unsigned long writeFileFunc(void *, void *, const void *, unsigned long) // NOLINT(google-runtime-int) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromReadBuffer::writeFile must not be called"); + } + }; +} + + +ZipArchiveReader::ZipArchiveReader(const String & path_to_archive_) + : path_to_archive(path_to_archive_) +{ + init(); + +} + +ZipArchiveReader::ZipArchiveReader( + const String & path_to_archive_, const ReadArchiveFunction & archive_read_function_, UInt64 archive_size_) + : path_to_archive(path_to_archive_), archive_read_function(archive_read_function_), archive_size(archive_size_) +{ + init(); +} + +void ZipArchiveReader::init() +{ + /// Prepare the first handle in `free_handles` and check that the archive can be read. + releaseRawHandle(acquireRawHandle()); +} + +ZipArchiveReader::~ZipArchiveReader() +{ + /// Close all `free_handles`. + for (RawHandle free_handle : free_handles) + { + try + { + checkResult(unzClose(free_handle)); + } + catch (...) + { + tryLogCurrentException("ZipArchiveReader"); + } + } +} + +bool ZipArchiveReader::fileExists(const String & filename) +{ + return acquireHandle().tryLocateFile(filename); +} + +ZipArchiveReader::FileInfo ZipArchiveReader::getFileInfo(const String & filename) +{ + auto handle = acquireHandle(); + handle.locateFile(filename); + return handle.getFileInfo(); +} + +std::unique_ptr ZipArchiveReader::firstFile() +{ + auto handle = acquireHandle(); + if (!handle.firstFile()) + return nullptr; + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::readFile(const String & filename) +{ + auto handle = acquireHandle(); + handle.locateFile(filename); + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::readFile(std::unique_ptr enumerator) +{ + if (!dynamic_cast(enumerator.get())) + throw Exception("Wrong enumerator passed to readFile()", ErrorCodes::LOGICAL_ERROR); + auto enumerator_impl = std::unique_ptr(static_cast(enumerator.release())); + auto handle = std::move(*enumerator_impl).releaseHandle(); + return std::make_unique(std::move(handle)); +} + +std::unique_ptr ZipArchiveReader::nextFile(std::unique_ptr read_buffer) +{ + if (!dynamic_cast(read_buffer.get())) + throw Exception("Wrong ReadBuffer passed to nextFile()", ErrorCodes::LOGICAL_ERROR); + auto read_buffer_from_zip = std::unique_ptr(static_cast(read_buffer.release())); + auto handle = std::move(*read_buffer_from_zip).releaseHandle(); + if (!handle.nextFile()) + return nullptr; + return std::make_unique(std::move(handle)); +} + +void ZipArchiveReader::setPassword(const String & password_) +{ + std::lock_guard lock{mutex}; + password = password_; +} + +ZipArchiveReader::HandleHolder ZipArchiveReader::acquireHandle() +{ + return HandleHolder{std::static_pointer_cast(shared_from_this())}; +} + +ZipArchiveReader::RawHandle ZipArchiveReader::acquireRawHandle() +{ + std::lock_guard lock{mutex}; + + if (!free_handles.empty()) + { + RawHandle free_handle = free_handles.back(); + free_handles.pop_back(); + return free_handle; + } + + RawHandle new_handle 
= nullptr; + if (archive_read_function) + new_handle = StreamFromReadBuffer::open(archive_read_function(), archive_size); + else + new_handle = unzOpen64(path_to_archive.c_str()); + + if (!new_handle) + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open zip archive {}", quoteString(path_to_archive)); + + return new_handle; +} + +void ZipArchiveReader::releaseRawHandle(RawHandle handle_) +{ + if (!handle_) + return; + + std::lock_guard lock{mutex}; + free_handles.push_back(handle_); +} + +void ZipArchiveReader::checkResult(int code) const +{ + if (code >= UNZ_OK) + return; + + String message = "Code= "; + switch (code) + { + case UNZ_OK: return; + case UNZ_ERRNO: message += "ERRNO, errno= " + String{strerror(errno)}; break; + case UNZ_PARAMERROR: message += "PARAMERROR"; break; + case UNZ_BADZIPFILE: message += "BADZIPFILE"; break; + case UNZ_INTERNALERROR: message += "INTERNALERROR"; break; + case UNZ_CRCERROR: message += "CRCERROR"; break; + case UNZ_BADPASSWORD: message += "BADPASSWORD"; break; + default: message += std::to_string(code); break; + } + showError(message); +} + +void ZipArchiveReader::showError(const String & message) const +{ + throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't unpack zip archive {}: {}", quoteString(path_to_archive), message); +} + +} + +#endif diff --git a/src/IO/Archives/ZipArchiveReader.h b/src/IO/Archives/ZipArchiveReader.h new file mode 100644 index 00000000000..6932a93e23f --- /dev/null +++ b/src/IO/Archives/ZipArchiveReader.h @@ -0,0 +1,86 @@ +#pragma once + +#include + +#if USE_MINIZIP +#include +#include +#include +#include +#include + + +namespace DB +{ +class ReadBuffer; +class ReadBufferFromFileBase; +class SeekableReadBuffer; + +/// Implementation of IArchiveReader for reading zip archives. +class ZipArchiveReader : public shared_ptr_helper, public IArchiveReader +{ +public: + using CompressionMethod = ZipArchiveWriter::CompressionMethod; + + ~ZipArchiveReader() override; + + /// Returns true if there is a specified file in the archive. + bool fileExists(const String & filename) override; + + /// Returns the information about a file stored in the archive. + FileInfo getFileInfo(const String & filename) override; + + /// Starts enumerating files in the archive. + std::unique_ptr firstFile() override; + + /// Starts reading a file from the archive. The function returns a read buffer, + /// you can read that buffer to extract uncompressed data from the archive. + /// Several read buffers can be used at the same time in parallel. + std::unique_ptr readFile(const String & filename) override; + + /// It's possible to convert a file enumerator to a read buffer and vice versa. + std::unique_ptr readFile(std::unique_ptr enumerator) override; + std::unique_ptr nextFile(std::unique_ptr read_buffer) override; + + /// Sets password used to decrypt the contents of the files in the archive. + void setPassword(const String & password_) override; + + /// Utility functions. + static CompressionMethod parseCompressionMethod(const String & str) { return ZipArchiveWriter::parseCompressionMethod(str); } + static void checkCompressionMethodIsEnabled(CompressionMethod method) { ZipArchiveWriter::checkCompressionMethodIsEnabled(method); } + static void checkEncryptionIsEnabled() { ZipArchiveWriter::checkEncryptionIsEnabled(); } + +private: + /// Constructs an archive's reader that will read from a file in the local filesystem. 
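+    /// Editor's note, not in the original patch: constructors are private and
+    /// instances are created through shared_ptr_helper (see the friend
+    /// declaration below), so callers always hold a shared_ptr and
+    /// HandleHolder can safely rely on shared_from_this().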
+ explicit ZipArchiveReader(const String & path_to_archive_); + + /// Constructs an archive's reader that will read by making a read buffer by using + /// a specified function. + ZipArchiveReader(const String & path_to_archive_, const ReadArchiveFunction & archive_read_function_, UInt64 archive_size_); + + friend struct shared_ptr_helper; + class ReadBufferFromZipArchive; + class FileEnumeratorImpl; + class HandleHolder; + using RawHandle = void *; + + void init(); + + HandleHolder acquireHandle(); + RawHandle acquireRawHandle(); + void releaseRawHandle(RawHandle handle_); + + void checkResult(int code) const; + [[noreturn]] void showError(const String & message) const; + + const String path_to_archive; + const ReadArchiveFunction archive_read_function; + const UInt64 archive_size = 0; + String password; + std::vector free_handles; + mutable std::mutex mutex; +}; + +} + +#endif diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp new file mode 100644 index 00000000000..f5ecea5e5aa --- /dev/null +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -0,0 +1,385 @@ +#include + +#if USE_MINIZIP +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int CANNOT_PACK_ARCHIVE; + extern const int SUPPORT_IS_DISABLED; + extern const int LOGICAL_ERROR; +} + +using RawHandle = zipFile; + + +/// Holds a raw handle, calls acquireRawHandle() in the constructor and releaseRawHandle() in the destructor. +class ZipArchiveWriter::HandleHolder +{ +public: + HandleHolder() = default; + + explicit HandleHolder(const std::shared_ptr & writer_) : writer(writer_), raw_handle(writer->acquireRawHandle()) { } + + ~HandleHolder() + { + if (raw_handle) + { + try + { + int err = zipCloseFileInZip(raw_handle); + /// If err == ZIP_PARAMERROR the file is already closed. + if (err != ZIP_PARAMERROR) + checkResult(err); + } + catch (...) + { + tryLogCurrentException("ZipArchiveWriter"); + } + writer->releaseRawHandle(raw_handle); + } + } + + HandleHolder(HandleHolder && src) + { + *this = std::move(src); + } + + HandleHolder & operator =(HandleHolder && src) + { + writer = std::exchange(src.writer, nullptr); + raw_handle = std::exchange(src.raw_handle, nullptr); + return *this; + } + + RawHandle getRawHandle() const { return raw_handle; } + std::shared_ptr getWriter() const { return writer; } + + void checkResult(int code) const { writer->checkResult(code); } + +private: + std::shared_ptr writer; + RawHandle raw_handle = nullptr; +}; + + +/// This class represents a WriteBuffer actually returned by writeFile(). 
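+/// Editor's note, not in the original patch: unlike the reader, only one such
+/// buffer may exist per archive at a time (see the contract documented on
+/// IArchiveWriter::writeFile above); the zip entry is opened in the constructor
+/// and closed when the owned HandleHolder is destroyed.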
+class ZipArchiveWriter::WriteBufferFromZipArchive : public WriteBufferFromFileBase
+{
+public:
+    WriteBufferFromZipArchive(HandleHolder && handle_, const String & filename_)
+        : WriteBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
+        , handle(std::move(handle_))
+        , filename(filename_)
+    {
+        auto compress_method = handle.getWriter()->compression_method;
+        auto compress_level = handle.getWriter()->compression_level;
+        checkCompressionMethodIsEnabled(static_cast<CompressionMethod>(compress_method));
+
+        const char * password_cstr = nullptr;
+        const String & password_str = handle.getWriter()->password;
+        if (!password_str.empty())
+        {
+            checkEncryptionIsEnabled();
+            password_cstr = password_str.c_str();
+        }
+
+        RawHandle raw_handle = handle.getRawHandle();
+
+        checkResult(zipOpenNewFileInZip3_64(
+            raw_handle,
+            filename_.c_str(),
+            /* zipfi= */ nullptr,
+            /* extrafield_local= */ nullptr,
+            /* size_extrafield_local= */ 0,
+            /* extrafield_global= */ nullptr,
+            /* size_extrafield_global= */ 0,
+            /* comment= */ nullptr,
+            compress_method,
+            compress_level,
+            /* raw= */ false,
+            /* windowBits= */ 0,
+            /* memLevel= */ 0,
+            /* strategy= */ 0,
+            password_cstr,
+            /* crc_for_crypting= */ 0,
+            /* zip64= */ true));
+    }
+
+    ~WriteBufferFromZipArchive() override
+    {
+        try
+        {
+            finalize();
+        }
+        catch (...)
+        {
+            tryLogCurrentException("ZipArchiveWriter");
+        }
+    }
+
+    void sync() override { next(); }
+    std::string getFileName() const override { return filename; }
+
+private:
+    void nextImpl() override
+    {
+        if (!offset())
+            return;
+        RawHandle raw_handle = handle.getRawHandle();
+        checkResult(zipWriteInFileInZip(raw_handle, working_buffer.begin(), offset()));
+    }
+
+    void checkResult(int code) const { handle.checkResult(code); }
+
+    HandleHolder handle;
+    String filename;
+};
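Underneath the buffering above, the raw minizip call sequence for one archive entry looks as follows. This is a sketch for orientation only: the header name and a few constants differ between minizip flavors (the values mirror the ones this patch passes), and error handling is reduced to early returns.

#include <cstring>
#include <zip.h>   /// Classic minizip header; the vendored contrib build may expose a different path.

int writeOneEntry()
{
    zipFile zf = zipOpen64("example.zip", /* append= */ 0);
    if (!zf)
        return 1;

    /// The same call WriteBufferFromZipArchive issues: DEFLATE (method 8), level 6, no encryption.
    if (zipOpenNewFileInZip3_64(
            zf, "a.txt",
            /* zipfi= */ nullptr,
            /* extrafield_local= */ nullptr, /* size_extrafield_local= */ 0,
            /* extrafield_global= */ nullptr, /* size_extrafield_global= */ 0,
            /* comment= */ nullptr,
            /* method= */ 8, /* level= */ 6,
            /* raw= */ 0, /* windowBits= */ 0, /* memLevel= */ 0, /* strategy= */ 0,
            /* password= */ nullptr, /* crc_for_crypting= */ 0, /* zip64= */ 1) != ZIP_OK)
    {
        zipClose(zf, nullptr);
        return 1;
    }

    const char * data = "The contents of a.txt";
    zipWriteInFileInZip(zf, data, static_cast<unsigned>(std::strlen(data)));   /// One nextImpl() flush.

    zipCloseFileInZip(zf);                         /// Done by ~WriteBufferFromZipArchive via HandleHolder.
    return zipClose(zf, /* global_comment= */ nullptr) == ZIP_OK ? 0 : 1;
}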
+
+
+namespace
+{
+    /// Provides a set of functions allowing the minizip library to write its output
+    /// to a WriteBuffer instead of an ordinary file in the local filesystem.
+    class StreamFromWriteBuffer
+    {
+    public:
+        static RawHandle open(std::unique_ptr<WriteBuffer> archive_write_buffer)
+        {
+            Opaque opaque{std::move(archive_write_buffer)};
+
+            zlib_filefunc64_def func_def;
+            func_def.zopen64_file = &StreamFromWriteBuffer::openFileFunc;
+            func_def.zclose_file = &StreamFromWriteBuffer::closeFileFunc;
+            func_def.zread_file = &StreamFromWriteBuffer::readFileFunc;
+            func_def.zwrite_file = &StreamFromWriteBuffer::writeFileFunc;
+            func_def.zseek64_file = &StreamFromWriteBuffer::seekFunc;
+            func_def.ztell64_file = &StreamFromWriteBuffer::tellFunc;
+            func_def.zerror_file = &StreamFromWriteBuffer::testErrorFunc;
+            func_def.opaque = &opaque;
+
+            return zipOpen2_64(
+                /* path= */ nullptr,
+                /* append= */ false,
+                /* globalcomment= */ nullptr,
+                &func_def);
+        }
+
+    private:
+        std::unique_ptr<WriteBuffer> write_buffer;
+        UInt64 start_offset = 0;
+
+        struct Opaque
+        {
+            std::unique_ptr<WriteBuffer> write_buffer;
+        };
+
+        static void * openFileFunc(void * opaque, const void *, int)
+        {
+            Opaque & opq = *reinterpret_cast<Opaque *>(opaque);
+            return new StreamFromWriteBuffer(std::move(opq.write_buffer));
+        }
+
+        explicit StreamFromWriteBuffer(std::unique_ptr<WriteBuffer> write_buffer_)
+            : write_buffer(std::move(write_buffer_)), start_offset(write_buffer->count()) {}
+
+        static int closeFileFunc(void *, void * stream)
+        {
+            delete reinterpret_cast<StreamFromWriteBuffer *>(stream);
+            return ZIP_OK;
+        }
+
+        static StreamFromWriteBuffer & get(void * ptr)
+        {
+            return *reinterpret_cast<StreamFromWriteBuffer *>(ptr);
+        }
+
+        static unsigned long writeFileFunc(void *, void * stream, const void * buf, unsigned long size) // NOLINT(google-runtime-int)
+        {
+            auto & strm = get(stream);
+            strm.write_buffer->write(reinterpret_cast<const char *>(buf), size);
+            return size;
+        }
+
+        static int testErrorFunc(void *, void *)
+        {
+            return ZIP_OK;
+        }
+
+        static ZPOS64_T tellFunc(void *, void * stream)
+        {
+            auto & strm = get(stream);
+            auto pos = strm.write_buffer->count() - strm.start_offset;
+            return pos;
+        }
+
+        static long seekFunc(void *, void *, ZPOS64_T, int) // NOLINT(google-runtime-int)
+        {
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromWriteBuffer::seek must not be called");
+        }
+
+        static unsigned long readFileFunc(void *, void *, void *, unsigned long) // NOLINT(google-runtime-int)
+        {
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "StreamFromWriteBuffer::readFile must not be called");
+        }
+    };
+}
+
+
+ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_)
+    : ZipArchiveWriter(path_to_archive_, nullptr)
+{
+}
+
+ZipArchiveWriter::ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr<WriteBuffer> archive_write_buffer_)
+    : path_to_archive(path_to_archive_)
+{
+    if (archive_write_buffer_)
+        handle = StreamFromWriteBuffer::open(std::move(archive_write_buffer_));
+    else
+        handle = zipOpen64(path_to_archive.c_str(), /* append= */ false);
+    if (!handle)
+        throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't create zip archive {}", quoteString(path_to_archive));
+}
+
+ZipArchiveWriter::~ZipArchiveWriter()
+{
+    if (handle)
+    {
+        try
+        {
+            checkResult(zipClose(handle, /* global_comment= */ nullptr));
+        }
+        catch (...)
+ { + tryLogCurrentException("ZipArchiveWriter"); + } + } +} + +std::unique_ptr ZipArchiveWriter::writeFile(const String & filename) +{ + return std::make_unique(acquireHandle(), filename); +} + +bool ZipArchiveWriter::isWritingFile() const +{ + std::lock_guard lock{mutex}; + return !handle; +} + +void ZipArchiveWriter::setCompression(int compression_method_, int compression_level_) +{ + std::lock_guard lock{mutex}; + compression_method = compression_method_; + compression_level = compression_level_; +} + +void ZipArchiveWriter::setPassword(const String & password_) +{ + std::lock_guard lock{mutex}; + password = password_; +} + +ZipArchiveWriter::CompressionMethod ZipArchiveWriter::parseCompressionMethod(const String & str) +{ + if (str.empty()) + return CompressionMethod::kDeflate; /// Default compression method is DEFLATE. + else if (boost::iequals(str, "store")) + return CompressionMethod::kStore; + else if (boost::iequals(str, "deflate")) + return CompressionMethod::kDeflate; + else if (boost::iequals(str, "bzip2")) + return CompressionMethod::kBzip2; + else if (boost::iequals(str, "lzma")) + return CompressionMethod::kLzma; + else if (boost::iequals(str, "zstd")) + return CompressionMethod::kZstd; + else if (boost::iequals(str, "xz")) + return CompressionMethod::kXz; + else + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", str); +} + +/// Checks that a passed compression method can be used. +void ZipArchiveWriter::checkCompressionMethodIsEnabled(CompressionMethod method) +{ + switch (method) + { + case CompressionMethod::kStore: [[fallthrough]]; + case CompressionMethod::kDeflate: + case CompressionMethod::kLzma: + case CompressionMethod::kXz: + case CompressionMethod::kZstd: + return; + + case CompressionMethod::kBzip2: + { +#if USE_BZIP2 + return; +#else + throw Exception("BZIP2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED); +#endif + } + } + throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Unknown compression method specified for a zip archive: {}", static_cast(method)); +} + +/// Checks that encryption is enabled. 
+void ZipArchiveWriter::checkEncryptionIsEnabled()
+{
+#if !USE_SSL
+    throw Exception("Encryption in zip archive is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
+#endif
+}
+
+ZipArchiveWriter::HandleHolder ZipArchiveWriter::acquireHandle()
+{
+    return HandleHolder{std::static_pointer_cast<ZipArchiveWriter>(shared_from_this())};
+}
+
+RawHandle ZipArchiveWriter::acquireRawHandle()
+{
+    std::lock_guard lock{mutex};
+    if (!handle)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot have more than one write buffer while writing a zip archive");
+    return std::exchange(handle, nullptr);
+}
+
+void ZipArchiveWriter::releaseRawHandle(RawHandle raw_handle_)
+{
+    std::lock_guard lock{mutex};
+    handle = raw_handle_;
+}
+
+void ZipArchiveWriter::checkResult(int code) const
+{
+    if (code >= ZIP_OK)
+        return;
+
+    String message = "Code= ";
+    switch (code)
+    {
+        case ZIP_ERRNO: message += "ERRNO, errno= " + String{strerror(errno)}; break;
+        case ZIP_PARAMERROR: message += "PARAMERROR"; break;
+        case ZIP_BADZIPFILE: message += "BADZIPFILE"; break;
+        case ZIP_INTERNALERROR: message += "INTERNALERROR"; break;
+        default: message += std::to_string(code); break;
+    }
+    showError(message);
+}
+
+void ZipArchiveWriter::showError(const String & message) const
+{
+    throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Couldn't pack zip archive {}: {}", quoteString(path_to_archive), message);
+}
+
+}
+
+#endif
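Taken together, acquireRawHandle() and releaseRawHandle() pass the single zipFile token back and forth between the writer and the one active write buffer, which is what makes the "one writeFile() at a time" rule cheap to enforce. A hypothetical caller of the interface declared in the header below, as a sketch only (the factory function and headers are the ones this patch introduces; error handling is omitted):

#include <IO/Archives/createArchiveWriter.h>
#include <IO/WriteHelpers.h>

void writeTwoFiles()
{
    auto writer = DB::createArchiveWriter("/tmp/example.zip");
    writer->setCompression(/* method= */ 8, /* level= */ 6);   /// kDeflate, kNormal
    {
        auto out = writer->writeFile("a.txt");        /// Takes the raw handle...
        DB::writeString("first file", *out);
    }                                                 /// ...and returns it on destruction.
    {
        auto out = writer->writeFile("b/c.txt");      /// Only now may the next file start.
        DB::writeString("second file", *out);
    }
}   /// ~ZipArchiveWriter() finalizes the central directory.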
diff --git a/src/IO/Archives/ZipArchiveWriter.h b/src/IO/Archives/ZipArchiveWriter.h
new file mode 100644
index 00000000000..76f8dd8e9e5
--- /dev/null
+++ b/src/IO/Archives/ZipArchiveWriter.h
@@ -0,0 +1,97 @@
+#pragma once
+
+#include 
+
+#if USE_MINIZIP
+#include 
+#include 
+#include 
+
+
+namespace DB
+{
+class WriteBuffer;
+class WriteBufferFromFileBase;
+
+/// Implementation of IArchiveWriter for writing zip archives.
+class ZipArchiveWriter : public shared_ptr_helper<ZipArchiveWriter>, public IArchiveWriter
+{
+public:
+    /// The destructor finalizes writing the archive.
+    ~ZipArchiveWriter() override;
+
+    /// Starts writing a file to the archive. The function returns a write buffer;
+    /// any data written to that buffer will be compressed and then put into the archive.
+    /// Only one such buffer can be kept at a time: the buffer returned by a previous call
+    /// to `writeFile()` must be destroyed before the next call to `writeFile()`.
+    std::unique_ptr<WriteBufferFromFileBase> writeFile(const String & filename) override;
+
+    /// Returns true if there is an active instance of WriteBuffer returned by writeFile().
+    /// This function should be used mostly for debugging purposes.
+    bool isWritingFile() const override;
+
+    /// Supported compression methods.
+    enum class CompressionMethod
+    {
+        /// See mz.h
+        kStore = 0,
+        kDeflate = 8,
+        kBzip2 = 12,
+        kLzma = 14,
+        kZstd = 93,
+        kXz = 95,
+    };
+
+    /// Some compression levels.
+    enum class CompressionLevels
+    {
+        kDefault = kDefaultCompressionLevel,
+        kFast = 2,
+        kNormal = 6,
+        kBest = 9,
+    };
+
+    /// Sets compression method and level.
+    /// Changing them will affect the next file in the archive.
+    void setCompression(int compression_method_, int compression_level_) override;
+
+    /// Sets password. Only the contents of the files are encrypted;
+    /// the names of the files are not encrypted.
+    /// Changing the password will affect the next file in the archive.
+    void setPassword(const String & password_) override;
+
+    /// Utility functions.
+    static CompressionMethod parseCompressionMethod(const String & str);
+    static void checkCompressionMethodIsEnabled(CompressionMethod method);
+    static void checkEncryptionIsEnabled();
+
+private:
+    /// Constructs an archive that will be written as a file in the local filesystem.
+    explicit ZipArchiveWriter(const String & path_to_archive_);
+
+    /// Constructs an archive that will be written by using a specified `archive_write_buffer_`.
+    ZipArchiveWriter(const String & path_to_archive_, std::unique_ptr<WriteBuffer> archive_write_buffer_);
+
+    friend struct shared_ptr_helper<ZipArchiveWriter>;
+    class WriteBufferFromZipArchive;
+    class HandleHolder;
+    using RawHandle = void *;
+
+    HandleHolder acquireHandle();
+    RawHandle acquireRawHandle();
+    void releaseRawHandle(RawHandle raw_handle_);
+
+    void checkResult(int code) const;
+    [[noreturn]] void showError(const String & message) const;
+
+    const String path_to_archive;
+    int compression_method = static_cast<int>(CompressionMethod::kDeflate);
+    int compression_level = kDefaultCompressionLevel;
+    String password;
+    RawHandle handle = nullptr;
+    mutable std::mutex mutex;
+};
+
+}
+
+#endif
diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp
new file mode 100644
index 00000000000..6ebab000a18
--- /dev/null
+++ b/src/IO/Archives/createArchiveReader.cpp
@@ -0,0 +1,38 @@
+#include 
+#include 
+#include 
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int CANNOT_UNPACK_ARCHIVE;
+    extern const int SUPPORT_IS_DISABLED;
+}
+
+
+std::shared_ptr<IArchiveReader> createArchiveReader(const String & path_to_archive)
+{
+    return createArchiveReader(path_to_archive, {}, 0);
+}
+
+
+std::shared_ptr<IArchiveReader> createArchiveReader(
+    const String & path_to_archive,
+    [[maybe_unused]] const std::function<std::unique_ptr<SeekableReadBuffer>()> & archive_read_function,
+    [[maybe_unused]] size_t archive_size)
+{
+    if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx"))
+    {
+#if USE_MINIZIP
+        return ZipArchiveReader::create(path_to_archive, archive_read_function, archive_size);
+#else
+        throw Exception("minizip library is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
+#endif
+    }
+    else
+        throw Exception(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive);
+}
+
+}
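A hypothetical caller of the factory above, reading one entry and then enumerating the rest. This is a sketch only, assuming the headers introduced by this patch; the member functions used are the ones declared on IArchiveReader.

#include <IO/Archives/IArchiveReader.h>
#include <IO/Archives/createArchiveReader.h>
#include <IO/ReadHelpers.h>

#include <string>

void listArchive(const std::string & path)
{
    auto reader = DB::createArchiveReader(path);   /// The type is picked from the ".zip"/".zipx" extension.

    if (reader->fileExists("a.txt"))
    {
        auto in = reader->readFile("a.txt");       /// A decompressing read buffer.
        std::string contents;
        DB::readStringUntilEOF(contents, *in);
    }

    /// Enumerate all entries; firstFile() returns nullptr for an empty archive.
    for (auto enumerator = reader->firstFile(); enumerator;)
    {
        auto info = enumerator->getFileInfo();     /// Name and sizes of the current entry.
        (void)info;
        if (!enumerator->nextFile())
            break;
    }
}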
diff --git a/src/IO/Archives/createArchiveReader.h b/src/IO/Archives/createArchiveReader.h
new file mode 100644
index 00000000000..9e1073b9481
--- /dev/null
+++ b/src/IO/Archives/createArchiveReader.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include 
+#include 
+
+
+namespace DB
+{
+class IArchiveReader;
+class SeekableReadBuffer;
+
+/// Starts reading a specified archive in the local filesystem.
+std::shared_ptr<IArchiveReader> createArchiveReader(const String & path_to_archive);
+
+/// Starts reading a specified archive. The archive is read by using a specified read buffer;
+/// `path_to_archive` is used only to determine the archive's type.
+std::shared_ptr<IArchiveReader> createArchiveReader(
+    const String & path_to_archive,
+    const std::function<std::unique_ptr<SeekableReadBuffer>()> & archive_read_function,
+    size_t archive_size);
+
+}
diff --git a/src/IO/Archives/createArchiveWriter.cpp b/src/IO/Archives/createArchiveWriter.cpp
new file mode 100644
index 00000000000..26cbde8c363
--- /dev/null
+++ b/src/IO/Archives/createArchiveWriter.cpp
@@ -0,0 +1,38 @@
+#include 
+#include 
+#include 
+#include 
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int CANNOT_PACK_ARCHIVE;
+    extern const int SUPPORT_IS_DISABLED;
+}
+
+
+std::shared_ptr<IArchiveWriter> createArchiveWriter(const String & path_to_archive)
+{
+    return createArchiveWriter(path_to_archive, nullptr);
+}
+
+
+std::shared_ptr<IArchiveWriter> createArchiveWriter(
+    const String & path_to_archive,
+    [[maybe_unused]] std::unique_ptr<WriteBuffer> archive_write_buffer)
+{
+    if (path_to_archive.ends_with(".zip") || path_to_archive.ends_with(".zipx"))
+    {
+#if USE_MINIZIP
+        return ZipArchiveWriter::create(path_to_archive, std::move(archive_write_buffer));
+#else
+        throw Exception("minizip library is disabled", ErrorCodes::SUPPORT_IS_DISABLED);
+#endif
+    }
+    else
+        throw Exception(ErrorCodes::CANNOT_PACK_ARCHIVE, "Cannot determine the type of archive {}", path_to_archive);
+}
+
+}
diff --git a/src/IO/Archives/createArchiveWriter.h b/src/IO/Archives/createArchiveWriter.h
new file mode 100644
index 00000000000..51ffd4d1144
--- /dev/null
+++ b/src/IO/Archives/createArchiveWriter.h
@@ -0,0 +1,19 @@
+#pragma once
+
+#include 
+#include 
+
+
+namespace DB
+{
+class IArchiveWriter;
+class WriteBuffer;
+
+/// Starts writing a specified archive in the local filesystem.
+std::shared_ptr<IArchiveWriter> createArchiveWriter(const String & path_to_archive);
+
+/// Starts writing a specified archive. The archive is written by using a specified write buffer;
+/// `path_to_archive` is used only to determine the archive's type.
+std::shared_ptr<IArchiveWriter> createArchiveWriter(const String & path_to_archive, std::unique_ptr<WriteBuffer> archive_write_buffer);
+
+}
diff --git a/src/IO/tests/gtest_archive_reader_and_writer.cpp b/src/IO/tests/gtest_archive_reader_and_writer.cpp
new file mode 100644
index 00000000000..c6b012a9914
--- /dev/null
+++ b/src/IO/tests/gtest_archive_reader_and_writer.cpp
@@ -0,0 +1,341 @@
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+
+namespace DB::ErrorCodes
+{
+    extern const int CANNOT_UNPACK_ARCHIVE;
+}
+
+namespace fs = std::filesystem;
+using namespace DB;
+
+
+class ArchiveReaderAndWriterTest : public ::testing::TestWithParam<const char *>
+{
+public:
+    ArchiveReaderAndWriterTest()
+    {
+        const char * archive_file_ext = GetParam();
+        path_to_archive = temp_folder.path() + "/archive" + archive_file_ext;
+        fs::create_directories(temp_folder.path());
+    }
+
+    const String & getPathToArchive() const { return path_to_archive; }
+
+    static void expectException(int code, const String & message, const std::function<void()> & func)
+    {
+        try
+        {
+            func();
+        }
+        catch (Exception & e)
+        {
+            if ((e.code() != code) || (e.message().find(message) == String::npos))
+                throw;
+        }
+    }
+
+private:
+    Poco::TemporaryFile temp_folder;
+    String path_to_archive;
+};
+
+
+TEST_P(ArchiveReaderAndWriterTest, EmptyArchive)
+{
+    /// Make an archive.
+    {
+        createArchiveWriter(getPathToArchive());
+    }
+
+    /// The created archive can be found in the local filesystem.
+    ASSERT_TRUE(fs::exists(getPathToArchive()));
+
+    /// Read the archive.
+    auto reader = createArchiveReader(getPathToArchive());
+
+    EXPECT_FALSE(reader->fileExists("nofile.txt"));
+
+    expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found",
+                    [&]{ reader->getFileInfo("nofile.txt"); });
+
+    expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "File 'nofile.txt' not found",
+                    [&]{ reader->readFile("nofile.txt"); });
+
+    EXPECT_EQ(reader->firstFile(), nullptr);
+}
+
+
+TEST_P(ArchiveReaderAndWriterTest, SingleFileInArchive)
+{
+    /// Make an archive.
+    std::string_view contents = "The contents of a.txt";
+    {
+        auto writer = createArchiveWriter(getPathToArchive());
+        {
+            auto out = writer->writeFile("a.txt");
+            writeString(contents, *out);
+        }
+    }
+
+    /// Read the archive.
+    auto reader = createArchiveReader(getPathToArchive());
+
+    ASSERT_TRUE(reader->fileExists("a.txt"));
+
+    auto file_info = reader->getFileInfo("a.txt");
+    EXPECT_EQ(file_info.uncompressed_size, contents.size());
+    EXPECT_GT(file_info.compressed_size, 0);
+
+    {
+        auto in = reader->readFile("a.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, contents);
+    }
+
+    {
+        /// Use an enumerator.
+        auto enumerator = reader->firstFile();
+        ASSERT_NE(enumerator, nullptr);
+        EXPECT_EQ(enumerator->getFileName(), "a.txt");
+        EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, contents.size());
+        EXPECT_GT(enumerator->getFileInfo().compressed_size, 0);
+        EXPECT_FALSE(enumerator->nextFile());
+    }
+
+    {
+        /// Convert an enumerator to a read buffer and vice versa.
+        auto enumerator = reader->firstFile();
+        ASSERT_NE(enumerator, nullptr);
+        EXPECT_EQ(enumerator->getFileName(), "a.txt");
+        auto in = reader->readFile(std::move(enumerator));
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, contents);
+        enumerator = reader->nextFile(std::move(in));
+        EXPECT_EQ(enumerator, nullptr);
+    }
+
+    {
+        /// Incorrect use of an enumerator throws an exception.
+        auto enumerator = reader->firstFile();
+        ASSERT_NE(enumerator, nullptr);
+        EXPECT_FALSE(enumerator->nextFile());
+        expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file",
+                        [&]{ enumerator->getFileName(); });
+
+        expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "No current file",
+                        [&] { reader->readFile(std::move(enumerator)); });
+    }
+}
+
+
+TEST_P(ArchiveReaderAndWriterTest, TwoFilesInArchive)
+{
+    /// Make an archive.
+    std::string_view a_contents = "The contents of a.txt";
+    std::string_view c_contents = "The contents of b/c.txt";
+    {
+        auto writer = createArchiveWriter(getPathToArchive());
+        {
+            auto out = writer->writeFile("a.txt");
+            writeString(a_contents, *out);
+        }
+        {
+            auto out = writer->writeFile("b/c.txt");
+            writeString(c_contents, *out);
+        }
+    }
+
+    /// Read the archive.
+    auto reader = createArchiveReader(getPathToArchive());
+
+    ASSERT_TRUE(reader->fileExists("a.txt"));
+    ASSERT_TRUE(reader->fileExists("b/c.txt"));
+
+    EXPECT_EQ(reader->getFileInfo("a.txt").uncompressed_size, a_contents.size());
+    EXPECT_EQ(reader->getFileInfo("b/c.txt").uncompressed_size, c_contents.size());
+
+    {
+        auto in = reader->readFile("a.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, a_contents);
+    }
+
+    {
+        auto in = reader->readFile("b/c.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, c_contents);
+    }
+
+    {
+        /// Read a.txt again.
+        auto in = reader->readFile("a.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, a_contents);
+    }
+
+    {
+        /// Use an enumerator.
+        auto enumerator = reader->firstFile();
+        ASSERT_NE(enumerator, nullptr);
+        EXPECT_EQ(enumerator->getFileName(), "a.txt");
+        EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, a_contents.size());
+        EXPECT_TRUE(enumerator->nextFile());
+        EXPECT_EQ(enumerator->getFileName(), "b/c.txt");
+        EXPECT_EQ(enumerator->getFileInfo().uncompressed_size, c_contents.size());
+        EXPECT_FALSE(enumerator->nextFile());
+    }
+
+    {
+        /// Convert an enumerator to a read buffer and vice versa.
+        auto enumerator = reader->firstFile();
+        ASSERT_NE(enumerator, nullptr);
+        EXPECT_EQ(enumerator->getFileName(), "a.txt");
+        auto in = reader->readFile(std::move(enumerator));
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, a_contents);
+        enumerator = reader->nextFile(std::move(in));
+        ASSERT_NE(enumerator, nullptr);
+        EXPECT_EQ(enumerator->getFileName(), "b/c.txt");
+        in = reader->readFile(std::move(enumerator));
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, c_contents);
+        enumerator = reader->nextFile(std::move(in));
+        EXPECT_EQ(enumerator, nullptr);
+    }
+}
+
+
+TEST_P(ArchiveReaderAndWriterTest, InMemory)
+{
+    String archive_in_memory;
+
+    /// Make an archive.
+    std::string_view a_contents = "The contents of a.txt";
+    std::string_view b_contents = "The contents of b.txt";
+    {
+        auto writer = createArchiveWriter(getPathToArchive(), std::make_unique<WriteBufferFromString>(archive_in_memory));
+        {
+            auto out = writer->writeFile("a.txt");
+            writeString(a_contents, *out);
+        }
+        {
+            auto out = writer->writeFile("b.txt");
+            writeString(b_contents, *out);
+        }
+    }
+
+    /// The created archive is actually held in memory.
+    ASSERT_FALSE(fs::exists(getPathToArchive()));
+
+    /// Read the archive.
+    auto read_archive_func = [&]() -> std::unique_ptr<SeekableReadBuffer> { return std::make_unique<ReadBufferFromString>(archive_in_memory); };
+    auto reader = createArchiveReader(getPathToArchive(), read_archive_func, archive_in_memory.size());
+
+    ASSERT_TRUE(reader->fileExists("a.txt"));
+    ASSERT_TRUE(reader->fileExists("b.txt"));
+
+    EXPECT_EQ(reader->getFileInfo("a.txt").uncompressed_size, a_contents.size());
+    EXPECT_EQ(reader->getFileInfo("b.txt").uncompressed_size, b_contents.size());
+
+    {
+        auto in = reader->readFile("a.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, a_contents);
+    }
+
+    {
+        auto in = reader->readFile("b.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, b_contents);
+    }
+
+    {
+        /// Read a.txt again.
+        auto in = reader->readFile("a.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, a_contents);
+    }
+}
+
+
+TEST_P(ArchiveReaderAndWriterTest, Password)
+{
+    /// Make an archive.
+    std::string_view contents = "The contents of a.txt";
+    {
+        auto writer = createArchiveWriter(getPathToArchive());
+        writer->setPassword("Qwe123");
+        {
+            auto out = writer->writeFile("a.txt");
+            writeString(contents, *out);
+        }
+    }
+
+    /// Read the archive.
+    auto reader = createArchiveReader(getPathToArchive());
+
+    /// Try to read without a password.
+    expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Password is required",
+                    [&]{ reader->readFile("a.txt"); });
+
+    {
+        /// Try to read with a wrong password.
+        reader->setPassword("123Qwe");
+        expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Wrong password",
+                        [&]{ reader->readFile("a.txt"); });
+    }
+
+    {
+        /// Reading with the right password is successful.
+        reader->setPassword("Qwe123");
+        auto in = reader->readFile("a.txt");
+        String str;
+        readStringUntilEOF(str, *in);
+        EXPECT_EQ(str, contents);
+    }
+}
+
+
+TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist)
+{
+    expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open",
+                    [&]{ createArchiveReader(getPathToArchive()); });
+}
+
+
+namespace
+{
+    const char * supported_archive_file_exts[] =
+    {
+#if USE_MINIZIP
+        ".zip",
+#endif
+    };
+}
+
+INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts));
+ reader->setPassword("Qwe123"); + auto in = reader->readFile("a.txt"); + String str; + readStringUntilEOF(str, *in); + EXPECT_EQ(str, contents); + } +} + + +TEST_P(ArchiveReaderAndWriterTest, ArchiveNotExist) +{ + expectException(ErrorCodes::CANNOT_UNPACK_ARCHIVE, "Couldn't open", + [&]{ createArchiveReader(getPathToArchive()); }); +} + + +namespace +{ + const char * supported_archive_file_exts[] = + { +#if USE_MINIZIP + ".zip", +#endif + }; +} + +INSTANTIATE_TEST_SUITE_P(All, ArchiveReaderAndWriterTest, ::testing::ValuesIn(supported_archive_file_exts)); From d9bdbf47b7cac3a8ae2658bbbec54f05b3653402 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 5 Feb 2022 02:47:46 +0700 Subject: [PATCH 072/215] ReadBufferFromMemory now can seek backwards after been read up to EOF. --- src/IO/ReadBufferFromMemory.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/IO/ReadBufferFromMemory.cpp b/src/IO/ReadBufferFromMemory.cpp index 98c39c833b0..d0863878797 100644 --- a/src/IO/ReadBufferFromMemory.cpp +++ b/src/IO/ReadBufferFromMemory.cpp @@ -12,31 +12,33 @@ off_t ReadBufferFromMemory::seek(off_t offset, int whence) { if (whence == SEEK_SET) { - if (offset >= 0 && working_buffer.begin() + offset < working_buffer.end()) + if (offset >= 0 && internal_buffer.begin() + offset < internal_buffer.end()) { - pos = working_buffer.begin() + offset; - return size_t(pos - working_buffer.begin()); + pos = internal_buffer.begin() + offset; + working_buffer = internal_buffer; /// We need to restore `working_buffer` in case the position was at EOF before this seek(). + return size_t(pos - internal_buffer.begin()); } else throw Exception( "Seek position is out of bounds. " "Offset: " - + std::to_string(offset) + ", Max: " + std::to_string(size_t(working_buffer.end() - working_buffer.begin())), + + std::to_string(offset) + ", Max: " + std::to_string(size_t(internal_buffer.end() - internal_buffer.begin())), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); } else if (whence == SEEK_CUR) { Position new_pos = pos + offset; - if (new_pos >= working_buffer.begin() && new_pos < working_buffer.end()) + if (new_pos >= internal_buffer.begin() && new_pos < internal_buffer.end()) { pos = new_pos; - return size_t(pos - working_buffer.begin()); + working_buffer = internal_buffer; /// We need to restore `working_buffer` in case the position was at EOF before this seek(). + return size_t(pos - internal_buffer.begin()); } else throw Exception( "Seek position is out of bounds. 
" "Offset: " - + std::to_string(offset) + ", Max: " + std::to_string(size_t(working_buffer.end() - working_buffer.begin())), + + std::to_string(offset) + ", Max: " + std::to_string(size_t(internal_buffer.end() - internal_buffer.begin())), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); } else @@ -45,7 +47,7 @@ off_t ReadBufferFromMemory::seek(off_t offset, int whence) off_t ReadBufferFromMemory::getPosition() { - return pos - working_buffer.begin(); + return pos - internal_buffer.begin(); } } From 4cd7324b9bd7f6bb7e8dd6c484facad96caf09da Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sat, 5 Feb 2022 16:59:22 +0000 Subject: [PATCH 073/215] Added implementation of bitsetsort from LLVM patch --- base/base/BitSetSort.h | 759 +++++++++++++++-------------------------- 1 file changed, 275 insertions(+), 484 deletions(-) diff --git a/base/base/BitSetSort.h b/base/base/BitSetSort.h index 1fadc8149f2..7fda69747de 100644 --- a/base/base/BitSetSort.h +++ b/base/base/BitSetSort.h @@ -33,127 +33,58 @@ namespace stdext { //_LIBCPP_BEGIN_NAMESPACE_STD +/// Implementation from LLVM Path https://reviews.llvm.org/D93233 + namespace __sorting_network { template class __conditional_swap { - _Compare comp_; +public: + typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - public: - _Compare get() const { return comp_; } - __conditional_swap(_Compare __comp) : comp_(__comp) {} - inline void operator()(_RandomAccessIterator __x, - _RandomAccessIterator __y) const { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type - value_type; - bool __result = comp_(*__x, *__y); + _LIBCPP_CONSTEXPR_AFTER_CXX11 _Comp_ref get() const { return comp_; } + _LIBCPP_CONSTEXPR_AFTER_CXX11 __conditional_swap(const _Comp_ref __comp) : comp_(__comp) {} + _LIBCPP_CONSTEXPR_AFTER_CXX11 inline void operator()(_RandomAccessIterator __x, _RandomAccessIterator __y) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + bool __result = comp_(*__y, *__x); // Expect a compiler would short-circuit the following if-block. // 4 * sizeof(size_t) is a magic number. Expect a compiler to use SIMD // instruction on them. if (_VSTD::is_trivially_copy_constructible::value && - _VSTD::is_trivially_copy_assignable::value && - sizeof(value_type) <= 4 * sizeof(size_t)) { - value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); - *__y = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); + _VSTD::is_trivially_copy_assignable::value && sizeof(value_type) <= 4 * sizeof(size_t)) { + value_type __min = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); + *__y = __result ? 
_VSTD::move(*__x) : _VSTD::move(*__y); *__x = _VSTD::move(__min); } else { - if (!__result) { + if (__result) { _VSTD::iter_swap(__x, __y); } } } + +private: + _Comp_ref comp_; }; template class __reverse_conditional_swap { - _Compare comp_; + typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; + _Comp_ref comp_; - public: - _Compare get() const { return comp_; } - __reverse_conditional_swap(_Compare __comp) : comp_(__comp) {} - inline void operator()(_RandomAccessIterator __x, - _RandomAccessIterator __y) const { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type - value_type; +public: + _LIBCPP_CONSTEXPR_AFTER_CXX11 _Comp_ref get() const { return comp_; } + _LIBCPP_CONSTEXPR_AFTER_CXX11 + __reverse_conditional_swap(const _Comp_ref __comp) : comp_(__comp) {} + inline void operator()(_RandomAccessIterator __x, _RandomAccessIterator __y) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; bool __result = !comp_(*__x, *__y); // Expect a compiler would short-circuit the following if-block. if (_VSTD::is_trivially_copy_constructible::value && - _VSTD::is_trivially_copy_assignable::value && - sizeof(value_type) <= 4 * sizeof(size_t)) { + _VSTD::is_trivially_copy_assignable::value && sizeof(value_type) <= 4 * sizeof(size_t)) { value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); *__y = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); *__x = _VSTD::move(__min); } else { - /** This change is required for ClickHouse. - * It seems that this is slow branch, and its logic should be identical to fast branch. - * Logic of fast branch, - * if (result) - * min = x; - * y = y; - * x = x; - * else - * min = y; - * y = x; - * x = y; - * - * We swap elements only if result is false. 
- * - * Example to reproduce sort bug: - * int main(int argc, char ** argv) - * { - * (void)(argc); - * (void)(argv); - * - * std::vector> values = { - * {1, 1}, - * {3, -1}, - * {2, 1}, - * {7, -1}, - * {3, 1}, - * {999, -1}, - * {4, 1}, - * {7, -1}, - * {5, 1}, - * {8, -1} - * }; - * - * ::stdext::bitsetsort(values.begin(), values.end()); - * bool is_sorted = std::is_sorted(values.begin(), values.end()); - * - * std::cout << "Array " << values.size() << " is sorted " << is_sorted << std::endl; - * - * for (auto & value : values) - * std::cout << value.first << " " << value.second << std::endl; - * - * return 0; - * } - * - * Output before change: - * Array 10 is sorted 0 - * 1 1 - * 2 1 - * 3 -1 - * 3 1 - * 4 1 - * 7 -1 - * 7 -1 - * 8 -1 - * 5 1 - * 999 -1 - * - * After change: - * Array 10 is sorted 1 - * 1 1 - * 2 1 - * 3 -1 - * 3 1 - * 4 1 - * 5 1 - * 7 -1 - * 7 -1 - * 8 -1 - * 999 -1 - */ if (!__result) { _VSTD::iter_swap(__x, __y); } @@ -162,19 +93,19 @@ class __reverse_conditional_swap { }; template -void __sort2(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort2(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 0, __a + 1); } template -void __sort3(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort3(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 0, __a + 2); __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 1, __a + 2); } template -void __sort4(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort4(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 0, __a + 1); __cond_swap(__a + 2, __a + 3); __cond_swap(__a + 0, __a + 2); @@ -183,11 +114,11 @@ void __sort4(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { } template -void __sort5(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort5(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 0, __a + 1); __cond_swap(__a + 3, __a + 4); - __cond_swap(__a + 2, __a + 4); __cond_swap(__a + 2, __a + 3); + __cond_swap(__a + 3, __a + 4); __cond_swap(__a + 0, __a + 3); __cond_swap(__a + 1, __a + 4); __cond_swap(__a + 0, __a + 2); @@ -196,13 +127,13 @@ void __sort5(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { } template -void __sort6(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort6(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 1, __a + 2); __cond_swap(__a + 4, __a + 5); - __cond_swap(__a + 0, __a + 2); - __cond_swap(__a + 3, __a + 5); __cond_swap(__a + 0, __a + 1); __cond_swap(__a + 3, __a + 4); + __cond_swap(__a + 1, __a + 2); + __cond_swap(__a + 4, __a + 5); __cond_swap(__a + 0, __a + 3); __cond_swap(__a + 1, __a + 4); __cond_swap(__a + 2, __a + 5); @@ -211,14 +142,14 @@ void __sort6(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 2, __a + 3); } template -void __sort7(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort7(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 1, __a + 2); __cond_swap(__a + 3, __a + 4); __cond_swap(__a + 5, __a + 6); - __cond_swap(__a + 0, __a + 2); + __cond_swap(__a + 0, __a + 1); __cond_swap(__a + 3, __a + 5); __cond_swap(__a + 4, __a + 6); - __cond_swap(__a + 0, __a + 1); + __cond_swap(__a + 1, __a + 2); __cond_swap(__a 
+ 4, __a + 5); __cond_swap(__a + 0, __a + 4); __cond_swap(__a + 1, __a + 5); @@ -231,7 +162,7 @@ void __sort7(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { } template -void __sort8(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort8(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { __cond_swap(__a + 0, __a + 1); __cond_swap(__a + 2, __a + 3); __cond_swap(__a + 4, __a + 5); @@ -254,102 +185,104 @@ void __sort8(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { } template -void __sort1to8( - _RandomAccessIterator __a, - typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, - _ConditionalSwap __cond_swap) { +_LIBCPP_HIDE_FROM_ABI void __sort1to8(_RandomAccessIterator __a, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, + _ConditionalSwap __cond_swap) { switch (__len) { - case 0: - case 1: - return; - case 2: - __sort2(__a, __cond_swap); - return; - case 3: - __sort3(__a, __cond_swap); - return; - case 4: - __sort4(__a, __cond_swap); - return; - case 5: - __sort5(__a, __cond_swap); - return; - case 6: - __sort6(__a, __cond_swap); - return; - case 7: - __sort7(__a, __cond_swap); - return; - case 8: - __sort8(__a, __cond_swap); - return; + case 0: + case 1: + return; + case 2: + __sort2(__a, __cond_swap); + return; + case 3: + __sort3(__a, __cond_swap); + return; + case 4: + __sort4(__a, __cond_swap); + return; + case 5: + __sort5(__a, __cond_swap); + return; + case 6: + __sort6(__a, __cond_swap); + return; + case 7: + __sort7(__a, __cond_swap); + return; + case 8: + __sort8(__a, __cond_swap); + return; } // ignore } template -void __sort3(_RandomAccessIterator __a0, _RandomAccessIterator __a1, _RandomAccessIterator __a2, _ConditionalSwap __cond_swap) { +_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_HIDE_FROM_ABI void __sort3(_RandomAccessIterator __a0, _RandomAccessIterator __a1, + _RandomAccessIterator __a2, + _ConditionalSwap __cond_swap) { __cond_swap(__a1, __a2); __cond_swap(__a0, __a2); __cond_swap(__a0, __a1); } -template -void __sort3r(_RandomAccessIterator __a2, _RandomAccessIterator __a1, _RandomAccessIterator __a0, _ConditionalSwap __rev_cond_swap) { - __rev_cond_swap(__a1, __a2); - __rev_cond_swap(__a0, __a2); - __rev_cond_swap(__a0, __a1); -} - -} // namespace __sorting_network +// stable, 2-3 compares, 0-2 swaps template -_ForwardIterator -__median3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) -{ - if (__c(*__x, *__y)) { - if (__c(*__y, *__z)) { - return __y; - } - // x < y, y >= z - if (__c(*__x, *__z)) { - return __z; - } - return __x; - } else { - // y <= x - if (__c(*__x, *__z)) { - // y <= x < z - return __x; - } - // y <= x, z <= x - if (__c(*__y, *__z)) { - return __z; - } - return __y; +_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_HIDE_FROM_ABI unsigned +__sort3_with_number_of_swaps(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { + unsigned __r = 0; + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return __r; // x <= y && y <= z + // x <= y && y > z + swap(*__y, *__z); // x <= z && y < z + __r = 1; + if (__c(*__y, *__x)) // if x > y + { + swap(*__x, *__y); // x < y && y <= z + __r = 2; } + return __r; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + swap(*__x, *__z); // x < y && y < z + __r = 1; + return __r; + } + swap(*__x, *__y); // x > y && y <= z + __r = 1; // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + { + swap(*__y, *__z); 
// x <= y && y < z + __r = 2; + } + return __r; } +} // namespace __sorting_network + namespace __bitonic { class __detail { - public: - _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __batch = 8; - _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __bitonic_batch = __batch * 2; - _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __small_sort_max = - __detail::__bitonic_batch * 2; +public: + enum { + __batch = 8, + __bitonic_batch = __batch * 2, + __small_sort_max = __bitonic_batch * 2, + }; }; -template -void __enforce_order(_RandomAccessIterator __first, - _RandomAccessIterator __last, _ConditionalSwap __cond_swap, - _ReverseConditionalSwap __reverse_cond_swap) { +template +_LIBCPP_HIDE_FROM_ABI void __enforce_order(_RandomAccessIterator __first, _RandomAccessIterator __last, + _ConditionalSwap __cond_swap, _ReverseConditionalSwap __reverse_cond_swap) { _RandomAccessIterator __i = __first; - while (__i + __detail::__bitonic_batch <= __last) { + while (__detail::__bitonic_batch <= __last - __i) { __sorting_network::__sort8(__i, __cond_swap); __sorting_network::__sort8(__i + __detail::__batch, __reverse_cond_swap); __i += __detail::__bitonic_batch; } - if (__i + __detail::__batch <= __last) { + if (__detail::__batch <= __last - __i) { __sorting_network::__sort8(__i, __cond_swap); __i += __detail::__batch; __sorting_network::__sort1to8(__i, __last - __i, __reverse_cond_swap); @@ -359,100 +292,72 @@ void __enforce_order(_RandomAccessIterator __first, } class __construct { - public: - template - static inline void __op(_T* __result, _T&& __val) { - new (__result) _T(_VSTD::move(__val)); +public: + template + static inline void __op(_Type1* __result, _Type2&& __val) { + new (static_cast(__result)) _Type1(_VSTD::move(__val)); } }; class __move_assign { - public: - template - static inline void __op(_T* __result, _T&& __val) { +public: + template + static inline void __op(_Type1 __result, _Type2&& __val) { *__result = _VSTD::move(__val); } }; -template -void __forward_merge(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _Compare __comp) { +template +_LIBCPP_HIDE_FROM_ABI void __forward_merge(_InputIterator __first, _InputIterator __last, _OutputIterator __result, + _Compare __comp) { --__last; - typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = - __last - __first; + // The len used here is one less than the actual length. This is so that the + // comparison is carried out against 0. The final move is done + // unconditionally at the end. + typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = __last - __first; for (; __len > 0; __len--) { - if (__comp(*__first, *__last)) { - _Copy::__op(&*__result, _VSTD::move(*__first++)); + if (__comp(*__last, *__first)) { + _Copy::__op(__result, _VSTD::move(*__last)); + --__last; } else { - _Copy::__op(&*__result, _VSTD::move(*__last--)); + _Copy::__op(__result, _VSTD::move(*__first)); + ++__first; } - __result++; + ++__result; } - _Copy::__op(&*__result, _VSTD::move(*__first)); + _Copy::__op(__result, _VSTD::move(*__first)); } -template -void __backward_merge(_InputIterator __first, _InputIterator __last, - _OutputIterator __result, _Compare __comp) { +template +_LIBCPP_HIDE_FROM_ABI void __backward_merge(_InputIterator __first, _InputIterator __last, _OutputIterator __result, + _Compare __comp) { --__last; __result += __last - __first; - typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = - __last - __first; + // The len used here is one less than the actual length. 
This is so that the + // comparison is carried out against 0. The final move is done + // unconditionally at the end. + typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = __last - __first; for (; __len > 0; __len--) { if (__comp(*__first, *__last)) { - _Copy::__op(&*__result, _VSTD::move(*__first++)); + _Copy::__op(__result, _VSTD::move(*__first)); + ++__first; } else { - _Copy::__op(&*__result, _VSTD::move(*__last--)); + _Copy::__op(__result, _VSTD::move(*__last)); + --__last; } - __result--; + --__result; } - _Copy::__op(&*__result, _VSTD::move(*__first)); + _Copy::__op(__result, _VSTD::move(*__first)); } -template -void __forward_and_backward_merge(_InputIterator __first, _InputIterator __last, - _InputIterator __rlast, - _OutputIterator __result, _Compare __comp) { - _InputIterator __rfirst = __last; - __last--; - __rlast--; - typename _VSTD::iterator_traits<_InputIterator>::difference_type len = - __last - __first; - _OutputIterator __rout = __result + (__rlast - __first); - - for (; len > 0; len--) { - if (__comp(*__first, *__last)) { - _Copy::__op(&*__result, _VSTD::move(*__first++)); - } else { - _Copy::__op(&*__result, _VSTD::move(*__last--)); - } - __result++; - if (__comp(*__rfirst, *__rlast)) { - _Copy::__op(&*__rout, _VSTD::move(*__rfirst++)); - } else { - _Copy::__op(&*__rout, _VSTD::move(*__rlast--)); - } - __rout--; - } - _Copy::__op(&*__result, _VSTD::move(*__first)); - _Copy::__op(&*__rout, _VSTD::move(*__rfirst)); -} - -template -inline bool __small_sort( - _RandomAccessIterator __first, - typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, - _ConditionalSwap& __cond_swap, - _ReverseConditionalSwap __reverse_cond_swap) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type - difference_type; - typedef - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; +template +inline _LIBCPP_HIDE_FROM_ABI bool +__small_sort(_RandomAccessIterator __first, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, _ConditionalSwap __cond_swap, + _ReverseConditionalSwap __reverse_cond_swap) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + typedef typename _ConditionalSwap::_Comp_ref _Comp_ref; if (__len > __detail::__small_sort_max) { return false; } @@ -462,34 +367,32 @@ inline bool __small_sort( // sorted. return true; } - auto __comp = __cond_swap.get(); + const _Comp_ref __comp = __cond_swap.get(); if (__len <= __detail::__bitonic_batch) { // single bitonic order merge. - __forward_merge<__bitonic::__construct>(__first, __last, __buff, __comp); - copy(_VSTD::make_move_iterator(__buff), _VSTD::make_move_iterator(__buff + __len), - __first); + __forward_merge<__construct, _Comp_ref>(__first, __last, __buff, _Comp_ref(__comp)); + _VSTD::copy(_VSTD::make_move_iterator(__buff), _VSTD::make_move_iterator(__buff + __len), __first); for (auto __iter = __buff; __iter < __buff + __len; __iter++) { (*__iter).~value_type(); } return true; } // double bitonic order merge. 
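  // (Both halves arriving here are bitonic: __enforce_order emits batches of 8 sorted
  //  alternately ascending and descending. The first 16 values are merged forward into
  //  __buff, the tail is merged backward behind them, and a final forward merge over
  //  __buff writes the fully sorted run back; three linear passes in total.)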
- __forward_merge<__construct>(__first, __first + __detail::__bitonic_batch, - __buff, __comp); - __backward_merge<__construct>(__first + __detail::__bitonic_batch, __last, - __buff + __detail::__bitonic_batch, __comp); - __forward_merge<__move_assign>(__buff, __buff + __len, __first, __comp); + __forward_merge<__construct, _Comp_ref>(__first, __first + __detail::__bitonic_batch, __buff, _Comp_ref(__comp)); + __backward_merge<__construct, _Comp_ref>(__first + __detail::__bitonic_batch, __last, + __buff + __detail::__bitonic_batch, _Comp_ref(__comp)); + __forward_merge<__move_assign, _Comp_ref>(__buff, __buff + __len, __first, _Comp_ref(__comp)); for (auto __iter = __buff; __iter < __buff + __len; __iter++) { (*__iter).~value_type(); } return true; } -} // namespace __bitonic +} // namespace __bitonic namespace __bitsetsort { struct __64bit_set { typedef uint64_t __storage_t; - _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __block_size = 64; + enum { __block_size = 64 }; static __storage_t __blsr(__storage_t x) { // _blsr_u64 can be used here but it did not make any performance // difference in practice. @@ -501,7 +404,7 @@ struct __64bit_set { struct __32bit_set { typedef uint32_t __storage_t; - _LIBCPP_CONSTEXPR_AFTER_CXX11 static int __block_size = 32; + enum { __block_size = 32 }; static __storage_t __blsr(__storage_t x) { // _blsr_u32 can be used here but it did not make any performance // difference in practice. @@ -511,21 +414,20 @@ struct __32bit_set { static int __ctz(__storage_t x) { return __builtin_ctzl(x); } }; -template +template struct __set_selector { - typedef __64bit_set __set; + typedef __64bit_set __set; }; -template<> +template <> struct __set_selector<4> { - typedef __32bit_set __set; + typedef __32bit_set __set; }; template -inline void __swap_bitmap_pos(_RandomAccessIterator __first, - _RandomAccessIterator __last, - typename _Bitset::__storage_t& __left_bitset, - typename _Bitset::__storage_t& __right_bitset) { +inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(_RandomAccessIterator __first, _RandomAccessIterator __last, + typename _Bitset::__storage_t& __left_bitset, + typename _Bitset::__storage_t& __right_bitset) { while (__left_bitset != 0 & __right_bitset != 0) { int tz_left = _Bitset::__ctz(__left_bitset); __left_bitset = _Bitset::__blsr(__left_bitset); @@ -535,72 +437,41 @@ inline void __swap_bitmap_pos(_RandomAccessIterator __first, } } -template -inline void __swap_bitmap(_RandomAccessIterator __first, - _RandomAccessIterator __last, - typename _Bitset::__storage_t& __left_bitset, - typename _Bitset::__storage_t& __right_bitset) { - if (__left_bitset == 0 || __right_bitset == 0) { - return; - } - int tz_left; - int tz_right; - - tz_left = _Bitset::__ctz(__left_bitset); - __left_bitset = _Bitset::__blsr(__left_bitset); - - tz_right = _Bitset::__ctz(__right_bitset); - __right_bitset = _Bitset::__blsr(__right_bitset); - - _RandomAccessIterator l = __first + tz_left; - _RandomAccessIterator r = __last - tz_right; - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type tmp( - _VSTD::move(*l)); - *l = _VSTD::move(*r); - while (__left_bitset != 0 & __right_bitset != 0) { - tz_left = _Bitset::__ctz(__left_bitset); - __left_bitset = _Bitset::__blsr(__left_bitset); - tz_right = _Bitset::__ctz(__right_bitset); - __right_bitset = _Bitset::__blsr(__right_bitset); - - l = __first + tz_left; - *r = _VSTD::move(*l); - r = __last - tz_right; - *l = _VSTD::move(*r); - } - *r = _VSTD::move(tmp); -} - template -_VSTD::pair<_RandomAccessIterator, bool> 
__bitset_partition( - _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type - value_type; - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type - difference_type; +_LIBCPP_HIDE_FROM_ABI _VSTD::pair<_RandomAccessIterator, bool> +__bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename _Bitset::__storage_t __storage_t; _RandomAccessIterator __begin = __first; - value_type __pivot = _VSTD::move(*__first); + value_type __pivot(_VSTD::move(*__first)); + // Check if pivot is less than the last element. Checking this first avoids + // comparing the first and the last iterators on each iteration as done in the + // else part. if (__comp(__pivot, *(__last - 1))) { // Guarded. - while (!__comp(__pivot, *++__first)) {} + while (!__comp(__pivot, *++__first)) { + } } else { - while (++__first < __last && !__comp(__pivot, *__first)) {} + while (++__first < __last && !__comp(__pivot, *__first)) { + } } if (__first < __last) { - // It will be always guarded because __bitset_sort will do the median-of-three before calling this. - while (__comp(__pivot, *--__last)) {} + // It will be always guarded because __bitset_sort will do the + // median-of-three before calling this. + while (__comp(__pivot, *--__last)) { + } } bool __already_partitioned = __first >= __last; if (!__already_partitioned) { - _VSTD::iter_swap(__first, __last); - ++__first; + _VSTD::iter_swap(__first, __last); + ++__first; } - // [__first, __last) - __last is not inclusive. From now one, it uses last minus one to be inclusive on both sides. + // In [__first, __last) __last is not inclusive. From now one, it uses last + // minus one to be inclusive on both sides. _RandomAccessIterator __lm1 = __last - 1; __storage_t __left_bitset = 0; __storage_t __right_bitset = 0; @@ -612,24 +483,23 @@ _VSTD::pair<_RandomAccessIterator, bool> __bitset_partition( // will be compiled into a set of SIMD instructions. _RandomAccessIterator __iter = __first; for (int __j = 0; __j < _Bitset::__block_size;) { - __left_bitset |= (static_cast<__storage_t>(__comp(__pivot, *__iter)) << __j); + bool __comp_result = __comp(__pivot, *__iter); + __left_bitset |= (static_cast<__storage_t>(__comp_result) << __j); __j++; - __iter++; + ++__iter; } } - if (__right_bitset == 0) { // Possible vectorization. With a proper "-march" flag, the following loop // will be compiled into a set of SIMD instructions. _RandomAccessIterator __iter = __lm1; for (int __j = 0; __j < _Bitset::__block_size;) { - __right_bitset |= - (static_cast<__storage_t>(!__comp(__pivot, *__iter)) << __j); + bool __comp_result = __comp(*__iter, __pivot); + __right_bitset |= (static_cast<__storage_t>(__comp_result) << __j); __j++; - __iter--; + --__iter; } } - __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); __first += (__left_bitset == 0) ? _Bitset::__block_size : 0; __lm1 -= (__right_bitset == 0) ? _Bitset::__block_size : 0; @@ -645,23 +515,23 @@ _VSTD::pair<_RandomAccessIterator, bool> __bitset_partition( // We know at least one side is a full block. 
__l_size = __remaining_len - _Bitset::__block_size; __r_size = _Bitset::__block_size; - } else { // if (right == 0) + } else { // if (__right_bitset == 0) __l_size = _Bitset::__block_size; __r_size = __remaining_len - _Bitset::__block_size; } if (__left_bitset == 0) { _RandomAccessIterator __iter = __first; for (int j = 0; j < __l_size; j++) { - __left_bitset |= - (static_cast<__storage_t>(__comp(__pivot, *(__iter))) << j); - __iter++; + bool __comp_result = __comp(__pivot, *__iter); + __left_bitset |= (static_cast<__storage_t>(__comp_result) << j); + ++__iter; } } if (__right_bitset == 0) { _RandomAccessIterator __iter = __lm1; for (int j = 0; j < __r_size; j++) { - __right_bitset |= - (static_cast<__storage_t>(!__comp(__pivot, *(__iter))) << j); + bool __comp_result = __comp(*__iter, __pivot); + __right_bitset |= (static_cast<__storage_t>(__comp_result) << j); --__iter; } } @@ -670,40 +540,46 @@ _VSTD::pair<_RandomAccessIterator, bool> __bitset_partition( __lm1 -= (__right_bitset == 0) ? __r_size : 0; if (__left_bitset) { - // Swap within the right side. - int __tz_left; - + // Swap within the left side. // Need to find set positions in the reverse order. while (__left_bitset != 0) { - __tz_left = _Bitset::__block_size - 1 - _Bitset::__clz(__left_bitset); + int __tz_left = _Bitset::__block_size - 1 - _Bitset::__clz(__left_bitset); __left_bitset &= (static_cast<__storage_t>(1) << __tz_left) - 1; - _VSTD::iter_swap(__first + __tz_left, __lm1--); + _RandomAccessIterator it = __first + __tz_left; + if (it != __lm1) { + _VSTD::iter_swap(it, __lm1); + } + --__lm1; } __first = __lm1 + 1; } else if (__right_bitset) { - // Swap within the left side. - int __tz_right; + // Swap within the right side. // Need to find set positions in the reverse order. while (__right_bitset != 0) { - __tz_right = _Bitset::__block_size - 1 - _Bitset::__clz(__right_bitset); + int __tz_right = _Bitset::__block_size - 1 - _Bitset::__clz(__right_bitset); __right_bitset &= (static_cast<__storage_t>(1) << __tz_right) - 1; - _VSTD::iter_swap(__lm1 - __tz_right, __first++); + _RandomAccessIterator it = __lm1 - __tz_right; + if (it != __first) { + _VSTD::iter_swap(it, __first); + } + ++__first; } } _RandomAccessIterator __pivot_pos = __first - 1; - *__begin = _VSTD::move(*__pivot_pos); + if (__begin != __pivot_pos) { + *__begin = _VSTD::move(*__pivot_pos); + } *__pivot_pos = _VSTD::move(__pivot); return _VSTD::make_pair(__pivot_pos, __already_partitioned); } -template -inline bool __partial_insertion_sort(_RandomAccessIterator __first, - _RandomAccessIterator __last, - _Compare __comp) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type - value_type; - if (__first == __last) return true; +template +inline _LIBCPP_HIDE_FROM_ABI bool __partial_insertion_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + if (__first == __last) + return true; const unsigned __limit = 8; unsigned __count = 0; @@ -718,7 +594,8 @@ inline bool __partial_insertion_sort(_RandomAccessIterator __first, __j = __k; } while (__j != __first && __comp(__t, *--__k)); *__j = _VSTD::move(__t); - if (++__count == __limit) return ++__i == __last; + if (++__count == __limit) + return ++__i == __last; } __j = __i; } @@ -726,25 +603,19 @@ inline bool __partial_insertion_sort(_RandomAccessIterator __first, } template -void __bitsetsort_loop( - _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare 
__comp, - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, - typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __limit) { +void __bitsetsort_loop(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, + typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __limit) { _LIBCPP_CONSTEXPR_AFTER_CXX11 int __ninther_threshold = 128; - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type - difference_type; - typedef - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - __sorting_network::__conditional_swap<_RandomAccessIterator, _Compare> - __cond_swap(__comp); - __sorting_network::__reverse_conditional_swap<_RandomAccessIterator, _Compare> - __reverse_cond_swap(__comp); + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; + __sorting_network::__conditional_swap<_RandomAccessIterator, _Compare> __cond_swap(__comp); + __sorting_network::__reverse_conditional_swap<_RandomAccessIterator, _Compare> __reverse_cond_swap(__comp); while (true) { if (__limit == 0) { // Fallback to heap sort as Introsort suggests. - _VSTD::make_heap(__first, __last, __comp); - _VSTD::sort_heap(__first, __last, __comp); + _VSTD::make_heap<_RandomAccessIterator, _Comp_ref>(__first, __last, _Comp_ref(__comp)); + _VSTD::sort_heap<_RandomAccessIterator, _Comp_ref>(__first, __last, _Comp_ref(__comp)); return; } __limit--; @@ -752,11 +623,8 @@ void __bitsetsort_loop( if (__len <= __bitonic::__detail::__batch) { __sorting_network::__sort1to8(__first, __len, __cond_swap); return; - } else if (__len <= 32) { - __bitonic::__small_sort(__first, __len, __buff, __cond_swap, - __reverse_cond_swap); - // __bitonic::__sort9to32(__first, __len, __buff, __cond_swap, - // __reverse_cond_swap); + } else if (__len <= __bitonic::__detail::__small_sort_max) { + __bitonic::__small_sort(__first, __len, __buff, __cond_swap, __reverse_cond_swap); return; } difference_type __half_len = __len / 2; @@ -764,17 +632,19 @@ void __bitsetsort_loop( __sorting_network::__sort3(__first, __first + __half_len, __last - 1, __cond_swap); __sorting_network::__sort3(__first + 1, __first + (__half_len - 1), __last - 2, __cond_swap); __sorting_network::__sort3(__first + 2, __first + (__half_len + 1), __last - 3, __cond_swap); - __sorting_network::__sort3(__first + (__half_len - 1), __first + __half_len, - __first + (__half_len + 1), __cond_swap); + __sorting_network::__sort3(__first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), + __cond_swap); _VSTD::iter_swap(__first, __first + __half_len); } else { __sorting_network::__sort3(__first + __half_len, __first, __last - 1, __cond_swap); } - auto __ret = __bitset_partition<__64bit_set>(__first, __last, __comp); + auto __ret = __bitset_partition<__64bit_set, _RandomAccessIterator, _Comp_ref>(__first, __last, _Comp_ref(__comp)); if (__ret.second) { - bool __left = __partial_insertion_sort(__first, __ret.first, __comp); - if (__partial_insertion_sort(__ret.first + 1, __last, __comp)) { - if (__left) return; + bool __left = __partial_insertion_sort<_Comp_ref>(__first, __ret.first, _Comp_ref(__comp)); + bool __right = __partial_insertion_sort<_Comp_ref>(__ret.first + 1, __last, _Comp_ref(__comp)); + if (__right) { + if (__left) + return; __last = __ret.first; continue; } 
else { @@ -797,128 +667,49 @@ void __bitsetsort_loop( } } -template -inline _LIBCPP_INLINE_VISIBILITY _Number __log2i(_Number __n) { - _Number __log2 = 0; - while (__n > 1) { - __log2++; - __n >>= 1; - } - return __log2; +template +inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) { + _Number __log2 = 0; + while (__n > 1) { + __log2++; + __n >>= 1; + } + return __log2; } - template -inline _LIBCPP_INLINE_VISIBILITY void __bitsetsort_internal( - _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type - value_type; - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type - difference_type; - typename _VSTD::aligned_storage::type - __buff[__bitonic::__detail::__small_sort_max]; +inline _LIBCPP_HIDE_FROM_ABI void __bitsetsort_internal(_RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) { + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; + typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; + typename _VSTD::aligned_storage::type __buff[__bitonic::__detail::__small_sort_max]; + typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; // 2*log2 comes from Introsort https://reviews.llvm.org/D36423. difference_type __depth_limit = 2 * __log2i(__last - __first); - __bitsetsort_loop(__first, __last, __comp, - reinterpret_cast(&__buff[0]), - __depth_limit); + __bitsetsort_loop<_Comp_ref>(__first, __last, _Comp_ref(__comp), reinterpret_cast(&__buff[0]), + __depth_limit); } -} // namespace __bitsetsort - -// __branchlesscompimpl provides a branch-less comparator for pairs and tuples of primitive types. -// It provides 1.38x - 2x speed-up in pairs or tuples sorting. 
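// A sketch of the branchless idea being removed below: every component
// comparison is evaluated eagerly and the results are combined with plain
// boolean logic, e.g. for a pair:
//   c1 = l.first < r.first; c2 = r.first < l.first; c3 = l.second < r.second;
//   return c1 || (!c2 && c3);
// which lets the compiler lower the comparator to flag-setting instructions
// instead of branches -- the source of the 1.38x - 2x speed-up quoted above.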
-template -struct __branchlesscompimpl { - template - bool operator()(const R& lhs, const R& rhs) const { - return lhs < rhs; - } -}; - -template<> -struct __branchlesscompimpl { - template - bool operator()(const R& lhs, const R& rhs) const { - return lhs < rhs; - } - template - bool operator()(const _VSTD::pair& lhs, const _VSTD::pair& rhs) const { - const bool __c1 = lhs.first < rhs.first; - const bool __c2 = rhs.first < lhs.first; - const bool __c3 = lhs.second < rhs.second; - return __c1 || (!__c2 && __c3); - } - template - bool operator()(const _VSTD::tuple& lhs, const _VSTD::tuple& rhs) const { - const bool __c1 = _VSTD::get<0>(lhs) < _VSTD::get<0>(rhs); - const bool __c2 = _VSTD::get<0>(rhs) < _VSTD::get<0>(lhs); - const bool __c3 = _VSTD::get<1>(lhs) < _VSTD::get<1>(rhs); - return __c1 || (!__c2 && __c3); - } - template - bool operator()(const _VSTD::tuple& lhs, const _VSTD::tuple& rhs) const { - const bool __c1 = _VSTD::get<0>(lhs) < _VSTD::get<0>(rhs); - const bool __c2 = _VSTD::get<0>(rhs) < _VSTD::get<0>(lhs); - const bool __c3 = _VSTD::get<1>(lhs) < _VSTD::get<1>(rhs); - const bool __c4 = _VSTD::get<1>(rhs) < _VSTD::get<1>(lhs); - const bool __c5 = _VSTD::get<2>(lhs) < _VSTD::get<2>(rhs); - return __c1 || (!__c2 && (__c3 || (!__c4 && __c5))); - } -}; - -template -struct __branchlesscomp { - bool operator()(const _T& __x, const _T& __y) const { - return __x < __y; - } -}; - -template -struct __branchlesscomp<_VSTD::pair> : public __branchlesscompimpl<_VSTD::is_fundamental::value> {}; - -template -struct __branchlesscomp<_VSTD::tuple> : public __branchlesscompimpl<_VSTD::is_fundamental::value> {}; - -template -struct __branchlesscomp<_VSTD::tuple> : public __branchlesscompimpl<_VSTD::is_fundamental::value && _VSTD::is_fundamental::value> {}; +} // namespace __bitsetsort template -inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_RandomAccessIterator __first, - _RandomAccessIterator __last, - _Compare __comp) { - /** This change is required for ClickHouse - * /contrib/libcxx/include/algorithm:789:10: note: candidate function template not viable: 'this' argument has type - * 'const std::__debug_less::less>', but method is not marked const - * bool operator()(const _Tp& __x, const _Up& __y) - */ +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void bitsetsort(_RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - __bitsetsort::__bitsetsort_internal<_Compare>(__first, __last, - __comp); -} - -template -inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_VSTD::__wrap_iter<_Tp*> __first, - _VSTD::__wrap_iter<_Tp*> __last, - _Compare __comp) { - typedef typename _VSTD::add_lvalue_reference<_Compare>::type _Comp_ref; - bitsetsort<_Tp*, _Comp_ref>(__first.base(), __last.base(), __comp); + if (_VSTD::__libcpp_is_constant_evaluated()) { + _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); + } else { + __bitsetsort::__bitsetsort_internal<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), + _Comp_ref(__comp)); + } } template -inline _LIBCPP_INLINE_VISIBILITY void bitsetsort(_RandomAccessIterator __first, - _RandomAccessIterator __last) { - bitsetsort( - __first, __last, - __branchlesscomp::value_type>()); +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void bitsetsort(_RandomAccessIterator __first, + _RandomAccessIterator __last) { + bitsetsort(__first, __last, __less::value_type>()); } -template -inline 
_LIBCPP_INLINE_VISIBILITY void bitsetsort(_VSTD::__wrap_iter<_Tp*> __first, - _VSTD::__wrap_iter<_Tp*> __last) { - bitsetsort(__first.base(), __last.base()); -} } // namespace stdext #endif // _LIBCPP___BITSETSORT From f2725b1b2201c3dd42ed2ff72511b1791ba959d1 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Sat, 5 Feb 2022 18:26:43 +0100 Subject: [PATCH 074/215] Fix wrong OK_SKIP labels --- tests/ci/run_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index dc3f1da94cd..a2403e61ac1 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -18,7 +18,7 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse } -OK_SKIP_LABELS = {"release", "pr-documentation", "pr-doc-fix"} +OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"} CAN_BE_TESTED_LABEL = "can be tested" DO_NOT_TEST_LABEL = "do not test" FORCE_TESTS_LABEL = "force tests" From 5dd49a44e66160f300f8a97e3073a7576aa57384 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 6 Feb 2022 02:09:56 +0700 Subject: [PATCH 075/215] Add test for propagating OpenTelemetry context via gRPC protocol. --- src/Server/GRPCServer.cpp | 43 +++++++++----------- tests/integration/test_grpc_protocol/test.py | 15 +++++++ 2 files changed, 35 insertions(+), 23 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 48561a34539..f252561d63b 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -285,6 +285,15 @@ namespace return Poco::Net::SocketAddress{peer.substr(peer.find(':') + 1)}; } + std::optional getClientHeader(const String & key) const + { + const auto & client_metadata = grpc_context.client_metadata(); + auto it = client_metadata.find(key); + if (it != client_metadata.end()) + return String{it->second.data(), it->second.size()}; + return std::nullopt; + } + void setResultCompression(grpc_compression_algorithm algorithm, grpc_compression_level level) { grpc_context.set_compression_algorithm(algorithm); @@ -296,8 +305,6 @@ namespace setResultCompression(convertCompressionAlgorithm(compression.algorithm()), convertCompressionLevel(compression.level())); } - grpc::ServerContext grpc_context; - protected: CompletionCallback * getCallbackPtr(const CompletionCallback & callback) { @@ -320,6 +327,8 @@ namespace return &callback_in_map; } + grpc::ServerContext grpc_context; + private: grpc::ServerAsyncReaderWriter reader_writer{&grpc_context}; std::unordered_map callbacks; @@ -752,33 +761,21 @@ namespace session->authenticate(user, password, user_address); session->getClientInfo().quota_key = quota_key; - // Parse the OpenTelemetry traceparent header. ClientInfo client_info = session->getClientInfo(); - const auto & client_metadata = responder->grpc_context.client_metadata(); - auto traceparent = client_metadata.find("traceparent"); - if (traceparent != client_metadata.end()) + + /// Parse the OpenTelemetry traceparent header. 
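+ /// The value is assumed to follow the W3C Trace Context layout
+ /// "00-<32 hex trace id>-<16 hex parent span id>-<2 hex flags>", e.g.
+ /// "00-80c190b59dc14eae82b96c261438c817-000000000000007B-01" as constructed
+ /// in the integration test below.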
+ auto traceparent = responder->getClientHeader("traceparent"); + if (traceparent) { - grpc::string_ref parent_ref = traceparent->second; - std::string opentelemetry_traceparent(parent_ref.data(), parent_ref.length()); - std::string error; - if (!client_info.client_trace_context.parseTraceparentHeader( - opentelemetry_traceparent, error)) + String error; + if (!client_info.client_trace_context.parseTraceparentHeader(traceparent.value(), error)) { throw Exception(ErrorCodes::BAD_REQUEST_PARAMETER, "Failed to parse OpenTelemetry traceparent header '{}': {}", - opentelemetry_traceparent, error); - } - auto tracestate = client_metadata.find("tracestate"); - if (tracestate != client_metadata.end()) - { - grpc::string_ref state_ref = tracestate->second; - client_info.client_trace_context.tracestate = - std::string(state_ref.data(), state_ref.length()); - } - else - { - client_info.client_trace_context.tracestate = ""; + traceparent.value(), error); } + auto tracestate = responder->getClientHeader("tracestate"); + client_info.client_trace_context.tracestate = tracestate.value_or(""); } /// The user could specify session identifier and session timeout. diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index e17ed0d9c8e..2a91ebcd94b 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -431,3 +431,18 @@ def test_compressed_external_table(): b"3\tCarl\n"\ b"4\tDaniel\n"\ b"5\tEthan\n" + +def test_opentelemetry_context_propagation(): + trace_id = "80c190b5-9dc1-4eae-82b9-6c261438c817" + parent_span_id = 123 + trace_state = "some custom state" + trace_id_hex = trace_id.replace("-", "") + parent_span_id_hex = f'{parent_span_id:0>16X}' + metadata = [("traceparent", f"00-{trace_id_hex}-{parent_span_id_hex}-01"), ("tracestate", trace_state)] + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 1") + result = stub.ExecuteQuery(query_info, metadata=metadata) + assert result.output == b"1\n" + node.query("SYSTEM FLUSH LOGS") + assert node.query(f"SELECT attribute['db.statement'], attribute['clickhouse.tracestate'] FROM system.opentelemetry_span_log " + f"WHERE trace_id='{trace_id}' AND parent_span_id={parent_span_id}") == "SELECT 1\tsome custom state\n" From 8ba93354b062772f8186e911e881261e83a597eb Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Feb 2022 16:07:11 -0800 Subject: [PATCH 076/215] check and get column for s2 funcs --- src/Functions/s2CapContains.cpp | 43 ++++++++++++++++++---- src/Functions/s2CapUnion.cpp | 59 +++++++++++++++++++++++++----- src/Functions/s2CellsIntersect.cpp | 26 +++++++++++-- src/Functions/s2GetNeighbors.cpp | 13 ++++++- src/Functions/s2RectAdd.cpp | 42 ++++++++++++++++++--- 5 files changed, 154 insertions(+), 29 deletions(-) diff --git a/src/Functions/s2CapContains.cpp b/src/Functions/s2CapContains.cpp index 100b028646c..6604ff9707e 100644 --- a/src/Functions/s2CapContains.cpp +++ b/src/Functions/s2CapContains.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -83,19 +84,47 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_center = arguments[0].column.get(); - const auto * col_degrees = arguments[1].column.get(); - const auto * col_point = arguments[2].column.get(); + 
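/// The checks below replace the untyped IColumn accessors with an explicit
/// downcast: checkAndGetColumn returns nullptr when the concrete column type
/// does not match, and on success each row is read straight from the typed
/// data vector instead of going through virtual getUInt()/getFloat64() calls.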
const auto * col_center = checkAndGetColumn(arguments[0].column.get()); + if (!col_center) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + const auto & data_center = col_center->getData(); + + const auto * col_degrees = checkAndGetColumn(arguments[1].column.get()); + if (!col_degrees) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be Float64", + arguments[1].type->getName(), + 2, + getName()); + const auto & data_degrees = col_degrees->getData(); + + + const auto * col_point = checkAndGetColumn(arguments[2].column.get()); + if (!col_point) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[2].type->getName(), + 3, + getName()); + const auto & data_point = col_point->getData(); + auto dst = ColumnUInt8::create(); auto & dst_data = dst->getData(); dst_data.reserve(input_rows_count); - for (size_t row=0 ; row < input_rows_count; ++row) + for (size_t row = 0; row < input_rows_count; ++row) { - const auto center = S2CellId(col_center->getUInt(row)); - const Float64 degrees = col_degrees->getFloat64(row); - const auto point = S2CellId(col_point->getUInt(row)); + const auto center = S2CellId(data_center[row]); + const Float64 degrees = data_degrees[row]; + const auto point = S2CellId(data_point[row]); if (isNaN(degrees)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); diff --git a/src/Functions/s2CapUnion.cpp b/src/Functions/s2CapUnion.cpp index 263163963af..5800b08be87 100644 --- a/src/Functions/s2CapUnion.cpp +++ b/src/Functions/s2CapUnion.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -81,10 +82,50 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_center1 = arguments[0].column.get(); - const auto * col_radius1 = arguments[1].column.get(); - const auto * col_center2 = arguments[2].column.get(); - const auto * col_radius2 = arguments[3].column.get(); + const auto * col_center1 = checkAndGetColumn(arguments[0].column.get()); + if (!col_center1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_center_1 = col_center1->getData(); + + const auto * col_radius1 = checkAndGetColumn(arguments[1].column.get()); + if (!col_radius1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be Float64", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_radius_1 = col_radius1->getData(); + + const auto * col_center2 = checkAndGetColumn(arguments[2].column.get()); + if (!col_center2) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[2].type->getName(), + 3, + getName()); + + const auto & data_center_2 = col_center2->getData(); + + const auto * col_radius2 = checkAndGetColumn(arguments[3].column.get()); + if (!col_radius2) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arguments[3].type->getName(), + 4, + getName()); + + const auto & data_radius_2 = col_radius2->getData(); + auto col_res_center = ColumnUInt64::create(); auto col_res_radius = ColumnFloat64::create(); @@ -97,10 +138,10 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const UInt64 first_center = col_center1->getUInt(row); - const Float64 first_radius = col_radius1->getFloat64(row); - const UInt64 second_center = col_center2->getUInt(row); - const Float64 second_radius = col_radius2->getFloat64(row); + const UInt64 first_center = data_center_1[row]; + const Float64 first_radius = data_radius_1[row]; + const UInt64 second_center = data_center_2[row]; + const Float64 second_radius = data_radius_2[row]; if (isNaN(first_radius) || isNaN(second_radius)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); @@ -125,7 +166,6 @@ public: return ColumnTuple::create(Columns{std::move(col_res_center), std::move(col_res_radius)}); } - }; } @@ -135,7 +175,6 @@ void registerFunctionS2CapUnion(FunctionFactory & factory) factory.registerFunction(); } - } #endif diff --git a/src/Functions/s2CellsIntersect.cpp b/src/Functions/s2CellsIntersect.cpp index f8273a1fcca..5d9796ea26c 100644 --- a/src/Functions/s2CellsIntersect.cpp +++ b/src/Functions/s2CellsIntersect.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -65,8 +66,25 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_id_first = arguments[0].column.get(); - const auto * col_id_second = arguments[1].column.get(); + const auto * col_id_first = checkAndGetColumn(arguments[0].column.get()); + if (!col_id_first) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + const auto & data_id_first = col_id_first->getData(); + + const auto * col_id_second = checkAndGetColumn(arguments[1].column.get()); + if (!col_id_second) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arguments[1].type->getName(), + 2, + getName()); + const auto & data_id_second = col_id_second->getData(); auto dst = ColumnUInt8::create(); auto & dst_data = dst->getData(); @@ -74,8 +92,8 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const UInt64 id_first = col_id_first->getInt(row); - const UInt64 id_second = col_id_second->getInt(row); + const UInt64 id_first = data_id_first[row]; + const UInt64 id_second = data_id_second[row]; auto first_cell = S2CellId(id_first); auto second_cell = S2CellId(id_second); diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp index c0b2e634e6f..0d40ff2d61c 100644 --- a/src/Functions/s2GetNeighbors.cpp +++ b/src/Functions/s2GetNeighbors.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -64,7 +65,15 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_id = arguments[0].column.get(); + const auto * col_id = checkAndGetColumn(arguments[0].column.get()); + if (!col_id) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be Float64", + arguments[0].type->getName(), + 1, + getName()); + const auto & data_id = col_id->getData(); auto dst = ColumnArray::create(ColumnUInt64::create()); auto & dst_data = dst->getData(); @@ -74,7 +83,7 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const UInt64 id = col_id->getUInt(row); + const UInt64 id = data_id[row]; S2CellId cell_id(id); diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index f7c39b2a6b1..f48b193a47b 100644 --- a/src/Functions/s2RectAdd.cpp +++ b/src/Functions/s2RectAdd.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -64,9 +65,38 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_lo = arguments[0].column.get(); - const auto * col_hi = arguments[1].column.get(); - const auto * col_point = arguments[2].column.get(); + const auto * col_lo = checkAndGetColumn(arguments[0].column.get()); + if (!col_lo) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_low = col_lo->getData(); + + const auto * col_hi = checkAndGetColumn(arguments[1].column.get()); + if (!col_hi) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_hi = col_hi->getData(); + + const auto * col_point = checkAndGetColumn(arguments[2].column.get()); + if (!col_point) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arguments[2].type->getName(), + 3, + getName()); + + const auto & data_point = col_point->getData(); auto col_res_first = ColumnUInt64::create(); auto col_res_second = ColumnUInt64::create(); @@ -79,9 +109,9 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const auto lo = S2CellId(col_lo->getUInt(row)); - const auto hi = S2CellId(col_hi->getUInt(row)); - const auto point = S2CellId(col_point->getUInt(row)); + const auto lo = S2CellId(data_low[row]); + const auto hi = S2CellId(data_hi[row]); + const auto point = S2CellId(data_point[row]); if (!lo.is_valid() || !hi.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid"); From 557cf6ecc1349340e4b21df02dd524fb9478efaf Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Feb 2022 16:30:25 -0800 Subject: [PATCH 077/215] check and get column remaining s2 funcs --- src/Functions/s2RectContains.cpp | 44 ++++++++++++++++++---- src/Functions/s2RectIntersection.cpp | 56 ++++++++++++++++++++++++---- src/Functions/s2RectUnion.cpp | 56 ++++++++++++++++++++++++---- src/Functions/s2ToGeo.cpp | 14 ++++++- 4 files changed, 145 insertions(+), 25 deletions(-) diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index 90ced5450bc..bb318530c0b 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -14,11 +14,12 @@ namespace DB { - namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; + } namespace @@ -62,9 +63,38 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_lo = arguments[0].column.get(); - const auto * col_hi = arguments[1].column.get(); - const auto * col_point = arguments[2].column.get(); + const auto * col_lo = checkAndGetColumn(arguments[0].column.get()); + if (!col_lo) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_low = col_lo->getData(); + + const auto * col_hi = checkAndGetColumn(arguments[1].column.get()); + if (!col_hi) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_hi = col_hi->getData(); + + const auto * col_point = checkAndGetColumn(arguments[2].column.get()); + if (!col_point) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arguments[2].type->getName(), + 3, + getName()); + + const auto & data_point = col_point->getData(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -72,9 +102,9 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const auto lo = S2CellId(col_lo->getUInt(row)); - const auto hi = S2CellId(col_hi->getUInt(row)); - const auto point = S2CellId(col_point->getUInt(row)); + const auto lo = S2CellId(data_low[row]); + const auto hi = S2CellId(data_hi[row]); + const auto point = S2CellId(data_point[row]); if (!lo.is_valid() || !hi.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Rectangle is not valid"); diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp index b108cc1b64f..ab5a68803bf 100644 --- a/src/Functions/s2RectIntersection.cpp +++ b/src/Functions/s2RectIntersection.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -67,10 +68,49 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_lo1 = arguments[0].column.get(); - const auto * col_hi1 = arguments[1].column.get(); - const auto * col_lo2 = arguments[2].column.get(); - const auto * col_hi2 = arguments[3].column.get(); + const auto * col_lo1 = checkAndGetColumn(arguments[0].column.get()); + if (!col_lo1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_lo1 = col_lo1->getData(); + + const auto * col_hi1 = checkAndGetColumn(arguments[1].column.get()); + if (!col_hi1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_hi1 = col_hi1->getData(); + + const auto * col_lo2 = checkAndGetColumn(arguments[2].column.get()); + if (!col_lo2) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[2].type->getName(), + 3, + getName()); + + const auto & data_lo2 = col_lo2->getData(); + + const auto * col_hi2 = checkAndGetColumn(arguments[3].column.get()); + if (!col_hi2) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arguments[3].type->getName(), + 4, + getName()); + + const auto & data_hi2 = col_hi2->getData(); auto col_res_first = ColumnUInt64::create(); auto col_res_second = ColumnUInt64::create(); @@ -83,10 +123,10 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const auto lo1 = S2CellId(col_lo1->getUInt(row)); - const auto hi1 = S2CellId(col_hi1->getUInt(row)); - const auto lo2 = S2CellId(col_lo2->getUInt(row)); - const auto hi2 = S2CellId(col_hi2->getUInt(row)); + const auto lo1 = S2CellId(data_lo1[row]); + const auto hi1 = S2CellId(data_hi1[row]); + const auto lo2 = S2CellId(data_lo2[row]); + const auto hi2 = S2CellId(data_hi2[row]); if (!lo1.is_valid() || !hi1.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid"); diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp index bd40a747a09..510f496c859 100644 --- a/src/Functions/s2RectUnion.cpp +++ b/src/Functions/s2RectUnion.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -65,10 +66,49 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_lo1 = arguments[0].column.get(); - const auto * col_hi1 = arguments[1].column.get(); - const auto * col_lo2 = arguments[2].column.get(); - const auto * col_hi2 = arguments[3].column.get(); + const auto * col_lo1 = checkAndGetColumn(arguments[0].column.get()); + if (!col_lo1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_lo1 = col_lo1->getData(); + + const auto * col_hi1 = checkAndGetColumn(arguments[1].column.get()); + if (!col_hi1) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[1].type->getName(), + 2, + getName()); + + const auto & data_hi1 = col_hi1->getData(); + + const auto * col_lo2 = checkAndGetColumn(arguments[2].column.get()); + if (!col_lo2) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[2].type->getName(), + 3, + getName()); + + const auto & data_lo2 = col_lo2->getData(); + + const auto * col_hi2 = checkAndGetColumn(arguments[3].column.get()); + if (!col_hi2) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be UInt64", + arguments[3].type->getName(), + 4, + getName()); + + const auto & data_hi2 = col_hi2->getData(); auto col_res_first = ColumnUInt64::create(); auto col_res_second = ColumnUInt64::create(); @@ -81,10 +121,10 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const auto lo1 = S2CellId(col_lo1->getUInt(row)); - const auto hi1 = S2CellId(col_hi1->getUInt(row)); - const auto lo2 = S2CellId(col_lo2->getUInt(row)); - const auto hi2 = S2CellId(col_hi2->getUInt(row)); + const auto lo1 = S2CellId(data_lo1[row]); + const auto hi1 = S2CellId(data_hi1[row]); + const auto lo2 = S2CellId(data_lo2[row]); + const auto hi2 = S2CellId(data_hi2[row]); if (!lo1.is_valid() || !hi1.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "First rectangle is not valid"); diff --git a/src/Functions/s2ToGeo.cpp b/src/Functions/s2ToGeo.cpp index 03a67d49e45..e26a26909ab 100644 --- a/src/Functions/s2ToGeo.cpp +++ b/src/Functions/s2ToGeo.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int BAD_ARGUMENTS; + extern const int ILLEGAL_COLUMN; } namespace @@ -67,7 +68,16 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_id = arguments[0].column.get(); + const auto * col_id = checkAndGetColumn(arguments[0].column.get()); + if (!col_id) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be UInt64", + arguments[0].type->getName(), + 1, + getName()); + + const auto & data_id = col_id->getData(); auto col_longitude = ColumnFloat64::create(); auto col_latitude = ColumnFloat64::create(); @@ -80,7 +90,7 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const auto id = S2CellId(col_id->getUInt(row)); + const auto id = S2CellId(data_id[row]); if (!id.is_valid()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Point is not valid"); From 3df4929b6332aa6ebd0ce61ac659a8b514203653 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Feb 2022 16:40:55 -0800 Subject: [PATCH 078/215] minor fixes --- src/Functions/s2CapUnion.cpp | 21 ++++++++------------- src/Functions/s2RectAdd.cpp | 3 --- src/Functions/s2RectContains.cpp | 3 --- src/Functions/s2RectIntersection.cpp | 4 ---- src/Functions/s2RectUnion.cpp | 4 ---- 5 files changed, 8 insertions(+), 27 deletions(-) diff --git a/src/Functions/s2CapUnion.cpp b/src/Functions/s2CapUnion.cpp index 5800b08be87..7af6324a7d5 100644 --- a/src/Functions/s2CapUnion.cpp +++ b/src/Functions/s2CapUnion.cpp @@ -90,8 +90,7 @@ public: arguments[0].type->getName(), 1, getName()); - - const auto & data_center_1 = col_center1->getData(); + const auto & data_center1 = col_center1->getData(); const auto * col_radius1 = checkAndGetColumn(arguments[1].column.get()); if (!col_radius1) @@ -101,8 +100,7 @@ public: arguments[1].type->getName(), 2, getName()); - - const auto & data_radius_1 = col_radius1->getData(); + const auto & data_radius1 = col_radius1->getData(); const auto * col_center2 = checkAndGetColumn(arguments[2].column.get()); if (!col_center2) @@ -112,8 +110,7 @@ public: arguments[2].type->getName(), 3, getName()); - - const auto & data_center_2 = col_center2->getData(); + const auto & data_center2 = col_center2->getData(); const auto * col_radius2 = checkAndGetColumn(arguments[3].column.get()); if (!col_radius2) @@ -123,9 +120,7 @@ public: arguments[3].type->getName(), 4, getName()); - - const auto & data_radius_2 = col_radius2->getData(); - + const auto 
& data_radius2 = col_radius2->getData(); auto col_res_center = ColumnUInt64::create(); auto col_res_radius = ColumnFloat64::create(); @@ -138,10 +133,10 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const UInt64 first_center = data_center_1[row]; - const Float64 first_radius = data_radius_1[row]; - const UInt64 second_center = data_center_2[row]; - const Float64 second_radius = data_radius_2[row]; + const UInt64 first_center = data_center1[row]; + const Float64 first_radius = data_radius1[row]; + const UInt64 second_center = data_center2[row]; + const Float64 second_radius = data_radius2[row]; if (isNaN(first_radius) || isNaN(second_radius)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Radius of the cap must not be nan"); diff --git a/src/Functions/s2RectAdd.cpp b/src/Functions/s2RectAdd.cpp index f48b193a47b..75da7de8f7e 100644 --- a/src/Functions/s2RectAdd.cpp +++ b/src/Functions/s2RectAdd.cpp @@ -73,7 +73,6 @@ public: arguments[0].type->getName(), 1, getName()); - const auto & data_low = col_lo->getData(); const auto * col_hi = checkAndGetColumn(arguments[1].column.get()); @@ -84,7 +83,6 @@ public: arguments[1].type->getName(), 2, getName()); - const auto & data_hi = col_hi->getData(); const auto * col_point = checkAndGetColumn(arguments[2].column.get()); @@ -95,7 +93,6 @@ public: arguments[2].type->getName(), 3, getName()); - const auto & data_point = col_point->getData(); auto col_res_first = ColumnUInt64::create(); diff --git a/src/Functions/s2RectContains.cpp b/src/Functions/s2RectContains.cpp index bb318530c0b..be46253f70e 100644 --- a/src/Functions/s2RectContains.cpp +++ b/src/Functions/s2RectContains.cpp @@ -71,7 +71,6 @@ public: arguments[0].type->getName(), 1, getName()); - const auto & data_low = col_lo->getData(); const auto * col_hi = checkAndGetColumn(arguments[1].column.get()); @@ -82,7 +81,6 @@ public: arguments[1].type->getName(), 2, getName()); - const auto & data_hi = col_hi->getData(); const auto * col_point = checkAndGetColumn(arguments[2].column.get()); @@ -93,7 +91,6 @@ public: arguments[2].type->getName(), 3, getName()); - const auto & data_point = col_point->getData(); auto dst = ColumnVector::create(); diff --git a/src/Functions/s2RectIntersection.cpp b/src/Functions/s2RectIntersection.cpp index ab5a68803bf..d4339b4d601 100644 --- a/src/Functions/s2RectIntersection.cpp +++ b/src/Functions/s2RectIntersection.cpp @@ -76,7 +76,6 @@ public: arguments[0].type->getName(), 1, getName()); - const auto & data_lo1 = col_lo1->getData(); const auto * col_hi1 = checkAndGetColumn(arguments[1].column.get()); @@ -87,7 +86,6 @@ public: arguments[1].type->getName(), 2, getName()); - const auto & data_hi1 = col_hi1->getData(); const auto * col_lo2 = checkAndGetColumn(arguments[2].column.get()); @@ -98,7 +96,6 @@ public: arguments[2].type->getName(), 3, getName()); - const auto & data_lo2 = col_lo2->getData(); const auto * col_hi2 = checkAndGetColumn(arguments[3].column.get()); @@ -109,7 +106,6 @@ public: arguments[3].type->getName(), 4, getName()); - const auto & data_hi2 = col_hi2->getData(); auto col_res_first = ColumnUInt64::create(); diff --git a/src/Functions/s2RectUnion.cpp b/src/Functions/s2RectUnion.cpp index 510f496c859..047d331e711 100644 --- a/src/Functions/s2RectUnion.cpp +++ b/src/Functions/s2RectUnion.cpp @@ -74,7 +74,6 @@ public: arguments[0].type->getName(), 1, getName()); - const auto & data_lo1 = col_lo1->getData(); const auto * col_hi1 = checkAndGetColumn(arguments[1].column.get()); @@ -85,7 +84,6 @@ public: 
arguments[1].type->getName(), 2, getName()); - const auto & data_hi1 = col_hi1->getData(); const auto * col_lo2 = checkAndGetColumn(arguments[2].column.get()); @@ -96,7 +94,6 @@ public: arguments[2].type->getName(), 3, getName()); - const auto & data_lo2 = col_lo2->getData(); const auto * col_hi2 = checkAndGetColumn(arguments[3].column.get()); @@ -107,7 +104,6 @@ public: arguments[3].type->getName(), 4, getName()); - const auto & data_hi2 = col_hi2->getData(); auto col_res_first = ColumnUInt64::create(); From 9f51fe1befb295aa085984cd467930b1c628d2b5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Feb 2022 03:47:40 +0300 Subject: [PATCH 079/215] Fix wrong engine in SHOW CREATE DATABASE with engine Memory #34225 --- src/Databases/DatabaseMemory.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 3309d25b1c2..a92c19f67c0 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -78,7 +78,9 @@ ASTPtr DatabaseMemory::getCreateDatabaseQuery() const auto create_query = std::make_shared(); create_query->setDatabase(getDatabaseName()); create_query->set(create_query->storage, std::make_shared()); - create_query->storage->set(create_query->storage->engine, makeASTFunction(getEngineName())); + auto engine = makeASTFunction(getEngineName()); + engine->no_empty_args = true; + create_query->storage->set(create_query->storage->engine, engine); if (const auto comment_value = getDatabaseComment(); !comment_value.empty()) create_query->set(create_query->comment, std::make_shared(comment_value)); From d5431b30150397ef914c20ecdff1e411c161302b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 6 Feb 2022 09:15:36 +0800 Subject: [PATCH 080/215] fix fast test --- tests/queries/0_stateless/02185_split_by_char.reference | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/02185_split_by_char.reference b/tests/queries/0_stateless/02185_split_by_char.reference index 6647c97960e..f69d8d35006 100644 --- a/tests/queries/0_stateless/02185_split_by_char.reference +++ b/tests/queries/0_stateless/02185_split_by_char.reference @@ -1,5 +1,4 @@ ['1','2','3'] -['1','2','3'] ['1,2,3'] ['1','2,3'] ['1','2','3'] From 978461fb9ac342718f6e0db8c1f83332dd85c4d0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Feb 2022 04:31:28 +0300 Subject: [PATCH 081/215] Add a test --- .../0_stateless/02206_information_schema_show_database.reference | 1 + .../0_stateless/02206_information_schema_show_database.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02206_information_schema_show_database.reference create mode 100644 tests/queries/0_stateless/02206_information_schema_show_database.sql diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.reference b/tests/queries/0_stateless/02206_information_schema_show_database.reference new file mode 100644 index 00000000000..551186fa0ab --- /dev/null +++ b/tests/queries/0_stateless/02206_information_schema_show_database.reference @@ -0,0 +1 @@ +CREATE DATABASE INFORMATION_SCHEMA\nENGINE = Memory diff --git a/tests/queries/0_stateless/02206_information_schema_show_database.sql b/tests/queries/0_stateless/02206_information_schema_show_database.sql new file mode 100644 index 00000000000..de5ca495e2e --- /dev/null +++ b/tests/queries/0_stateless/02206_information_schema_show_database.sql @@ -0,0 +1 @@ +SHOW CREATE DATABASE INFORMATION_SCHEMA; From 
b9f8fe922d4366ef1ead16fd8cfa78dedf74d68e Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 6 Feb 2022 09:34:12 +0800 Subject: [PATCH 082/215] add fasttest --- .../02205_postgresql_functions.reference | 100 ++++++++++++++++++ .../02205_postgresql_functions.sql | 59 +++++++++++ 2 files changed, 159 insertions(+) create mode 100644 tests/queries/0_stateless/02205_postgresql_functions.reference create mode 100644 tests/queries/0_stateless/02205_postgresql_functions.sql diff --git a/tests/queries/0_stateless/02205_postgresql_functions.reference b/tests/queries/0_stateless/02205_postgresql_functions.reference new file mode 100644 index 00000000000..7da18b92001 --- /dev/null +++ b/tests/queries/0_stateless/02205_postgresql_functions.reference @@ -0,0 +1,100 @@ +1 1 +1 1 +1 1 +1 1 +. . . 1 +. . . 1 +. . . 1 +._ .o .o 1 +. . . 1 +_. o. o. 1 +. . . 1 +_._ o.o o.o 1 +._ .o .o 1 +. . . 1 +._ .o .o 1 +._ .o .o 1 +._ .o .o 1 +_. o. o. 1 +._ .o .o 1 +_._ o.o o.o 1 +_. o. o. 1 +. . . 1 +_. o. o. 1 +._ .o .o 1 +_. o. o. 1 +_. o. o. 1 +_. o. o. 1 +_._ o.o o.o 1 +_._ o.o o.o 1 +. . . 1 +_._ o.o o.o 1 +._ .o .o 1 +_._ o.o o.o 1 +_. o. o. 1 +_._ o.o o.o 1 +_._ o.o o.o 1 +. . . 1 +. . . 1 +. . . 1 +._ .oo .oo 1 +. . . 1 +_. oo. oo. 1 +. . . 1 +_._ oo.oo oo.oo 1 +._ .oo .oo 1 +. . . 1 +._ .oo .oo 1 +._ .oo .oo 1 +._ .oo .oo 1 +_. oo. oo. 1 +._ .oo .oo 1 +_._ oo.oo oo.oo 1 +_. oo. oo. 1 +. . . 1 +_. oo. oo. 1 +._ .oo .oo 1 +_. oo. oo. 1 +_. oo. oo. 1 +_. oo. oo. 1 +_._ oo.oo oo.oo 1 +_._ oo.oo oo.oo 1 +. . . 1 +_._ oo.oo oo.oo 1 +._ .oo .oo 1 +_._ oo.oo oo.oo 1 +_. oo. oo. 1 +_._ oo.oo oo.oo 1 +_._ oo.oo oo.oo 1 +. . . 1 +. . . 1 +. . . 1 +.__ .oo .oo 1 +. . . 1 +__. oo. oo. 1 +. . . 1 +__.__ oo.oo oo.oo 1 +.__ .oo .oo 1 +. . . 1 +.__ .oo .oo 1 +.__ .oo .oo 1 +.__ .oo .oo 1 +__. oo. oo. 1 +.__ .oo .oo 1 +__.__ oo.oo oo.oo 1 +__. oo. oo. 1 +. . . 1 +__. oo. oo. 1 +.__ .oo .oo 1 +__. oo. oo. 1 +__. oo. oo. 1 +__. oo. oo. 1 +__.__ oo.oo oo.oo 1 +__.__ oo.oo oo.oo 1 +. . . 1 +__.__ oo.oo oo.oo 1 +.__ .oo .oo 1 +__.__ oo.oo oo.oo 1 +__. oo. oo. 
1 +__.__ oo.oo oo.oo 1 +__.__ oo.oo oo.oo 1 diff --git a/tests/queries/0_stateless/02205_postgresql_functions.sql b/tests/queries/0_stateless/02205_postgresql_functions.sql new file mode 100644 index 00000000000..343149f5272 --- /dev/null +++ b/tests/queries/0_stateless/02205_postgresql_functions.sql @@ -0,0 +1,59 @@ +--- REGEXP_MATCHES +select match('a key="v" ', 'key="(.*?)"'), REGEXP_MATCHES('a key="v" ', 'key="(.*?)"'); +select match(materialize('a key="v" '), 'key="(.*?)"'), REGEXP_MATCHES(materialize('a key="v" '), 'key="(.*?)"'); + +select match('\0 key="v" ', 'key="(.*?)"'), REGEXP_MATCHES('\0 key="v" ', 'key="(.*?)"'); +select match(materialize('\0 key="v" '), 'key="(.*?)"'), REGEXP_MATCHES(materialize('\0 key="v" '), 'key="(.*?)"'); + + +--- REGEXP_REPLACE +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '_._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['._', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['._', '._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['._', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['._', '_._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_.', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_.', '._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_.', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_.', '_._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_._', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_._', '._']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_._', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['_._', '_._']) AS s); + +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['.', '.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['.', '._']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['.', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['.', '_._']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['._', '.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['._', '._']) AS s); +SELECT s, 
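-- '_' is a regex literal in re2 (it has no metacharacter meaning), so every
-- REGEXP_REPLACE in this file is expected to agree byte-for-byte with the
-- corresponding replaceAll call, i.e. a = b should always print 1.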
replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['._', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['._', '_._']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_.', '.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_.', '._']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_.', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_.', '_._']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_._', '.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_._', '._']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_._', '_.']) AS s); +SELECT s, replaceAll(s, '_', 'oo') AS a, REGEXP_REPLACE(s, '_', 'oo') AS b, a = b FROM (SELECT arrayJoin(['_._', '_._']) AS s); + +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '__.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.', '__.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.__', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.__', '.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.__', '__.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['.__', '__.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.', '.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.', '__.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.', '__.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '.__']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '__.']) AS s); +SELECT s, replaceAll(s, '_', 'o') AS a, REGEXP_REPLACE(s, '_', 'o') AS b, a = b FROM (SELECT arrayJoin(['__.__', '__.__']) AS s); From 5fc2e46dd032311c0ec009685e49bd58717edf06 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 6 Feb 2022 09:38:05 +0800 Subject: [PATCH 083/215] make aliases case insensitive --- src/Functions/match.cpp | 2 +- 
src/Functions/replaceAll.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index 7561ffc93c0..69dc1a3d99a 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -20,7 +20,7 @@ using FunctionMatch = FunctionsStringSearch>; void registerFunctionMatch(FunctionFactory & factory) { factory.registerFunction(); - factory.registerAlias("REGEXP_MATCHES", NameMatch::name); + factory.registerAlias("REGEXP_MATCHES", NameMatch::name, FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/replaceAll.cpp b/src/Functions/replaceAll.cpp index 1cd58124125..25a5b33c3a0 100644 --- a/src/Functions/replaceAll.cpp +++ b/src/Functions/replaceAll.cpp @@ -21,7 +21,7 @@ void registerFunctionReplaceAll(FunctionFactory & factory) { factory.registerFunction(); factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive); - factory.registerAlias("REGEXP_REPLACE", NameReplaceAll::name); + factory.registerAlias("REGEXP_REPLACE", NameReplaceAll::name, FunctionFactory::CaseInsensitive); } } From 9f23caab0ee12d393cef4f3a6a31994507839d3b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 6 Feb 2022 04:44:30 +0300 Subject: [PATCH 084/215] Update clickhouse-server.service --- debian/clickhouse-server.service | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/clickhouse-server.service b/debian/clickhouse-server.service index 3bdec80632f..a9400b24270 100644 --- a/debian/clickhouse-server.service +++ b/debian/clickhouse-server.service @@ -16,6 +16,7 @@ Restart=always RestartSec=30 RuntimeDirectory=clickhouse-server ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid +# Minus means that this file is optional. EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 From 008af0e0c295dc808e1a2e5835f0a3d20c5fca89 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Feb 2022 19:07:25 -0800 Subject: [PATCH 085/215] update src/Functions/geoToS2.cpp --- src/Functions/geoToS2.cpp | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/Functions/geoToS2.cpp b/src/Functions/geoToS2.cpp index 32d2a1d7a10..d69c15bdbe0 100644 --- a/src/Functions/geoToS2.cpp +++ b/src/Functions/geoToS2.cpp @@ -19,6 +19,7 @@ namespace DB namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ILLEGAL_COLUMN; } namespace @@ -66,8 +67,25 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_lon = arguments[0].column.get(); - const auto * col_lat = arguments[1].column.get(); + const auto * col_lon = checkAndGetColumn(arguments[0].column.get()); + if (!col_lon) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. Must be Float64", + arguments[0].type->getName(), + 1, + getName()); + const auto & data_col_lon = col_lon->getData(); + + const auto * col_lat = checkAndGetColumn(arguments[1].column.get()); + if (!col_lat) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal type {} of argument {} of function {}. 
Must be Float64", + arguments[0].type->getName(), + 2, + getName()); + const auto & data_col_lat = col_lat->getData(); auto dst = ColumnVector::create(); auto & dst_data = dst->getData(); @@ -75,16 +93,14 @@ public: for (size_t row = 0; row < input_rows_count; ++row) { - const Float64 lon = col_lon->getFloat64(row); - const Float64 lat = col_lat->getFloat64(row); + const Float64 lon = data_col_lon[row]; + const Float64 lat = data_col_lat[row]; if (isNaN(lon) || isNaN(lat)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Arguments must not be NaN"); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments must not be NaN"); if (!(isFinite(lon) && isFinite(lat))) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Arguments must not be infinite"); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments must not be infinite"); /// S2 acceptes point as (latitude, longitude) S2LatLng lat_lng = S2LatLng::FromDegrees(lat, lon); @@ -95,7 +111,6 @@ public: return dst; } - }; } @@ -105,7 +120,6 @@ void registerFunctionGeoToS2(FunctionFactory & factory) factory.registerFunction(); } - } #endif From d666b2250c82f7c73d45a5f0a27c82ee953ed219 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 5 Feb 2022 19:14:07 -0800 Subject: [PATCH 086/215] fix failing tests --- src/Functions/s2GetNeighbors.cpp | 10 ++++++---- src/Functions/s2ToGeo.cpp | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Functions/s2GetNeighbors.cpp b/src/Functions/s2GetNeighbors.cpp index 0d40ff2d61c..32eaff90740 100644 --- a/src/Functions/s2GetNeighbors.cpp +++ b/src/Functions/s2GetNeighbors.cpp @@ -57,19 +57,21 @@ public: if (!WhichDataType(arg).isUInt64()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument {} of function {}. Must be Float64", - arg->getName(), 1, getName()); + "Illegal type {} of argument {} of function {}. Must be UInt64", + arg->getName(), + 1, + getName()); return std::make_shared(std::make_shared()); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_id = checkAndGetColumn(arguments[0].column.get()); + const auto * col_id = checkAndGetColumn(arguments[0].column.get()); if (!col_id) throw Exception( ErrorCodes::ILLEGAL_COLUMN, - "Illegal type {} of argument {} of function {}. Must be Float64", + "Illegal type {} of argument {} of function {}. Must be UInt64", arguments[0].type->getName(), 1, getName()); diff --git a/src/Functions/s2ToGeo.cpp b/src/Functions/s2ToGeo.cpp index e26a26909ab..3d11c21a353 100644 --- a/src/Functions/s2ToGeo.cpp +++ b/src/Functions/s2ToGeo.cpp @@ -58,7 +58,7 @@ public: if (!WhichDataType(arg).isUInt64()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument {} of function {}. Must be Float64", + "Illegal type {} of argument {} of function {}. 
Must be UInt64", arg->getName(), 1, getName()); DataTypePtr element = std::make_shared(); @@ -68,7 +68,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - const auto * col_id = checkAndGetColumn(arguments[0].column.get()); + const auto * col_id = checkAndGetColumn(arguments[0].column.get()); if (!col_id) throw Exception( ErrorCodes::ILLEGAL_COLUMN, From caed1898b09a48b14ccab4c73365b97e9e4b8c0e Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sun, 6 Feb 2022 11:22:05 +0800 Subject: [PATCH 087/215] add options for clickhouse-format --- programs/format/Format.cpp | 8 ++++++-- src/Core/Settings.cpp | 26 +++++++++++++++++++------- src/Core/Settings.h | 10 ++++++++++ 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 4b0e8ad1ca1..9c756a27915 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -57,8 +57,12 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ("seed", po::value(), "seed (arbitrary string) that determines the result of obfuscation") ; + Settings cmd_settings; + cmd_settings.addFormatOptions(desc); + boost::program_options::variables_map options; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + po::notify(options); if (options.count("help")) { @@ -149,7 +153,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ParserQuery parser(end); do { - ASTPtr res = parseQueryAndMovePosition(parser, pos, end, "query", multiple, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr res = parseQueryAndMovePosition( + parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); /// For insert query with data(INSERT INTO ... VALUES ...), will lead to format fail, /// should throw exception early and make exception message more readable. 
if (const auto * insert_query = res->as(); insert_query && insert_query->data) @@ -222,6 +227,5 @@ int mainEntryClickHouseFormat(int argc, char ** argv) std::cerr << getCurrentExceptionMessage(true) << '\n'; return getCurrentExceptionCode(); } - return 0; } diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 8daf39d9928..772cadab3fc 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -85,16 +85,28 @@ void Settings::addProgramOptions(boost::program_options::options_description & o { for (const auto & field : all()) { - const std::string_view name = field.getName(); - auto on_program_option - = boost::function1([this, name](const std::string & value) { set(name, value); }); - options.add(boost::shared_ptr(new boost::program_options::option_description( - name.data(), - boost::program_options::value()->composing()->notifier(on_program_option), - field.getDescription()))); + addProgramOption(options, field); } } +void Settings::addFormatOptions(boost::program_options::options_description & options) +{ + for (const auto & field : all()) + { + const auto & name = field.getName(); + if (formatSettingNames.count(name)) + addProgramOption(options, field); + } +} + +void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field) +{ + const std::string_view name = field.getName(); + auto on_program_option = boost::function1([this, name](const std::string & value) { set(name, value); }); + options.add(boost::shared_ptr(new boost::program_options::option_description( + name.data(), boost::program_options::value()->composing()->notifier(on_program_option), field.getDescription()))); +} + void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path) { if (config.getBool("skip_check_for_incorrect_settings", false)) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 48dd637a943..f46066a426f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -712,11 +712,21 @@ struct Settings : public BaseSettings, public IHints<2, Settings /// (Don't forget to call notify() on the `variables_map` after parsing it!) void addProgramOptions(boost::program_options::options_description & options); + /// Adds program options for clickhouse-format to set the settings from a command line. + /// (Don't forget to call notify() on the `variables_map` after parsing it!) + void addFormatOptions(boost::program_options::options_description & options); + /// Check that there is no user-level settings at the top level in config. /// This is a common source of mistake (user don't know where to write user-level setting). 
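 /// (For example -- element names illustrative -- a <max_memory_usage> tag put
 /// at the top level of config.xml instead of inside a settings profile in
 /// users.xml.)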
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); std::vector getAllRegisteredNames() const override; + +private: + void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field); + + inline static const std::unordered_set formatSettingNames + = {"max_parser_depth", "max_query_size"}; }; /* From f98010e37436acac69c39c5b7ce041f4d919623b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Feb 2022 07:14:01 +0300 Subject: [PATCH 088/215] Small improvements --- src/Processors/Formats/IRowOutputFormat.cpp | 5 ++--- src/Processors/Formats/IRowOutputFormat.h | 1 + src/Processors/Formats/Impl/AvroRowInputFormat.h | 4 ++-- src/Processors/Formats/Impl/AvroRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/BinaryRowInputFormat.h | 4 ++-- src/Processors/Formats/Impl/BinaryRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/CSVRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/CapnProtoRowInputFormat.h | 2 +- src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h | 4 ++-- .../Formats/Impl/CustomSeparatedRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/HiveTextRowInputFormat.h | 4 ++-- src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h | 2 +- .../Formats/Impl/JSONCompactEachRowRowInputFormat.h | 4 ++-- .../Formats/Impl/JSONCompactEachRowRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h | 2 +- .../Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/LineAsStringRowInputFormat.h | 2 +- src/Processors/Formats/Impl/MarkdownRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/MsgPackRowInputFormat.h | 2 +- src/Processors/Formats/Impl/MsgPackRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/ProtobufRowInputFormat.h | 2 +- src/Processors/Formats/Impl/ProtobufRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/RawBLOBRowInputFormat.h | 3 +-- src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h | 3 +-- src/Processors/Formats/Impl/RegexpRowInputFormat.h | 2 +- src/Processors/Formats/Impl/TSKVRowInputFormat.h | 2 +- src/Processors/Formats/Impl/TSKVRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h | 4 ++-- src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h | 6 +++--- src/Processors/Formats/Impl/TemplateRowInputFormat.h | 2 +- src/Processors/Formats/Impl/ValuesRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/VerticalRowOutputFormat.h | 2 +- src/Processors/Formats/Impl/XMLRowOutputFormat.h | 2 +- 35 files changed, 44 insertions(+), 46 deletions(-) diff --git a/src/Processors/Formats/IRowOutputFormat.cpp b/src/Processors/Formats/IRowOutputFormat.cpp index 33777bed519..b48c4a2b3e6 100644 --- a/src/Processors/Formats/IRowOutputFormat.cpp +++ b/src/Processors/Formats/IRowOutputFormat.cpp @@ -15,7 +15,8 @@ IRowOutputFormat::IRowOutputFormat(const Block & header, WriteBuffer & out_, con , types(header.getDataTypes()) , params(params_) { - serializations.reserve(types.size()); + num_columns = types.size(); + serializations.reserve(num_columns); for (const auto & type : types) serializations.push_back(type->getDefaultSerialization()); } @@ -68,8 +69,6 @@ void IRowOutputFormat::consumeExtremes(DB::Chunk chunk) void IRowOutputFormat::write(const Columns & columns, size_t row_num) { - size_t num_columns = columns.size(); - writeRowStartDelimiter(); 
for (size_t i = 0; i < num_columns; ++i) diff --git a/src/Processors/Formats/IRowOutputFormat.h b/src/Processors/Formats/IRowOutputFormat.h index 7f00313ce2d..7a57753d765 100644 --- a/src/Processors/Formats/IRowOutputFormat.h +++ b/src/Processors/Formats/IRowOutputFormat.h @@ -57,6 +57,7 @@ protected: virtual void writeAfterExtremes() {} virtual void finalizeImpl() override {} /// Write something after resultset, totals end extremes. + size_t num_columns; DataTypes types; Serializations serializations; Params params; diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 46e571d87ec..1e8ee4aebb9 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -115,7 +115,7 @@ private: std::map symbolic_skip_fn_map; }; -class AvroRowInputFormat : public IRowInputFormat +class AvroRowInputFormat final : public IRowInputFormat { public: AvroRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_); @@ -137,7 +137,7 @@ private: /// 2. SchemaRegistry: schema cache (schema_id -> schema) /// 3. AvroConfluentRowInputFormat: deserializer cache (schema_id -> AvroDeserializer) /// This is needed because KafkaStorage creates a new instance of InputFormat per a batch of messages -class AvroConfluentRowInputFormat : public IRowInputFormat +class AvroConfluentRowInputFormat final : public IRowInputFormat { public: AvroConfluentRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.h b/src/Processors/Formats/Impl/AvroRowOutputFormat.h index b07edd88ae1..0a2acc93688 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.h @@ -43,7 +43,7 @@ private: std::unique_ptr traits; }; -class AvroRowOutputFormat : public IRowOutputFormat +class AvroRowOutputFormat final : public IRowOutputFormat { public: AvroRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index d98e75bf621..ff7cc013cee 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -17,7 +17,7 @@ class ReadBuffer; /** A stream for inputting data in a binary line-by-line format. 
*/ -class BinaryRowInputFormat : public RowInputFormatWithNamesAndTypes +class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); @@ -30,7 +30,7 @@ public: std::string getDiagnosticInfo() override { return {}; } }; -class BinaryFormatReader : public FormatWithNamesAndTypesReader +class BinaryFormatReader final : public FormatWithNamesAndTypesReader { public: BinaryFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/BinaryRowOutputFormat.h b/src/Processors/Formats/Impl/BinaryRowOutputFormat.h index 0edfd4bfcf8..40894608677 100644 --- a/src/Processors/Formats/Impl/BinaryRowOutputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowOutputFormat.h @@ -14,7 +14,7 @@ class WriteBuffer; /** A stream for outputting data in a binary line-by-line format. */ -class BinaryRowOutputFormat: public IRowOutputFormat +class BinaryRowOutputFormat final: public IRowOutputFormat { public: BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, const RowOutputFormatParams & params_); diff --git a/src/Processors/Formats/Impl/CSVRowOutputFormat.h b/src/Processors/Formats/Impl/CSVRowOutputFormat.h index dd9c2179f19..a36c5ff47fb 100644 --- a/src/Processors/Formats/Impl/CSVRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowOutputFormat.h @@ -14,7 +14,7 @@ class WriteBuffer; /** The stream for outputting data in csv format. * Does not conform with https://tools.ietf.org/html/rfc4180 because it uses LF, not CR LF. */ -class CSVRowOutputFormat : public IRowOutputFormat +class CSVRowOutputFormat final : public IRowOutputFormat { public: /** with_names - output in the first line a header with column names diff --git a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h index 053de14d1a4..a8aa6ccda05 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowInputFormat.h @@ -20,7 +20,7 @@ class ReadBuffer; * The schema in this case cannot be compiled in, so it uses a runtime schema parser. 
* See https://capnproto.org/cxx.html */ -class CapnProtoRowInputFormat : public IRowInputFormat +class CapnProtoRowInputFormat final : public IRowInputFormat { public: CapnProtoRowInputFormat(ReadBuffer & in_, Block header, Params params_, const FormatSchemaInfo & info, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index 6e27426f2cc..288b36508ce 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -23,7 +23,7 @@ private: WriteBuffer & out; }; -class CapnProtoRowOutputFormat : public IRowOutputFormat +class CapnProtoRowOutputFormat final : public IRowOutputFormat { public: CapnProtoRowOutputFormat( diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index d38d5bf0da4..a2f4509d307 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -8,7 +8,7 @@ namespace DB { -class CustomSeparatedRowInputFormat : public RowInputFormatWithNamesAndTypes +class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: CustomSeparatedRowInputFormat( @@ -35,7 +35,7 @@ private: bool ignore_spaces; }; -class CustomSeparatedFormatReader : public FormatWithNamesAndTypesReader +class CustomSeparatedFormatReader final : public FormatWithNamesAndTypesReader { public: CustomSeparatedFormatReader(PeekableReadBuffer & buf_, bool ignore_spaces_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h index 274df1af330..0e04764b993 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowOutputFormat.h @@ -8,7 +8,7 @@ namespace DB class WriteBuffer; -class CustomSeparatedRowOutputFormat : public IRowOutputFormat +class CustomSeparatedRowOutputFormat final : public IRowOutputFormat { public: CustomSeparatedRowOutputFormat(const Block & header_, WriteBuffer & out_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_, bool with_names_, bool with_types_); diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 3b37078acb3..8a7bee45e59 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -12,7 +12,7 @@ namespace DB /// A stream for input data in Hive Text format. /// Parallel parsing is disabled currently. 
-class HiveTextRowInputFormat : public CSVRowInputFormat +class HiveTextRowInputFormat final : public CSVRowInputFormat { public: HiveTextRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, const FormatSettings & format_settings_); @@ -24,7 +24,7 @@ private: const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_); }; -class HiveTextFormatReader : public CSVFormatReader +class HiveTextFormatReader final : public CSVFormatReader { public: HiveTextFormatReader(std::unique_ptr buf_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h index ea6e9a1ed2f..9979a5d1474 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h @@ -15,7 +15,7 @@ class ReadBuffer; /// Each JSON object is parsed as a whole to string. /// This format can only parse a table with single field of type String. -class JSONAsStringRowInputFormat : public IRowInputFormat +class JSONAsStringRowInputFormat final : public IRowInputFormat { public: JSONAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index efa0604fc6c..79c76214774 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -19,7 +19,7 @@ class ReadBuffer; * - JSONCompactStringsEachRowWithNamesAndTypes * */ -class JSONCompactEachRowRowInputFormat : public RowInputFormatWithNamesAndTypes +class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: JSONCompactEachRowRowInputFormat( @@ -38,7 +38,7 @@ private: void syncAfterError() override; }; -class JSONCompactEachRowFormatReader : public FormatWithNamesAndTypesReader +class JSONCompactEachRowFormatReader final : public FormatWithNamesAndTypesReader { public: JSONCompactEachRowFormatReader(ReadBuffer & in_, bool yield_strings_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h index 6cb78bab49d..63e9e9f1b76 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.h @@ -12,7 +12,7 @@ namespace DB /** The stream for outputting data in JSON format, by object per line. * Does not validate UTF-8. */ -class JSONCompactEachRowRowOutputFormat : public IRowOutputFormat +class JSONCompactEachRowRowOutputFormat final : public IRowOutputFormat { public: JSONCompactEachRowRowOutputFormat( diff --git a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h index 961bd569d39..a0e9a2a6026 100644 --- a/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactRowOutputFormat.h @@ -13,7 +13,7 @@ struct FormatSettings; /** The stream for outputting data in the JSONCompact- formats. 
*/ -class JSONCompactRowOutputFormat : public JSONRowOutputFormat +class JSONCompactRowOutputFormat final : public JSONRowOutputFormat { public: JSONCompactRowOutputFormat( diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index c711d3ef246..29aba696411 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -18,7 +18,7 @@ class ReadBuffer; * Fields can be listed in any order (including, in different lines there may be different order), * and some fields may be missing. */ -class JSONEachRowRowInputFormat : public IRowInputFormat +class JSONEachRowRowInputFormat final : public IRowInputFormat { public: JSONEachRowRowInputFormat( diff --git a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h index fe74f7ce7a3..6bdf27a472e 100644 --- a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.h @@ -5,7 +5,7 @@ namespace DB { -class JSONEachRowWithProgressRowOutputFormat : public JSONEachRowRowOutputFormat +class JSONEachRowWithProgressRowOutputFormat final : public JSONEachRowRowOutputFormat { public: using JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat; diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h index c4c17c47dbe..080ff9985af 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.h @@ -14,7 +14,7 @@ class ReadBuffer; /// Each Line object is parsed as a whole to string. /// This format can only parse a table with single field of type String. 
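Most of this patch is the same mechanical change repeated across every concrete format class: adding `final`. Besides documenting that these classes are leaf types, `final` lets the compiler devirtualize the per-row virtual calls that dominate these code paths whenever the static type is known. A self-contained sketch of the effect, with stand-in method bodies rather than the real ones:

    #include <cstddef>
    #include <cstdio>

    struct IRowOutputFormat
    {
        virtual void writeRowEndDelimiter() = 0;
        virtual ~IRowOutputFormat() = default;
    };

    struct CSVRowOutputFormat final : IRowOutputFormat
    {
        void writeRowEndDelimiter() override { std::putchar('\n'); }
    };

    /// Because CSVRowOutputFormat is final, no further override can exist, so the
    /// compiler may turn this virtual call into a direct (even inlined) call.
    void writeRows(CSVRowOutputFormat & format, size_t rows)
    {
        for (size_t i = 0; i < rows; ++i)
            format.writeRowEndDelimiter();
    }

    int main()
    {
        CSVRowOutputFormat format;
        writeRows(format, 3);
    }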
-class LineAsStringRowInputFormat : public IRowInputFormat +class LineAsStringRowInputFormat final : public IRowInputFormat { public: LineAsStringRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); diff --git a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h index 7a2aaf86f7d..c6e15282780 100644 --- a/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MarkdownRowOutputFormat.h @@ -9,7 +9,7 @@ namespace DB class ReadBuffer; -class MarkdownRowOutputFormat : public IRowOutputFormat +class MarkdownRowOutputFormat final : public IRowOutputFormat { public: MarkdownRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index dd5655c80fc..c2ad31c7c4c 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -55,7 +55,7 @@ private: std::stack info_stack; }; -class MsgPackRowInputFormat : public IRowInputFormat +class MsgPackRowInputFormat final : public IRowInputFormat { public: MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index 17d055818e9..19b37afed90 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -15,7 +15,7 @@ namespace DB { -class MsgPackRowOutputFormat : public IRowOutputFormat +class MsgPackRowOutputFormat final : public IRowOutputFormat { public: MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_); diff --git a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h index d7d16d36ddf..9566cb45106 100644 --- a/src/Processors/Formats/Impl/ProtobufRowInputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowInputFormat.h @@ -26,7 +26,7 @@ class ProtobufSerializer; * INSERT INTO table FORMAT Protobuf SETTINGS format_schema = 'schema:Message' * where schema is the name of "schema.proto" file specifying protobuf schema. */ -class ProtobufRowInputFormat : public IRowInputFormat +class ProtobufRowInputFormat final : public IRowInputFormat { public: ProtobufRowInputFormat(ReadBuffer & in_, const Block & header_, const Params & params_, const FormatSchemaInfo & schema_info_, bool with_length_delimiter_); diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h index 97b727842a7..43d79b4d091 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.h @@ -26,7 +26,7 @@ struct FormatSettings; * SELECT * from table FORMAT Protobuf SETTINGS format_schema = 'schema:Message' * where schema is the name of "schema.proto" file specifying protobuf schema. 
*/ -class ProtobufRowOutputFormat : public IRowOutputFormat +class ProtobufRowOutputFormat final : public IRowOutputFormat { public: ProtobufRowOutputFormat( diff --git a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h index 367ca04f9d8..6fc1f277015 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h +++ b/src/Processors/Formats/Impl/RawBLOBRowInputFormat.h @@ -13,7 +13,7 @@ class ReadBuffer; /// This format slurps all input data into single value. /// This format can only parse a table with single field of type String or similar. -class RawBLOBRowInputFormat : public IRowInputFormat +class RawBLOBRowInputFormat final : public IRowInputFormat { public: RawBLOBRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_); @@ -34,4 +34,3 @@ public: }; } - diff --git a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h index 2c34595c1a4..f6c4f0a58ca 100644 --- a/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h +++ b/src/Processors/Formats/Impl/RawBLOBRowOutputFormat.h @@ -24,7 +24,7 @@ class WriteBuffer; * * If you are output more than one value, the output format is ambiguous and you may not be able to read data back. */ -class RawBLOBRowOutputFormat : public IRowOutputFormat +class RawBLOBRowOutputFormat final : public IRowOutputFormat { public: RawBLOBRowOutputFormat( @@ -39,4 +39,3 @@ private: }; } - diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index e70595b4bb7..75c630d0607 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -48,7 +48,7 @@ private: /// (according to format_regexp_escaping_rule setting). If the regexp did not match the line, /// if format_regexp_skip_unmatched is 1, the line is silently skipped, if the setting is 0, exception will be thrown. -class RegexpRowInputFormat : public IRowInputFormat +class RegexpRowInputFormat final : public IRowInputFormat { public: RegexpRowInputFormat(ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.h b/src/Processors/Formats/Impl/TSKVRowInputFormat.h index 6aef50a0f84..3f708355b85 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.h +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.h @@ -21,7 +21,7 @@ class ReadBuffer; * An equal sign can be escaped in the field name. * Also, as an additional element there may be a useless tskv fragment - it needs to be ignored. */ -class TSKVRowInputFormat : public IRowInputFormat +class TSKVRowInputFormat final : public IRowInputFormat { public: TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/TSKVRowOutputFormat.h b/src/Processors/Formats/Impl/TSKVRowOutputFormat.h index 980e36c7e25..e9f9071f906 100644 --- a/src/Processors/Formats/Impl/TSKVRowOutputFormat.h +++ b/src/Processors/Formats/Impl/TSKVRowOutputFormat.h @@ -11,7 +11,7 @@ namespace DB * TSKV is similar to TabSeparated, but before every value, its name and equal sign are specified: name=value. * This format is very inefficient. 
*/ -class TSKVRowOutputFormat: public TabSeparatedRowOutputFormat +class TSKVRowOutputFormat final : public TabSeparatedRowOutputFormat { public: TSKVRowOutputFormat(WriteBuffer & out_, const Block & header, const RowOutputFormatParams & params_, const FormatSettings & format_settings); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 1f2bfc255b8..ed67a8256bc 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -11,7 +11,7 @@ namespace DB /** A stream to input data in tsv format. */ -class TabSeparatedRowInputFormat : public RowInputFormatWithNamesAndTypes +class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: /** with_names - the first line is the header with the names of the columns @@ -28,7 +28,7 @@ private: bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } }; -class TabSeparatedFormatReader : public FormatWithNamesAndTypesReader +class TabSeparatedFormatReader final : public FormatWithNamesAndTypesReader { public: TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings, bool is_raw_); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h index eeada54d74e..8aac94812e2 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowOutputFormat.h @@ -34,10 +34,10 @@ public: protected: void writeField(const IColumn & column, const ISerialization & serialization, size_t row_num) override; - void writeFieldDelimiter() override; + void writeFieldDelimiter() override final; void writeRowEndDelimiter() override; - void writeBeforeTotals() override; - void writeBeforeExtremes() override; + void writeBeforeTotals() override final; + void writeBeforeExtremes() override final; void writePrefix() override; void writeLine(const std::vector & values); diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 755ad6cb39b..b5ced707ace 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -15,7 +15,7 @@ namespace DB class TemplateFormatReader; -class TemplateRowInputFormat : public RowInputFormatWithDiagnosticInfo +class TemplateRowInputFormat final : public RowInputFormatWithDiagnosticInfo { using EscapingRule = FormatSettings::EscapingRule; public: diff --git a/src/Processors/Formats/Impl/ValuesRowOutputFormat.h b/src/Processors/Formats/Impl/ValuesRowOutputFormat.h index 8d89854d43c..76c0a1e7873 100644 --- a/src/Processors/Formats/Impl/ValuesRowOutputFormat.h +++ b/src/Processors/Formats/Impl/ValuesRowOutputFormat.h @@ -12,7 +12,7 @@ class WriteBuffer; /** A stream for outputting data in the VALUES format (as in the INSERT request). 
*/ -class ValuesRowOutputFormat : public IRowOutputFormat +class ValuesRowOutputFormat final : public IRowOutputFormat { public: ValuesRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/VerticalRowOutputFormat.h b/src/Processors/Formats/Impl/VerticalRowOutputFormat.h index 037aa183659..796c60d1cb1 100644 --- a/src/Processors/Formats/Impl/VerticalRowOutputFormat.h +++ b/src/Processors/Formats/Impl/VerticalRowOutputFormat.h @@ -15,7 +15,7 @@ class Context; /** Stream to output data in format "each value in separate row". * Usable to show few rows with many columns. */ -class VerticalRowOutputFormat : public IRowOutputFormat +class VerticalRowOutputFormat final : public IRowOutputFormat { public: VerticalRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); diff --git a/src/Processors/Formats/Impl/XMLRowOutputFormat.h b/src/Processors/Formats/Impl/XMLRowOutputFormat.h index 04d81f0c2e1..abec25efbb9 100644 --- a/src/Processors/Formats/Impl/XMLRowOutputFormat.h +++ b/src/Processors/Formats/Impl/XMLRowOutputFormat.h @@ -13,7 +13,7 @@ namespace DB /** A stream for outputting data in XML format. */ -class XMLRowOutputFormat : public IRowOutputFormat +class XMLRowOutputFormat final : public IRowOutputFormat { public: XMLRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); From 7674bc986e963dd74392fbe138408d29d74dd325 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 16 Jan 2022 16:32:32 +0800 Subject: [PATCH 089/215] Disable projection when there is JOIN or SAMPLE --- src/Interpreters/ExpressionAnalyzer.cpp | 5 ++- src/Storages/MergeTree/MergeTreeData.cpp | 37 ++++++++++++------- .../01710_projection_with_joins.reference | 2 + .../01710_projection_with_joins.sql | 15 +++++++- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index c195cb93c5e..4a5f18a408f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -944,7 +944,10 @@ static std::unique_ptr buildJoinedPlan( * - JOIN tables will need aliases to correctly resolve USING clause. */ auto interpreter = interpretSubquery( - join_element.table_expression, context, original_right_columns, query_options.copy().setWithAllColumns().ignoreAlias(false)); + join_element.table_expression, + context, + original_right_columns, + query_options.copy().setWithAllColumns().ignoreProjections(false).ignoreAlias(false)); auto joined_plan = std::make_unique(); interpreter->buildQueryPlan(*joined_plan); { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e5771c016e5..0b29545120c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -4630,23 +4631,33 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return std::nullopt; - const auto & query_ptr = query_info.original_query; - - if (auto * select = query_ptr->as(); select) - { - // Currently projections don't support final yet. 
- if (select->final()) - return std::nullopt; - - // Currently projections don't support ARRAY JOIN yet. - if (select->arrayJoinExpressionList().first) - return std::nullopt; - } - // Currently projections don't support sampling yet. if (settings.parallel_replicas_count > 1) return std::nullopt; + auto query_ptr = query_info.original_query; + auto * select_query = query_ptr->as(); + if (!select_query) + return std::nullopt; + + // Currently projections don't support final yet. + if (select_query->final()) + return std::nullopt; + + // Currently projections don't support sample yet. + if (select_query->sampleSize()) + return std::nullopt; + + // Currently projections don't support ARRAY JOIN yet. + if (select_query->arrayJoinExpressionList().first) + return std::nullopt; + + // In order to properly analyze joins, aliases should be recognized. However, aliases get lost during projection analysis. + // Let's disable projection if there are any JOIN clauses. + // TODO: We need a better identifier resolution mechanism for projection analysis. + if (select_query->join()) + return std::nullopt; + InterpreterSelectQuery select( query_ptr, query_context, diff --git a/tests/queries/0_stateless/01710_projection_with_joins.reference b/tests/queries/0_stateless/01710_projection_with_joins.reference index e69de29bb2d..4792e70f333 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.reference +++ b/tests/queries/0_stateless/01710_projection_with_joins.reference @@ -0,0 +1,2 @@ +2 +3 diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql index fcd1c586fa3..a9aaf6325d4 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.sql +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -1,8 +1,21 @@ drop table if exists t; -create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; +create table t (s UInt16, l UInt16, projection p (select s, l order by l)) engine MergeTree order by s; select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 1; select s from t join (select toUInt16(1) as s) x using (s) settings allow_experimental_projection_optimization = 0; drop table t; + +drop table if exists mt; +create table mt (id1 Int8, id2 Int8) Engine=MergeTree order by tuple(); +select id1 as alias1 from mt all inner join (select id2 as alias1 from mt) as t using (alias1) settings allow_experimental_projection_optimization = 1; +select id1 from mt all inner join (select id2 as id1 from mt) as t using (id1) settings allow_experimental_projection_optimization = 1; +select id2 as id1 from mt all inner join (select id1 from mt) as t using (id1) settings allow_experimental_projection_optimization = 1; +drop table mt; + +drop table if exists j; +create table j (id1 Int8, id2 Int8, projection p (select id1, id2 order by id2)) Engine=MergeTree order by id1 settings index_granularity = 1; +insert into j select number, number from numbers(10); +select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +drop table j; From 27fcefd315ab38704beea691c044326d092308f4 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 19:20:22 +0800 Subject: [PATCH 090/215] Disable projection when doing parallel replica reading --- 
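The diff below adds `max_parallel_replicas` to the existing `parallel_replicas_count` check. Combined with the FINAL, SAMPLE, ARRAY JOIN and JOIN guards from the previous patch, projection eligibility becomes a plain chain of early returns. A condensed sketch of that control flow, using stub types instead of the real Settings and ASTSelectQuery:

    #include <optional>

    /// Stand-in stubs; the real code works on DB::Settings and DB::ASTSelectQuery.
    struct Settings
    {
        bool allow_experimental_projection_optimization = true;
        unsigned parallel_replicas_count = 1;
        unsigned max_parallel_replicas = 1;
    };

    struct SelectQueryFlags
    {
        bool is_final = false;
        bool has_sample = false;
        bool has_array_join = false;
        bool has_join = false;
    };

    std::optional<int> projectionStage(const Settings & settings, const SelectQueryFlags & select)
    {
        if (!settings.allow_experimental_projection_optimization)
            return std::nullopt;
        /// Projections don't support parallel replicas reading yet.
        if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1)
            return std::nullopt;
        /// FINAL, SAMPLE and ARRAY JOIN aren't supported yet.
        if (select.is_final || select.has_sample || select.has_array_join)
            return std::nullopt;
        /// Aliases get lost during projection analysis, so JOIN is disabled too.
        if (select.has_join)
            return std::nullopt;
        return 1; /// stand-in for QueryProcessingStage::WithMergeableState
    }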
src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0b29545120c..aaf6cf3884e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4631,8 +4631,8 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections || query_info.is_projection_query) return std::nullopt; - // Currently projections don't support sampling yet. - if (settings.parallel_replicas_count > 1) + // Currently projections don't support parallel replicas reading yet. + if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1) return std::nullopt; auto query_ptr = query_info.original_query; From 98857de82ba39c6ab2081a01f054793519c66c2f Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 02:31:24 +0800 Subject: [PATCH 091/215] Disable projection for high-order storages --- src/Storages/StorageBuffer.cpp | 5 ++++- src/Storages/StorageMaterializedView.cpp | 4 ++++ src/Storages/StorageMerge.cpp | 6 +++++- src/Storages/StorageProxy.h | 3 +++ 4 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 9f0cb478bb6..f97c09471c3 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -199,6 +199,8 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage( if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); + /// TODO: Find a way to support projections for StorageBuffer + query_info.ignore_projections = true; return destination->getQueryProcessingStage(local_context, to_stage, destination->getInMemoryMetadataPtr(), query_info); } @@ -365,9 +367,10 @@ void StorageBuffer::read( */ if (processed_stage > QueryProcessingStage::FetchColumns) { + /// TODO: Find a way to support projections for StorageBuffer auto interpreter = InterpreterSelectQuery( query_info.query, local_context, std::move(pipe_from_buffers), - SelectQueryOptions(processed_stage)); + SelectQueryOptions(processed_stage).ignoreProjections()); interpreter.buildQueryPlan(buffers_plan); } else diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 49111e02b11..7c5ef5ac04c 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -135,6 +135,10 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage( const StorageMetadataPtr &, SelectQueryInfo & query_info) const { + /// TODO: Find a way to support projections for StorageMaterializedView. Why do we use different + /// metadata for materialized view and target table? If they are the same, we can get rid of all + /// converting and use it just like a normal view. 
+    query_info.ignore_projections = true;
     return getTargetTable()->getQueryProcessingStage(local_context, to_stage, getTargetTable()->getInMemoryMetadataPtr(), query_info);
 }
 
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 0dc6f2931d3..433fdb5b0b5 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -188,6 +188,8 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(
 
     size_t selected_table_size = 0;
 
+    /// TODO: Find a way to support projections for StorageMerge
+    query_info.ignore_projections = true;
     for (const auto & iterator : database_table_iterators)
     {
         while (iterator->isValid())
@@ -471,7 +473,9 @@ Pipe StorageMerge::createSources(
             modified_context->setSetting("max_threads", streams_num);
             modified_context->setSetting("max_streams_to_max_threads_ratio", 1);
 
-            InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage)};
+            /// TODO: Find a way to support projections for StorageMerge
+            InterpreterSelectQuery interpreter{
+                modified_query_info.query, modified_context, SelectQueryOptions(processed_stage).ignoreProjections()};
 
             pipe = QueryPipelineBuilder::getPipe(interpreter.buildQueryPipeline());
 
diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h
index 304f84c02eb..894b470ef22 100644
--- a/src/Storages/StorageProxy.h
+++ b/src/Storages/StorageProxy.h
@@ -1,6 +1,7 @@
 #pragma once
 
 #include 
+#include 
 #include 
 
 
@@ -37,6 +38,8 @@ public:
         const StorageMetadataPtr &,
         SelectQueryInfo & info) const override
     {
+        /// TODO: Find a way to support projections for StorageProxy
+        info.ignore_projections = true;
         return getNested()->getQueryProcessingStage(context, to_stage, getNested()->getInMemoryMetadataPtr(), info);
     }
 
From 82f31e1abb2ae510b99e8bc6aaf7b969092102f5 Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Thu, 3 Feb 2022 02:31:45 +0800
Subject: [PATCH 092/215] Fix tests when projection is enabled

Avoid using count() in quota-related tests.

count() can be subject to many optimization techniques, which makes it
unstable for testing quota usage.
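The reasoning: count() can be answered from part metadata (the trivial-count optimization) or, once this series lands, from the implicit minmax_count projection, so the rows and bytes it appears to read depend on which plan is chosen. An aggregate over a real column always has to scan it, which keeps the quota counters deterministic. Roughly, using the test's own table:

    -- May be served from metadata or _minmax_count_projection, so the
    -- read_rows/read_bytes recorded against the quota vary with the plan:
    SELECT count() FROM test_table;

    -- Must actually read column x, so quota usage is stable:
    SELECT SUM(x) FROM test_table;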
--- tests/integration/test_quota/test.py | 8 ++++---- ...01505_trivial_count_with_partition_predicate.reference | 2 +- .../01505_trivial_count_with_partition_predicate.sql | 2 +- .../0_stateless/01710_minmax_count_projection.reference | 4 ++-- .../queries/0_stateless/01710_minmax_count_projection.sql | 2 +- tests/queries/0_stateless/01710_projection_with_joins.sql | 2 +- tests/queries/0_stateless/01739_index_hint.reference | 4 ++-- tests/queries/0_stateless/01739_index_hint.sql | 2 +- tests/queries/0_stateless/01748_partition_id_pruning.sql | 2 ++ 9 files changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4149987996b..83ee32bd7dd 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -94,9 +94,9 @@ def test_quota_from_users_xml(): system_quota_usage( [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) - instance.query("SELECT COUNT() from test_table") + instance.query("SELECT SUM(x) from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 100, 1000, 400, "\\N", "\\N"]]) def test_simpliest_quota(): @@ -125,9 +125,9 @@ def test_tracking_quota(): system_quota_usage( [["myQuota", "default", 31556952, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) - instance.query("SELECT COUNT() from test_table") + instance.query("SELECT SUM(x) from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 100, "\\N", 400, "\\N", "\\N"]]) def test_exceed_quota(): diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference index b8b8fae2830..5abc312652d 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.reference @@ -5,7 +5,7 @@ 0 1 0 -2 +1 0 4 6 diff --git a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql index ecf0b791a49..e4e2e3dd76a 100644 --- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql +++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql @@ -31,7 +31,7 @@ select count() from test_tuple where toDate(p) > '2020-09-01'; -- optimized select count() from test_tuple where toDate(p) > '2020-09-01' and i = 1; -- optimized -select count() from test_tuple where i > 1; +select count() from test_tuple where i > 2; -- optimized select count() from test_tuple where i < 1; -- non-optimized diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.reference b/tests/queries/0_stateless/01710_minmax_count_projection.reference index 77649f536f5..b13738a66de 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.reference +++ b/tests/queries/0_stateless/01710_minmax_count_projection.reference @@ -13,7 +13,7 @@ 1 1 1 -\N 2021-10-27 10:00:00 4 -2021-10-24 10:00:00 +\N 
2021-10-27 10:00:00 3 +0 2021-10-24 10:00:00 0 diff --git a/tests/queries/0_stateless/01710_minmax_count_projection.sql b/tests/queries/0_stateless/01710_minmax_count_projection.sql index 713241ada72..c0f2250cc0f 100644 --- a/tests/queries/0_stateless/01710_minmax_count_projection.sql +++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql @@ -53,7 +53,7 @@ select count() from d group by toDate(dt); -- fuzz crash SELECT pointInEllipses(min(j), NULL), max(dt), count('0.0000000007') FROM d WHERE toDate(dt) >= '2021-10-25'; -SELECT min(dt) FROM d PREWHERE ceil(j) <= 0; +SELECT min(j) FROM d PREWHERE ceil(j) <= 0; SELECT min(dt) FROM d PREWHERE ((0.9998999834060669 AND 1023) AND 255) <= ceil(j); SELECT count('') AND NULL FROM d PREWHERE ceil(j) <= NULL; diff --git a/tests/queries/0_stateless/01710_projection_with_joins.sql b/tests/queries/0_stateless/01710_projection_with_joins.sql index a9aaf6325d4..a54ba21fd27 100644 --- a/tests/queries/0_stateless/01710_projection_with_joins.sql +++ b/tests/queries/0_stateless/01710_projection_with_joins.sql @@ -17,5 +17,5 @@ drop table mt; drop table if exists j; create table j (id1 Int8, id2 Int8, projection p (select id1, id2 order by id2)) Engine=MergeTree order by id1 settings index_granularity = 1; insert into j select number, number from numbers(10); -select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) settings allow_experimental_projection_optimization = 1, force_optimize_projection = 1; +select id1 as alias1 from j all inner join (select id2 as alias1 from j where id2 in (1, 2, 3)) as t using (alias1) where id2 in (2, 3, 4) settings allow_experimental_projection_optimization = 1; drop table j; diff --git a/tests/queries/0_stateless/01739_index_hint.reference b/tests/queries/0_stateless/01739_index_hint.reference index 6aa40c5d302..71dfab29154 100644 --- a/tests/queries/0_stateless/01739_index_hint.reference +++ b/tests/queries/0_stateless/01739_index_hint.reference @@ -25,8 +25,8 @@ drop table tbl; drop table if exists XXXX; create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=128; insert into XXXX select number*60, 0 from numbers(100000); -SELECT count() FROM XXXX WHERE indexHint(t = 42); -128 +SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); +487680 drop table if exists XXXX; create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192; insert into XXXX select number*60, 0 from numbers(100000); diff --git a/tests/queries/0_stateless/01739_index_hint.sql b/tests/queries/0_stateless/01739_index_hint.sql index 28395c2dc1d..30dfa43d334 100644 --- a/tests/queries/0_stateless/01739_index_hint.sql +++ b/tests/queries/0_stateless/01739_index_hint.sql @@ -22,7 +22,7 @@ create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings inde insert into XXXX select number*60, 0 from numbers(100000); -SELECT count() FROM XXXX WHERE indexHint(t = 42); +SELECT sum(t) FROM XXXX WHERE indexHint(t = 42); drop table if exists XXXX; diff --git a/tests/queries/0_stateless/01748_partition_id_pruning.sql b/tests/queries/0_stateless/01748_partition_id_pruning.sql index e0d45884c60..9a26dd8daba 100644 --- a/tests/queries/0_stateless/01748_partition_id_pruning.sql +++ b/tests/queries/0_stateless/01748_partition_id_pruning.sql @@ -14,6 +14,8 @@ select * from x where _partition_id in (select partitionId(number + 1) from numb -- trivial count optimization test set max_rows_to_read = 2; -- one 
row for subquery + subquery itself
+-- TODO: Relax the limits because we might build the prepared set twice with _minmax_count_projection
+set max_rows_to_read = 3;
 select count() from x where _partition_id in (select partitionId(number + 1) from numbers(1));
 
 drop table x;
From a0ab7a01f12d4a7447d3f26fedbda77d9f95492e Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Thu, 3 Feb 2022 02:32:19 +0800
Subject: [PATCH 093/215] Adapt minmax_count_projection with ModuloLegacy

---
 src/Storages/ProjectionsDescription.cpp | 7 ++++++-
 src/Storages/ProjectionsDescription.h | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp
index e13895e60f1..5c9ae46dd60 100644
--- a/src/Storages/ProjectionsDescription.cpp
+++ b/src/Storages/ProjectionsDescription.cpp
@@ -179,7 +179,7 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const
 
 ProjectionDescription ProjectionDescription::getMinMaxCountProjection(
     const ColumnsDescription & columns,
-    const ASTPtr & partition_columns,
+    ASTPtr partition_columns,
     const Names & minmax_columns,
     const ASTs & primary_key_asts,
     ContextPtr query_context)
@@ -203,7 +203,12 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection(
     select_query->setExpression(ASTProjectionSelectQuery::Expression::SELECT, std::move(select_expression_list));
 
     if (partition_columns && !partition_columns->children.empty())
+    {
+        partition_columns = partition_columns->clone();
+        for (const auto & partition_column : partition_columns->children)
+            KeyDescription::moduloToModuloLegacyRecursive(partition_column);
         select_query->setExpression(ASTProjectionSelectQuery::Expression::GROUP_BY, partition_columns->clone());
+    }
 
     result.definition_ast = select_query;
     result.name = MINMAX_COUNT_PROJECTION_NAME;
diff --git a/src/Storages/ProjectionsDescription.h b/src/Storages/ProjectionsDescription.h
index 960e94e22f4..3e8d5e1a4f1 100644
--- a/src/Storages/ProjectionsDescription.h
+++ b/src/Storages/ProjectionsDescription.h
@@ -73,7 +73,7 @@ struct ProjectionDescription
 
     static ProjectionDescription getMinMaxCountProjection(
         const ColumnsDescription & columns,
-        const ASTPtr & partition_columns,
+        ASTPtr partition_columns,
         const Names & minmax_columns,
         const ASTs & primary_key_asts,
         ContextPtr query_context);
 
From 3fab7af541eeb32555978f17aa22e47d32d529c8 Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Thu, 3 Feb 2022 02:33:19 +0800
Subject: [PATCH 094/215] Bug fix and improvement of minmax_count_projection

---
 src/Storages/MergeTree/MergeTreeData.cpp | 60 +++++++++++++++++++++++-
 src/Storages/MergeTree/MergeTreeData.h | 2 +
 2 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index aaf6cf3884e..db650f6e35b 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -4505,6 +4505,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
     const SelectQueryInfo & query_info,
     const DataPartsVector & parts,
     DataPartsVector & normal_parts,
+    const PartitionIdToMaxBlock * max_block_numbers_to_read,
     ContextPtr query_context) const
 {
     if (!metadata_snapshot->minmax_count_projection)
@@ -4541,6 +4542,23 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
     if (virtual_columns_block.rows() == 0)
         return {};
 
+    std::optional partition_pruner;
+    std::optional minmax_idx_condition;
+    DataTypes minmax_columns_types;
+    if (metadata_snapshot->hasPartitionKey())
+    {
+        const
auto & partition_key = metadata_snapshot->getPartitionKey(); + auto minmax_columns_names = getMinMaxColumnsNames(partition_key); + minmax_columns_types = getMinMaxColumnsTypes(partition_key); + + minmax_idx_condition.emplace( + query_info, + query_context, + minmax_columns_names, + getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(query_context))); + partition_pruner.emplace(metadata_snapshot, query_info, query_context, false /* strict */); + } + // Generate valid expressions for filtering VirtualColumnUtils::prepareFilterBlockWithQuery(query_info.query, query_context, virtual_columns_block, expression_ast); if (expression_ast) @@ -4549,6 +4567,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( size_t rows = virtual_columns_block.rows(); const ColumnString & part_name_column = typeid_cast(*virtual_columns_block.getByName("_part").column); size_t part_idx = 0; + auto filter_column = ColumnUInt8::create(); + auto & filter_column_data = filter_column->getData(); for (size_t row = 0; row < rows; ++row) { while (parts[part_idx]->name != part_name_column.getDataAt(row)) @@ -4559,12 +4579,32 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( if (!part->minmax_idx->initialized) throw Exception("Found a non-empty part with uninitialized minmax_idx. It's a bug", ErrorCodes::LOGICAL_ERROR); + filter_column_data.emplace_back(); + + if (max_block_numbers_to_read) + { + auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); + if (blocks_iterator == max_block_numbers_to_read->end() || part->info.max_block > blocks_iterator->second) + continue; + } + + if (minmax_idx_condition + && !minmax_idx_condition->checkInHyperrectangle(part->minmax_idx->hyperrectangle, minmax_columns_types).can_be_true) + continue; + + if (partition_pruner) + { + if (partition_pruner->canBePruned(*part)) + continue; + } + if (need_primary_key_max_column && !part->index_granularity.hasFinalMark()) { normal_parts.push_back(part); continue; } + filter_column_data.back() = 1; size_t pos = 0; for (size_t i : metadata_snapshot->minmax_count_projection->partition_value_indices) { @@ -4607,6 +4647,16 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( } block.setColumns(std::move(partition_minmax_count_columns)); + FilterDescription filter(*filter_column); + for (size_t i = 0; i < virtual_columns_block.columns(); ++i) + { + ColumnPtr & column = virtual_columns_block.safeGetByPosition(i).column; + column = column->filter(*filter.data, -1); + } + + if (block.rows() == 0) + return {}; + Block res; for (const auto & name : required_columns) { @@ -4882,9 +4932,15 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { DataPartsVector normal_parts; query_info.minmax_count_projection_block = getMinMaxCountProjectionBlock( - metadata_snapshot, minmax_conut_projection_candidate->required_columns, query_info, parts, normal_parts, query_context); + metadata_snapshot, + minmax_conut_projection_candidate->required_columns, + query_info, + parts, + normal_parts, + max_added_blocks.get(), + query_context); - if (minmax_conut_projection_candidate->prewhere_info) + if (query_info.minmax_count_projection_block && minmax_conut_projection_candidate->prewhere_info) { const auto & prewhere_info = minmax_conut_projection_candidate->prewhere_info; if (prewhere_info->alias_actions) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 649dae52852..93add8d6935 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h 
@@ -43,6 +43,7 @@ class MergeTreePartsMover; class MergeTreeDataMergerMutator; class MutationCommands; class Context; +using PartitionIdToMaxBlock = std::unordered_map; struct JobAndPool; struct ZeroCopyLock; @@ -391,6 +392,7 @@ public: const SelectQueryInfo & query_info, const DataPartsVector & parts, DataPartsVector & normal_parts, + const PartitionIdToMaxBlock * max_block_numbers_to_read, ContextPtr query_context) const; std::optional getQueryProcessingStageWithAggregateProjection( From 1ab773cc9075bad57da2c61039df39997ab6f568 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 3 Feb 2022 20:47:27 +0800 Subject: [PATCH 095/215] Fix aggregation_in_order with normal projection --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 37 ++++++++---- src/Storages/SelectQueryInfo.h | 1 + ..._projection_aggregation_in_order.reference | 20 +++++++ .../01710_projection_aggregation_in_order.sql | 59 +++++++++++++++++++ 5 files changed, 107 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/01710_projection_aggregation_in_order.reference create mode 100644 tests/queries/0_stateless/01710_projection_aggregation_in_order.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 87ccf3dfa1c..f17c64ea71f 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1950,7 +1950,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc query_info.projection->order_optimizer = std::make_shared( query, query_info.projection->group_by_elements_actions, - getSortDescriptionFromGroupBy(query), + query_info.projection->group_by_elements_order_descr, query_info.syntax_analyzer_result); } else diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index db650f6e35b..c81b05e9284 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4740,7 +4740,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg keys.insert(desc.name); key_name_pos_map.insert({desc.name, pos++}); } - auto actions_settings = ExpressionActionsSettings::fromSettings(settings); + auto actions_settings = ExpressionActionsSettings::fromSettings(settings, CompileExpressions::yes); // All required columns should be provided by either current projection or previous actions // Let's traverse backward to finish the check. @@ -4876,6 +4876,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg auto actions_dag = analysis_result.before_aggregation->clone(); actions_dag->foldActionsByProjection({key}, sample_block_for_keys); candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); + candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); } } @@ -4892,18 +4893,32 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg } } - if (projection.type == ProjectionDescription::Type::Normal && (analysis_result.hasWhere() || analysis_result.hasPrewhere())) + if (projection.type == ProjectionDescription::Type::Normal) { - const auto & actions - = analysis_result.before_aggregation ? 
analysis_result.before_aggregation : analysis_result.before_order_by; - NameSet required_columns; - for (const auto & column : actions->getRequiredColumns()) - required_columns.insert(column.name); - - if (rewrite_before_where(candidate, projection, required_columns, sample_block, {})) + if (analysis_result.before_aggregation && analysis_result.optimize_aggregation_in_order) { - candidate.required_columns = {required_columns.begin(), required_columns.end()}; - candidates.push_back(std::move(candidate)); + for (const auto & key : keys) + { + auto actions_dag = analysis_result.before_aggregation->clone(); + actions_dag->foldActionsByProjection({key}, sample_block_for_keys); + candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); + candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); + } + } + + if (analysis_result.hasWhere() || analysis_result.hasPrewhere()) + { + const auto & actions + = analysis_result.before_aggregation ? analysis_result.before_aggregation : analysis_result.before_order_by; + NameSet required_columns; + for (const auto & column : actions->getRequiredColumns()) + required_columns.insert(column.name); + + if (rewrite_before_where(candidate, projection, required_columns, sample_block, {})) + { + candidate.required_columns = {required_columns.begin(), required_columns.end()}; + candidates.push_back(std::move(candidate)); + } } } }; diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 2486bcbf5c6..f15f2dd2626 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -127,6 +127,7 @@ struct ProjectionCandidate ReadInOrderOptimizerPtr order_optimizer; InputOrderInfoPtr input_order_info; ManyExpressionActions group_by_elements_actions; + SortDescription group_by_elements_order_descr; std::shared_ptr subqueries_for_sets; MergeTreeDataSelectAnalysisResultPtr merge_tree_projection_select_result_ptr; MergeTreeDataSelectAnalysisResultPtr merge_tree_normal_select_result_ptr; diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference new file mode 100644 index 00000000000..a57b2e2cb0d --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference @@ -0,0 +1,20 @@ +291519000 +276078600 +304558200 +330478200 +317518200 +330478200 +276078600 +343438200 +291519000 +317518200 +291519000 +276078600 +304558200 +330478200 +317518200 +330478200 +276078600 +343438200 +291519000 +317518200 diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql new file mode 100644 index 00000000000..af2a5dc8253 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql @@ -0,0 +1,59 @@ +DROP TABLE IF EXISTS normal; + +CREATE TABLE normal +( + `key` UInt32, + `ts` DateTime, + `value` UInt32, + PROJECTION aaaa + ( + SELECT + ts, + key, + value + ORDER BY (ts, key) + ) +) +ENGINE = MergeTree +ORDER BY (key, ts); + +INSERT INTO normal SELECT + 1, + toDateTime('2021-12-06 00:00:00') + number, + number +FROM numbers(100000); + +SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; + +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; +WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 
22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5;
+
+DROP TABLE IF EXISTS agg;
+
+CREATE TABLE agg
+(
+    `key` UInt32,
+    `ts` DateTime,
+    `value` UInt32,
+    PROJECTION aaaa
+    (
+        SELECT
+            ts,
+            key,
+            sum(value)
+        GROUP BY (ts, key)
+    )
+)
+ENGINE = MergeTree
+ORDER BY (key, ts);
+
+INSERT INTO agg SELECT
+    1,
+    toDateTime('2021-12-06 00:00:00') + number,
+    number
+FROM numbers(100000);
+
+SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1;
+
+WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5;
+WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM agg WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5;

From 01d58fc9bbe4822c917b246a60b82d5c3374ab7d Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Thu, 3 Feb 2022 02:34:03 +0800
Subject: [PATCH 096/215] Enable projection by default

---
 src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 48dd637a943..0643400f473 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -479,7 +479,7 @@ class IColumn;
     M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
     M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
     M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
-    M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \
+    M(Bool, allow_experimental_projection_optimization, true, "Enable projection optimization when processing SELECT queries", 0) \
     M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
     M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
     M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \

From 52aabf98fe7b75213a8757f9572894ed880cf6de Mon Sep 17 00:00:00 2001
From: Amos Bird
Date: Sun, 6 Feb 2022 16:45:49 +0800
Subject: [PATCH 097/215] Revise and add more comments

---
 src/Interpreters/ActionsDAG.cpp          |  8 ++++-
 src/Interpreters/ActionsDAG.h            | 31 +++++++++++++++++
 src/Storages/MergeTree/MergeTreeData.cpp | 42 ++++++++++--------------
 3 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index a4560eb1c15..6ed35210251 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -602,8 +602,8 @@ NameSet ActionsDAG::foldActionsByProjection(
     std::unordered_set<const Node *> visited_nodes;
     std::unordered_set<std::string_view> visited_index_names;
     std::stack<Node *> stack;
-    std::vector<const Node *> missing_input_from_projection_keys;
 
+    /// Record all needed index nodes to start folding.
     for (const auto & node : index)
     {
         if (required_columns.find(node->result_name) != required_columns.end() || node->result_name == predicate_column_name)
@@ -614,6 +614,9 @@ NameSet ActionsDAG::foldActionsByProjection(
         }
     }
 
+    /// If some required columns are not in any index node, try searching from all projection key
+    /// columns. If still missing, return empty set which means current projection fails to match
+    /// (missing columns).
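+    /// For instance, with projection keys (toStartOfHour(dt), key), a required column "key" that
+    /// no index node produces is still found in projection_block_for_keys and is re-added below
+    /// as an input node; a required column matching neither the index nor the keys makes the
+    /// whole fold return an empty set.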
     if (add_missing_keys)
     {
         for (const auto & column : required_columns)
@@ -636,6 +639,7 @@
         }
     }
 
+    /// Traverse the DAG from root to leaf. Substitute any matched node with columns in projection_block_for_keys.
     while (!stack.empty())
     {
         auto * node = stack.top();
@@ -664,10 +668,12 @@
         }
     }
 
+    /// Clean up unused nodes after folding.
     std::erase_if(inputs, [&](const Node * node) { return visited_nodes.count(node) == 0; });
     std::erase_if(index, [&](const Node * node) { return visited_index_names.count(node->result_name) == 0; });
     nodes.remove_if([&](const Node & node) { return visited_nodes.count(&node) == 0; });
 
+    /// Calculate the required columns after folding.
     NameSet next_required_columns;
     for (const auto & input : inputs)
         next_required_columns.insert(input->result_name);
diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h
index 9a5ad01a252..b07ab08c997 100644
--- a/src/Interpreters/ActionsDAG.h
+++ b/src/Interpreters/ActionsDAG.h
@@ -163,12 +163,43 @@ public:
     void removeUnusedActions(const Names & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true);
     void removeUnusedActions(const NameSet & required_names, bool allow_remove_inputs = true, bool allow_constant_folding = true);
 
+    /// Transform the current DAG in a way that leaf nodes get folded into their parents. It's done
+    /// because each projection can provide some columns as inputs to substitute certain sub-DAGs
+    /// (expressions). Consider the following example:
+    /// CREATE TABLE tbl (dt DateTime, val UInt64,
+    ///                   PROJECTION p_hour (SELECT SUM(val) GROUP BY toStartOfHour(dt)));
+    ///
+    /// Query: SELECT toStartOfHour(dt), SUM(val) FROM tbl GROUP BY toStartOfHour(dt);
+    ///
+    /// We will have an ActionsDAG like this:
+    /// FUNCTION: toStartOfHour(dt)   SUM(val)
+    ///              ^                   ^
+    ///              |                   |
+    /// INPUT:      dt                  val
+    ///
+    /// Now we traverse the DAG and see if any FUNCTION node can be replaced by projection's INPUT node.
+    /// The result DAG will be:
+    /// INPUT: toStartOfHour(dt)   SUM(val)
+    ///
+    /// We don't need aggregate columns from projection because they are matched after DAG folding.
+    /// Currently we use canonical names of each node to find matches. It can be improved after we
+    /// have a full-featured name binding system.
+    ///
+    /// @param required_columns should contain columns which this DAG is required to produce after folding. It's used for result actions.
+    /// @param projection_block_for_keys contains all key columns of given projection.
+    /// @param predicate_column_name means we need to produce the predicate column after folding.
+    /// @param add_missing_keys means whether to add additional missing columns to input nodes from projection key columns directly.
+    /// @return required columns for this folded DAG. It's expected to be fewer than the original ones if some projection is used.
     NameSet foldActionsByProjection(
         const NameSet & required_columns,
         const Block & projection_block_for_keys,
         const String & predicate_column_name = {},
         bool add_missing_keys = true);
+
+    /// Reorder the index nodes using given position mapping.
void reorderAggregationKeysForProjection(const std::unordered_map & key_names_pos_map); + + /// Add aggregate columns to index nodes from projection void addAggregatesViaProjection(const Block & aggregates); bool hasArrayJoin() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c81b05e9284..68fa81e1df9 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4776,7 +4776,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg required_columns.erase(column.name); { - // Prewhere_action should not add missing keys. + // prewhere_action should not add missing keys. auto new_prewhere_required_columns = prewhere_actions->foldActionsByProjection( prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->prewhere_column_name, false); if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) @@ -4788,6 +4788,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (candidate.prewhere_info->row_level_filter) { auto row_level_filter_actions = candidate.prewhere_info->row_level_filter->clone(); + // row_level_filter_action should not add missing keys. auto new_prewhere_required_columns = row_level_filter_actions->foldActionsByProjection( prewhere_required_columns, projection.sample_block_for_keys, candidate.prewhere_info->row_level_column_name, false); if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) @@ -4799,6 +4800,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (candidate.prewhere_info->alias_actions) { auto alias_actions = candidate.prewhere_info->alias_actions->clone(); + // alias_action should not add missing keys. auto new_prewhere_required_columns = alias_actions->foldActionsByProjection(prewhere_required_columns, projection.sample_block_for_keys, {}, false); if (new_prewhere_required_columns.empty() && !prewhere_required_columns.empty()) @@ -4836,6 +4838,18 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg sample_block_for_keys.insertUnique(column); } + // If optimize_aggregation_in_order = true, we need additional information to transform the projection's pipeline. 
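+    // (The two vectors filled below are consumed later in InterpreterSelectQuery::executeFetchColumns:
+    // group_by_elements_actions re-derives each GROUP BY key from the projection's source columns, and
+    // group_by_elements_order_descr is the matching ascending SortDescription for ReadInOrderOptimizer.)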
+ auto attach_aggregation_in_order_info = [&]() + { + for (const auto & key : keys) + { + auto actions_dag = analysis_result.before_aggregation->clone(); + actions_dag->foldActionsByProjection({key}, sample_block_for_keys); + candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); + candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); + } + }; + if (projection.type == ProjectionDescription::Type::Aggregate && analysis_result.need_aggregate && can_use_aggregate_projection) { bool match = true; @@ -4845,16 +4859,13 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg { const auto * column = sample_block.findByName(aggregate.column_name); if (column) - { aggregates.insert(*column); - } else { match = false; break; } } - if (!match) return; @@ -4870,15 +4881,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg return; if (analysis_result.optimize_aggregation_in_order) - { - for (const auto & key : keys) - { - auto actions_dag = analysis_result.before_aggregation->clone(); - actions_dag->foldActionsByProjection({key}, sample_block_for_keys); - candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); - candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); - } - } + attach_aggregation_in_order_info(); // Reorder aggregation keys and attach aggregates candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map); @@ -4892,19 +4895,10 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg candidates.push_back(std::move(candidate)); } } - - if (projection.type == ProjectionDescription::Type::Normal) + else if (projection.type == ProjectionDescription::Type::Normal) { if (analysis_result.before_aggregation && analysis_result.optimize_aggregation_in_order) - { - for (const auto & key : keys) - { - auto actions_dag = analysis_result.before_aggregation->clone(); - actions_dag->foldActionsByProjection({key}, sample_block_for_keys); - candidate.group_by_elements_actions.emplace_back(std::make_shared(actions_dag, actions_settings)); - candidate.group_by_elements_order_descr.emplace_back(key, 1, 1); - } - } + attach_aggregation_in_order_info(); if (analysis_result.hasWhere() || analysis_result.hasPrewhere()) { From 6325d4d9b0cf45386dfb5f4f31d941fe4af0ae26 Mon Sep 17 00:00:00 2001 From: feng lv Date: Sat, 5 Feb 2022 08:10:28 +0000 Subject: [PATCH 098/215] continue of #34317 fix fix --- src/DataTypes/DataTypeLowCardinality.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 5 ++++- src/Storages/HDFS/StorageHDFS.cpp | 19 ++++++++++++------- src/Storages/HDFS/StorageHDFSCluster.cpp | 5 ++--- src/Storages/Hive/StorageHive.cpp | 23 ++++++++++++++++------- src/Storages/Kafka/StorageKafka.cpp | 12 ++++++------ src/Storages/StorageDistributed.cpp | 14 +++++++------- src/Storages/StorageS3.cpp | 24 ++++++++++++++++-------- src/Storages/StorageS3Cluster.cpp | 5 ++--- 9 files changed, 66 insertions(+), 43 deletions(-) diff --git a/src/DataTypes/DataTypeLowCardinality.h b/src/DataTypes/DataTypeLowCardinality.h index 38b2109eec6..57f67ddad7a 100644 --- a/src/DataTypes/DataTypeLowCardinality.h +++ b/src/DataTypes/DataTypeLowCardinality.h @@ -13,7 +13,7 @@ private: DataTypePtr dictionary_type; public: - DataTypeLowCardinality(DataTypePtr dictionary_type_); + explicit DataTypeLowCardinality(DataTypePtr dictionary_type_); const DataTypePtr & getDictionaryType() const { return dictionary_type; } diff --git a/src/Storages/FileLog/StorageFileLog.cpp 
b/src/Storages/FileLog/StorageFileLog.cpp index 1ae8a5bb22d..f89caaec685 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -966,7 +967,9 @@ bool StorageFileLog::updateFileInfos() NamesAndTypesList StorageFileLog::getVirtuals() const { - return NamesAndTypesList{{"_filename", std::make_shared()}, {"_offset", std::make_shared()}}; + return NamesAndTypesList{ + {"_filename", std::make_shared(std::make_shared())}, + {"_offset", std::make_shared()}}; } Names StorageFileLog::getVirtualColumnNames() diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 294ab2f6d4e..d40838ad141 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -258,9 +258,15 @@ Block HDFSSource::getHeader(const StorageMetadataPtr & metadata_snapshot, bool n auto header = metadata_snapshot->getSampleBlock(); /// Note: AddingDefaultsBlockInputStream doesn't change header. if (need_path_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + header.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_path"}); if (need_file_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + header.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_file"}); return header; } @@ -378,7 +384,7 @@ Chunk HDFSSource::generate() /// Enrich with virtual columns. if (need_path_column) { - auto column = DataTypeString().createColumnConst(num_rows, current_path); + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); columns.push_back(column->convertToFullColumnIfConst()); } @@ -387,7 +393,7 @@ Chunk HDFSSource::generate() size_t last_slash_pos = current_path.find_last_of('/'); auto file_name = current_path.substr(last_slash_pos + 1); - auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name)); + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); columns.push_back(column->convertToFullColumnIfConst()); } @@ -689,9 +695,8 @@ void registerStorageHDFS(StorageFactory & factory) NamesAndTypesList StorageHDFS::getVirtuals() const { return NamesAndTypesList{ - {"_path", std::make_shared()}, - {"_file", std::make_shared()} - }; + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; } } diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index ba1cc045fbf..dfe1ea6ffd3 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -138,9 +138,8 @@ QueryProcessingStage::Enum StorageHDFSCluster::getQueryProcessingStage( NamesAndTypesList StorageHDFSCluster::getVirtuals() const { return NamesAndTypesList{ - {"_path", std::make_shared()}, - {"_file", std::make_shared()} - }; + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; } diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index af357e13ca7..3040ad23283 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -76,9 +76,15 @@ public: static Block getHeader(Block header, const SourcesInfoPtr & source_info) { if (source_info->need_path_column) - 
header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + header.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_path"}); if (source_info->need_file_column) - header.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + header.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_file"}); return header; } @@ -87,9 +93,9 @@ public: { ColumnsDescription columns_description{header.getNamesAndTypesList()}; if (source_info->need_path_column) - columns_description.add({"_path", std::make_shared()}); + columns_description.add({"_path", std::make_shared(std::make_shared())}); if (source_info->need_file_column) - columns_description.add({"_file", std::make_shared()}); + columns_description.add({"_file", std::make_shared(std::make_shared())}); return columns_description; } @@ -211,7 +217,7 @@ public: /// Enrich with virtual columns. if (source_info->need_path_column) { - auto column = DataTypeString().createColumnConst(num_rows, current_path); + auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, current_path); columns.push_back(column->convertToFullColumnIfConst()); } @@ -220,7 +226,8 @@ public: size_t last_slash_pos = current_path.find_last_of('/'); auto file_name = current_path.substr(last_slash_pos + 1); - auto column = DataTypeString().createColumnConst(num_rows, std::move(file_name)); + auto column + = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); columns.push_back(column->convertToFullColumnIfConst()); } return Chunk(std::move(columns), num_rows); @@ -633,7 +640,9 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad NamesAndTypesList StorageHive::getVirtuals() const { - return NamesAndTypesList{{"_path", std::make_shared()}, {"_file", std::make_shared()}}; + return NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; } void registerStorageHive(StorageFactory & factory) diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 6101eb04af6..30acbcdf62b 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -16,13 +17,15 @@ #include #include #include +#include #include #include #include #include -#include #include #include +#include +#include #include #include #include @@ -36,8 +39,6 @@ #include #include #include -#include -#include namespace DB @@ -807,15 +808,14 @@ void registerStorageKafka(StorageFactory & factory) NamesAndTypesList StorageKafka::getVirtuals() const { auto result = NamesAndTypesList{ - {"_topic", std::make_shared()}, + {"_topic", std::make_shared(std::make_shared())}, {"_key", std::make_shared()}, {"_offset", std::make_shared()}, {"_partition", std::make_shared()}, {"_timestamp", std::make_shared(std::make_shared())}, {"_timestamp_ms", std::make_shared(std::make_shared(3))}, {"_headers.name", std::make_shared(std::make_shared())}, - {"_headers.value", std::make_shared(std::make_shared())} - }; + {"_headers.value", std::make_shared(std::make_shared())}}; if (kafka_settings->kafka_handle_error_mode == HandleKafkaErrorMode::STREAM) { result.push_back({"_raw_message", std::make_shared()}); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 
543ec9ee14a..da648aa4e5c 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -302,13 +302,13 @@ NamesAndTypesList StorageDistributed::getVirtuals() const /// NOTE This is weird. Most of these virtual columns are part of MergeTree /// tables info. But Distributed is general-purpose engine. return NamesAndTypesList{ - NameAndTypePair("_table", std::make_shared()), - NameAndTypePair("_part", std::make_shared()), - NameAndTypePair("_part_index", std::make_shared()), - NameAndTypePair("_part_uuid", std::make_shared()), - NameAndTypePair("_partition_id", std::make_shared()), - NameAndTypePair("_sample_factor", std::make_shared()), - NameAndTypePair("_shard_num", std::make_shared()), /// deprecated + NameAndTypePair("_table", std::make_shared()), + NameAndTypePair("_part", std::make_shared()), + NameAndTypePair("_part_index", std::make_shared()), + NameAndTypePair("_part_uuid", std::make_shared()), + NameAndTypePair("_partition_id", std::make_shared()), + NameAndTypePair("_sample_factor", std::make_shared()), + NameAndTypePair("_shard_num", std::make_shared()), /// deprecated }; } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2c373b01c91..77d4952291c 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -209,9 +209,15 @@ String StorageS3Source::KeysIterator::next() Block StorageS3Source::getHeader(Block sample_block, bool with_path_column, bool with_file_column) { if (with_path_column) - sample_block.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + sample_block.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_path"}); if (with_file_column) - sample_block.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + sample_block.insert( + {DataTypeLowCardinality{std::make_shared()}.createColumn(), + std::make_shared(std::make_shared()), + "_file"}); return sample_block; } @@ -305,12 +311,15 @@ Chunk StorageS3Source::generate() UInt64 num_rows = chunk.getNumRows(); if (with_path_column) - chunk.addColumn(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + chunk.addColumn(DataTypeLowCardinality{std::make_shared()} + .createColumnConst(num_rows, file_path) + ->convertToFullColumnIfConst()); if (with_file_column) { size_t last_slash_pos = file_path.find_last_of('/'); - chunk.addColumn(DataTypeString().createColumnConst(num_rows, file_path.substr( - last_slash_pos + 1))->convertToFullColumnIfConst()); + chunk.addColumn(DataTypeLowCardinality{std::make_shared()} + .createColumnConst(num_rows, file_path.substr(last_slash_pos + 1)) + ->convertToFullColumnIfConst()); } return chunk; @@ -961,9 +970,8 @@ void registerStorageCOS(StorageFactory & factory) NamesAndTypesList StorageS3::getVirtuals() const { return NamesAndTypesList{ - {"_path", std::make_shared()}, - {"_file", std::make_shared()} - }; + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; } bool StorageS3::supportsPartitionBy() const diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 659071b392d..762eb079c1c 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -152,9 +152,8 @@ QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage( NamesAndTypesList StorageS3Cluster::getVirtuals() const { return NamesAndTypesList{ - {"_path", std::make_shared()}, - {"_file", std::make_shared()} - }; 
+ {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; } From 45fa68b4a890b1bda979cadfbd8968422fcecd9d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 6 Feb 2022 13:31:59 +0000 Subject: [PATCH 099/215] Fixed tests --- .../00327_summing_composite_nested.reference | 2 +- .../0_stateless/00327_summing_composite_nested.sql | 4 ++-- .../0_stateless/00799_function_dry_run.reference | 12 ++++++------ tests/queries/0_stateless/00799_function_dry_run.sql | 4 ++-- .../queries/0_stateless/01031_new_any_join.reference | 6 ------ tests/queries/0_stateless/01031_new_any_join.sql | 2 +- .../01031_pmj_new_any_semi_join.reference | 7 ++----- .../0_stateless/01031_pmj_new_any_semi_join.sql | 2 +- .../01142_with_ties_and_aliases.reference | 6 +++--- .../0_stateless/01142_with_ties_and_aliases.sql | 2 +- .../0_stateless/01670_neighbor_lc_bug.reference | 4 ++-- tests/queries/0_stateless/01670_neighbor_lc_bug.sql | 2 +- 12 files changed, 22 insertions(+), 31 deletions(-) diff --git a/tests/queries/0_stateless/00327_summing_composite_nested.reference b/tests/queries/0_stateless/00327_summing_composite_nested.reference index 7984f688b67..38c96d85524 100644 --- a/tests/queries/0_stateless/00327_summing_composite_nested.reference +++ b/tests/queries/0_stateless/00327_summing_composite_nested.reference @@ -1,5 +1,5 @@ -2000-01-01 1 [2,1] [4,3] [20,22] [2,2,1] ['5','5','0'] [-3,-3,-33] [10,100,1000] 2000-01-01 1 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] +2000-01-01 1 [2,1] [4,3] [20,22] [2,2,1] ['5','5','0'] [-3,-3,-33] [10,100,1000] 2000-01-01 2 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] 2000-01-01 2 [1,2] [3,4] [10,11] [0,1,2] ['3','4','5'] [-1,-2,-3] [1,10,100] 2000-01-01 2 [2,1,1] [4,3,3] [20,22,33] [2,2] ['5','5'] [-3,-3] [10,100] diff --git a/tests/queries/0_stateless/00327_summing_composite_nested.sql b/tests/queries/0_stateless/00327_summing_composite_nested.sql index e7b1b39ffaa..f9b251ebd8f 100644 --- a/tests/queries/0_stateless/00327_summing_composite_nested.sql +++ b/tests/queries/0_stateless/00327_summing_composite_nested.sql @@ -5,7 +5,7 @@ CREATE TABLE summing_composite_key (d Date, k UInt64, FirstMap Nested(k1 UInt32, INSERT INTO summing_composite_key VALUES ('2000-01-01', 1, [1,2], ['3','4'], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]), ('2000-01-01', 1, [2,1], ['4','3'], [20,22], [2,2,1], [5,5,0], [-3,-3,-33], [10,100,1000]), ('2000-01-01', 2, [1,2], ['3','4'], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]), ('2000-01-01', 2, [2,1,1], ['4','3','3'], [20,22,33], [2,2], [5,5], [-3,-3], [10,100]), ('2000-01-01', 2, [1,2], ['3','4'], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]); -SELECT * FROM summing_composite_key ORDER BY d, k, _part_index; +SELECT * FROM summing_composite_key ORDER BY d, k, FirstMap.k1, FirstMap.k2ID, FirstMap.s, SecondMap.k1ID, SecondMap.k2Key, SecondMap.k3Type, SecondMap.s; SELECT d, k, m.k1, m.k2ID, m.s FROM summing_composite_key ARRAY JOIN FirstMap AS m ORDER BY d, k, m.k1, m.k2ID, m.s, SecondMap.k1ID, SecondMap.k2Key, SecondMap.k3Type, SecondMap.s; SELECT d, k, m.k1, m.k2ID, sum(m.s) FROM summing_composite_key ARRAY JOIN FirstMap AS m GROUP BY d, k, m.k1, m.k2ID ORDER BY d, k, m.k1, m.k2ID; @@ -17,7 +17,7 @@ SELECT d, k, m.k1ID, m.k2Key, m.k3Type, m.s FROM summing_composite_key FINAL ARR OPTIMIZE TABLE summing_composite_key PARTITION 200001 FINAL; -SELECT * FROM summing_composite_key ORDER BY d, k, _part_index; +SELECT * FROM summing_composite_key ORDER BY d, k, 
FirstMap.k1, FirstMap.k2ID, FirstMap.s, SecondMap.k1ID, SecondMap.k2Key, SecondMap.k3Type, SecondMap.s;; SELECT d, k, m.k1, m.k2ID, m.s FROM summing_composite_key ARRAY JOIN FirstMap AS m ORDER BY d, k, m.k1, m.k2ID, m.s; SELECT d, k, m.k1, m.k2ID, sum(m.s) FROM summing_composite_key ARRAY JOIN FirstMap AS m GROUP BY d, k, m.k1, m.k2ID ORDER BY d, k, m.k1, m.k2ID; diff --git a/tests/queries/0_stateless/00799_function_dry_run.reference b/tests/queries/0_stateless/00799_function_dry_run.reference index 517ab65908f..8f855bb582d 100644 --- a/tests/queries/0_stateless/00799_function_dry_run.reference +++ b/tests/queries/0_stateless/00799_function_dry_run.reference @@ -1,9 +1,9 @@ 0.3 2018-11-19 13:00:00 \N 0.3 2018-11-19 13:05:00 \N 0.4 2018-11-19 13:10:00 1 -0.5 2018-11-19 13:15:00 1.5 -0.5 2018-11-19 13:30:00 2.2 -0.6 2018-11-19 13:15:00 1.3 -0.7 2018-11-19 13:20:00 1.8 -0.8 2018-11-19 13:25:00 2.4 -0.9 2018-11-19 13:25:00 2.1 +0.5 2018-11-19 13:15:00 1.2 +0.6 2018-11-19 13:20:00 1.5 +0.7 2018-11-19 13:25:00 1.8 +0.8 2018-11-19 13:30:00 2.1 +0.9 2018-11-19 13:45:00 2.4 +0.5 2018-11-19 13:50:00 2.2 diff --git a/tests/queries/0_stateless/00799_function_dry_run.sql b/tests/queries/0_stateless/00799_function_dry_run.sql index 946ac98044c..bf6fb3de395 100644 --- a/tests/queries/0_stateless/00799_function_dry_run.sql +++ b/tests/queries/0_stateless/00799_function_dry_run.sql @@ -4,7 +4,7 @@ DROP TABLE IF EXISTS bm; CREATE TABLE bm (amount float, business_dttm DateTime) engine Log; -INSERT INTO bm VALUES (0.3,'2018-11-19 13:00:00'), (0.3,'2018-11-19 13:05:00'), (0.4,'2018-11-19 13:10:00'), (0.5,'2018-11-19 13:15:00'), (0.6,'2018-11-19 13:15:00'), (0.7,'2018-11-19 13:20:00'), (0.8,'2018-11-19 13:25:00'), (0.9,'2018-11-19 13:25:00'), (0.5,'2018-11-19 13:30:00'); +INSERT INTO bm VALUES (0.3,'2018-11-19 13:00:00'), (0.3,'2018-11-19 13:05:00'), (0.4,'2018-11-19 13:10:00'), (0.5,'2018-11-19 13:15:00'), (0.6,'2018-11-19 13:20:00'), (0.7,'2018-11-19 13:25:00'), (0.8,'2018-11-19 13:30:00'), (0.9,'2018-11-19 13:45:00'), (0.5,'2018-11-19 13:50:00'); WITH ( @@ -30,6 +30,6 @@ FROM business_dttm FROM bm ORDER BY business_dttm -) ORDER BY amount, business_dttm; +) ORDER BY business_dttm; DROP TABLE bm; diff --git a/tests/queries/0_stateless/01031_new_any_join.reference b/tests/queries/0_stateless/01031_new_any_join.reference index 1fd9a5352e3..7b08703e422 100644 --- a/tests/queries/0_stateless/01031_new_any_join.reference +++ b/tests/queries/0_stateless/01031_new_any_join.reference @@ -7,10 +7,7 @@ any left any left (rev) 0 5 b6 2 a3 2 b1 -2 a3 2 b2 4 a5 4 b3 -4 a5 4 b4 -4 a5 4 b5 any inner 2 a3 2 b1 4 a5 4 b3 @@ -20,10 +17,7 @@ any inner (rev) any right 0 5 b6 2 a3 2 b1 -2 a3 2 b2 4 a5 4 b3 -4 a5 4 b4 -4 a5 4 b5 any right (rev) 0 a1 0 1 a2 0 diff --git a/tests/queries/0_stateless/01031_new_any_join.sql b/tests/queries/0_stateless/01031_new_any_join.sql index de86d8eebc5..2f2a8b2ad1d 100644 --- a/tests/queries/0_stateless/01031_new_any_join.sql +++ b/tests/queries/0_stateless/01031_new_any_join.sql @@ -5,7 +5,7 @@ CREATE TABLE t1 (x UInt32, s String) engine = Memory; CREATE TABLE t2 (x UInt32, s String) engine = Memory; INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'); -INSERT INTO t2 (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (4, 'b3'), (5, 'b6'); SET join_use_nulls = 0; SET any_join_distinct_right_table_keys = 0; diff --git a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference 
b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference index bec0f22c466..b48b20942c6 100644 --- a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference +++ b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.reference @@ -3,11 +3,10 @@ any left 1 a2 0 2 a3 2 b1 3 a4 0 -4 a5 4 b3 +4 a5 4 b2 any left (rev) 0 5 b4 2 a3 2 b1 -4 a5 4 b3 4 a5 4 b2 any inner 2 a3 2 b1 @@ -19,7 +18,6 @@ any right 0 5 b4 2 a3 2 b1 4 a5 4 b2 -4 a5 4 b3 any right (rev) 0 a1 0 1 a2 0 @@ -28,11 +26,10 @@ any right (rev) 4 a5 4 b2 semi left 2 a3 2 b1 -4 a5 4 b3 +4 a5 4 b2 semi right 2 a3 2 b1 4 a5 4 b2 -4 a5 4 b3 anti left 0 a1 0 1 a2 1 diff --git a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql index 87cf3844f97..0a593b3cfa9 100644 --- a/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql +++ b/tests/queries/0_stateless/01031_pmj_new_any_semi_join.sql @@ -5,7 +5,7 @@ CREATE TABLE t1 (x UInt32, s String) engine = Memory; CREATE TABLE t2 (x UInt32, s String) engine = Memory; INSERT INTO t1 (x, s) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5'); -INSERT INTO t2 (x, s) VALUES (2, 'b1'), (4, 'b2'), (4, 'b3'), (5, 'b4'); +INSERT INTO t2 (x, s) VALUES (2, 'b1'), (4, 'b2'), (5, 'b4'); SET join_algorithm = 'prefer_partial_merge'; SET join_use_nulls = 0; diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.reference b/tests/queries/0_stateless/01142_with_ties_and_aliases.reference index 5d8b70ab48f..1846e07a908 100644 --- a/tests/queries/0_stateless/01142_with_ties_and_aliases.reference +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.reference @@ -8,11 +8,11 @@ 1 1 1 -4 -3 +0 1 2 -0 +3 +4 0 0 0 1 0 2 diff --git a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql index d3ca4e06ae2..de4a9281a08 100644 --- a/tests/queries/0_stateless/01142_with_ties_and_aliases.sql +++ b/tests/queries/0_stateless/01142_with_ties_and_aliases.sql @@ -5,7 +5,7 @@ create table wt (a Int, b Int) engine = Memory; insert into wt select 0, number from numbers(5); select 1 from wt order by a limit 3 with ties; -select b from wt order by a limit 3 with ties; +select b from (select b from wt order by a limit 3 with ties) order by b; select * from (select * from (with a * 2 as c select a, b from wt order by c limit 3 with ties) order by a, b); select * from (select * from (select a * 2 as c, b from wt order by c limit 3 with ties) order by c, b); diff --git a/tests/queries/0_stateless/01670_neighbor_lc_bug.reference b/tests/queries/0_stateless/01670_neighbor_lc_bug.reference index b28e8d90caa..fd1bc1a154c 100644 --- a/tests/queries/0_stateless/01670_neighbor_lc_bug.reference +++ b/tests/queries/0_stateless/01670_neighbor_lc_bug.reference @@ -9,7 +9,7 @@ 1 1 1 2 2 2 ┌─rowNr─┬─val_string─┬─str_m1───┬─str_p1───┬─val_low──┬─low_m1───┬─low_p1───┐ -│ 1 │ String 1 │ String 1 │ String 2 │ String 1 │ String 1 │ String 2 │ -│ 2 │ String 1 │ │ String 1 │ String 1 │ │ String 1 │ +│ 1 │ String 1 │ │ String 1 │ String 1 │ │ String 1 │ +│ 2 │ String 1 │ String 1 │ String 2 │ String 1 │ String 1 │ String 2 │ │ 3 │ String 2 │ String 1 │ │ String 2 │ String 1 │ │ └───────┴────────────┴──────────┴──────────┴──────────┴──────────┴──────────┘ diff --git a/tests/queries/0_stateless/01670_neighbor_lc_bug.sql b/tests/queries/0_stateless/01670_neighbor_lc_bug.sql index ad24dc0fe33..9de544b111d 100644 --- a/tests/queries/0_stateless/01670_neighbor_lc_bug.sql +++ 
b/tests/queries/0_stateless/01670_neighbor_lc_bug.sql @@ -39,7 +39,7 @@ FROM FROM neighbor_test ORDER BY val_string ASC ) -ORDER By rowNr +ORDER BY rowNr, val_string, str_m1, str_p1, val_low, low_m1, low_p1 format PrettyCompact; drop table if exists neighbor_test; From e41a895ebbc9d5bdf9e83d16f0d347160199c044 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 6 Feb 2022 16:11:18 +0100 Subject: [PATCH 100/215] Fix --- src/Client/LocalConnection.cpp | 6 ++++-- src/Client/LocalConnection.h | 2 ++ ...06_clickhouse_local_use_database.reference | 12 ++++++++++++ .../02206_clickhouse_local_use_database.sh | 19 +++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02206_clickhouse_local_use_database.reference create mode 100755 tests/queries/0_stateless/02206_clickhouse_local_use_database.sh diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 528c38f9b76..8ee4b9e1c1f 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -74,6 +74,8 @@ void LocalConnection::sendQuery( query_context->setProgressCallback([this] (const Progress & value) { return this->updateProgress(value); }); query_context->setFileProgressCallback([this](const FileProgress & value) { this->updateProgress(Progress(value)); }); } + if (!current_database.empty()) + query_context->setCurrentDatabase(current_database); CurrentThread::QueryScope query_scope_holder(query_context); @@ -427,9 +429,9 @@ void LocalConnection::getServerVersion( throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } -void LocalConnection::setDefaultDatabase(const String &) +void LocalConnection::setDefaultDatabase(const String & database) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); + current_database = database; } UInt64 LocalConnection::getServerRevision(const ConnectionTimeouts &) diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 92c2af30c80..b85022cf183 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -142,5 +142,7 @@ private: /// Last "server" packet. std::optional next_packet_type; + + String current_database; }; } diff --git a/tests/queries/0_stateless/02206_clickhouse_local_use_database.reference b/tests/queries/0_stateless/02206_clickhouse_local_use_database.reference new file mode 100644 index 00000000000..679b6811f8e --- /dev/null +++ b/tests/queries/0_stateless/02206_clickhouse_local_use_database.reference @@ -0,0 +1,12 @@ +SHOW TABLES; +CREATE DATABASE test1; +CREATE TABLE test1.table1 (a Int32) ENGINE=Memory; +USE test1; +SHOW TABLES; +table1 +CREATE DATABASE test2; +USE test2; +SHOW TABLES; +USE test1; +SHOW TABLES; +table1 diff --git a/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh b/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh new file mode 100755 index 00000000000..59ede739e4a --- /dev/null +++ b/tests/queries/0_stateless/02206_clickhouse_local_use_database.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL --echo --multiline --multiquery -q """ +SHOW TABLES; +CREATE DATABASE test1; +CREATE TABLE test1.table1 (a Int32) ENGINE=Memory; +USE test1; +SHOW TABLES; +CREATE DATABASE test2; +USE test2; +SHOW TABLES; +USE test1; +SHOW TABLES; +""" From a6f0b01e6a4280c7780c00d7b87b8cd69881d5dc Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 7 Feb 2022 00:42:11 +0800 Subject: [PATCH 101/215] Fix order by after aggregation --- src/Interpreters/InterpreterSelectQuery.cpp | 4 +++- ...710_projection_aggregation_in_order.reference | 16 ++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f17c64ea71f..dc00edad612 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1180,8 +1180,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

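            // The read-in-order info describes the order of rows before aggregation; keeping it would
            // let the planner treat the aggregated stream as already sorted and skip the final ORDER BY.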
input_order_info.reset(); } // Now we must execute: diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference index a57b2e2cb0d..12c613c184d 100644 --- a/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.reference @@ -1,20 +1,20 @@ -291519000 276078600 +291519000 304558200 -330478200 317518200 330478200 276078600 -343438200 291519000 -317518200 -291519000 -276078600 304558200 -330478200 317518200 330478200 276078600 -343438200 291519000 +304558200 317518200 +330478200 +276078600 +291519000 +304558200 +317518200 +330478200 From ea068be3acee1263aca8d7718857783e0c2f37c8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 6 Feb 2022 20:07:35 +0300 Subject: [PATCH 102/215] Update TableFunctionS3.cpp --- src/TableFunctions/TableFunctionS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 4490c122f99..e1e31b5efc3 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -64,7 +64,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con } else { - if (args.size() < 1 || args.size() > 6) + if (args.empty() || args.size() > 6) throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) From 05886005b8910b453345ef463847e5cc4b2ab764 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Feb 2022 23:14:08 +0300 Subject: [PATCH 103/215] Update references --- tests/queries/0_stateless/01069_database_memory.reference | 2 +- .../0_stateless/02021_create_database_with_comment.reference | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01069_database_memory.reference b/tests/queries/0_stateless/01069_database_memory.reference index e7486d57276..15e2bec3355 100644 --- a/tests/queries/0_stateless/01069_database_memory.reference +++ b/tests/queries/0_stateless/01069_database_memory.reference @@ -1,4 +1,4 @@ -CREATE DATABASE memory_01069\nENGINE = Memory() +CREATE DATABASE memory_01069\nENGINE = Memory 1 2 3 diff --git a/tests/queries/0_stateless/02021_create_database_with_comment.reference b/tests/queries/0_stateless/02021_create_database_with_comment.reference index 65fc9b6d290..693ea584b84 100644 --- a/tests/queries/0_stateless/02021_create_database_with_comment.reference +++ b/tests/queries/0_stateless/02021_create_database_with_comment.reference @@ -1,5 +1,5 @@ engine : Memory -CREATE DATABASE default\nENGINE = Memory()\nCOMMENT \'Test DB with comment\' +CREATE DATABASE default\nENGINE = Memory\nCOMMENT \'Test DB with comment\' comment= Test DB with comment engine : Atomic From 10b8684003035f811fece12c70f2d85811f21267 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 5 Feb 2022 01:12:09 +0300 Subject: [PATCH 104/215] fix rare bug in reading of empty arrays --- src/Compression/CompressedReadBufferFromFile.cpp | 1 - src/IO/ReadBufferFromFileDescriptor.cpp | 9 +++++++-- src/IO/ReadBufferFromFileDescriptor.h | 3 +++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 1a70b27e9f4..b8ce485abc5 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -113,7 +113,6 @@ 
size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
     /// need to skip some bytes in decompressed data (seek happened before readBig call).
     if (nextimpl_working_buffer_offset == 0 && size_decompressed + additional_size_at_the_end_of_buffer <= n - bytes_read)
     {
-
         decompressTo(to + bytes_read, size_decompressed, size_compressed_without_checksum);
         bytes_read += size_decompressed;
         bytes += size_decompressed;
diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp
index ed6b1a60181..0e538dc5224 100644
--- a/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/src/IO/ReadBufferFromFileDescriptor.cpp
@@ -111,6 +111,7 @@ bool ReadBufferFromFileDescriptor::nextImpl()
         ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
         working_buffer = internal_buffer;
         working_buffer.resize(bytes_read);
+        buffer_is_dirty = false;
     }
     else
         return false;
@@ -152,10 +153,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
     }
 
     /// Position is unchanged.
-    if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)
+    if (!buffer_is_dirty && (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end))
         return new_pos;
 
-    if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
+    if (!buffer_is_dirty && file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
         && new_pos <= file_offset_of_buffer_end)
     {
         /// Position is still inside the buffer.
@@ -179,6 +180,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
         /// First put position at the end of the buffer so the next read will fetch new data to the buffer.
         pos = working_buffer.end();
 
+        /// Mark buffer as dirty to disallow further seek optimizations, because fetching data to the buffer
+        /// is delayed to the next call of 'nextImpl', but it may not be called before the next seek.
+        buffer_is_dirty = true;
+
         /// In case of using 'pread' we just update the info about the next position in file.
         /// In case of using 'read' we call 'lseek'.
diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h
index 188cdd709b5..48acd5d323e 100644
--- a/src/IO/ReadBufferFromFileDescriptor.h
+++ b/src/IO/ReadBufferFromFileDescriptor.h
@@ -62,6 +62,9 @@ public:
 private:
     /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
     bool poll(size_t timeout_microseconds);
+
+    /// If it's true then we cannot rely on the content of the buffer to optimize seek calls.
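+    /// (A seek only moves 'pos' to working_buffer.end() and defers the actual read to the next
+    /// nextImpl() call; if another seek arrives before any read, the buffer bounds are stale and
+    /// the "position is still inside the buffer" shortcut must be skipped.)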
+    bool buffer_is_dirty = true;
 };
 

From ae1fc94fb50d0b37cb4a42936c8f32422fe19032 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Sun, 6 Feb 2022 02:57:49 +0300
Subject: [PATCH 105/215] add unit test

---
 src/IO/ReadBufferFromFileDescriptor.cpp |  1 +
 src/IO/tests/gtest_seek_backwards.cpp   | 36 +++++++++++++++++++++++++
 2 files changed, 37 insertions(+)
 create mode 100644 src/IO/tests/gtest_seek_backwards.cpp

diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp
index 0e538dc5224..a5e75ba5f83 100644
--- a/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/src/IO/ReadBufferFromFileDescriptor.cpp
@@ -234,6 +234,7 @@ void ReadBufferFromFileDescriptor::rewind()
     working_buffer.resize(0);
     pos = working_buffer.begin();
     file_offset_of_buffer_end = 0;
+    buffer_is_dirty = true;
 }
 
diff --git a/src/IO/tests/gtest_seek_backwards.cpp b/src/IO/tests/gtest_seek_backwards.cpp
new file mode 100644
index 00000000000..93aab68936f
--- /dev/null
+++ b/src/IO/tests/gtest_seek_backwards.cpp
@@ -0,0 +1,36 @@
+#include <gtest/gtest.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <IO/WriteHelpers.h>
+#include <Common/filesystemHelpers.h>
+
+using namespace DB;
+
+TEST(ReadBufferFromFile, seekBackwards)
+{
+    static constexpr size_t N = 256;
+    static constexpr size_t BUF_SIZE = 64;
+
+    auto tmp_file = createTemporaryFile("/tmp/");
+
+    {
+        WriteBufferFromFile out(tmp_file->path());
+        for (size_t i = 0; i < N; ++i)
+            writeIntBinary(i, out);
+    }
+
+    ReadBufferFromFile in(tmp_file->path(), BUF_SIZE);
+    size_t x;
+
+    /// Read something to initialize the buffer.
+    in.seek(BUF_SIZE * 10, SEEK_SET);
+    readIntBinary(x, in);
+
+    /// Check 2 consecutive seek calls without reading.
+    in.seek(BUF_SIZE * 2, SEEK_SET);
+    in.seek(BUF_SIZE, SEEK_SET);
+
+    readIntBinary(x, in);
+    ASSERT_EQ(x, 8);
+}

From 484f9d9f25d998951e742af1af23f3b76741692c Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Sun, 6 Feb 2022 22:51:01 +0000
Subject: [PATCH 106/215] Updated sort back to pdqsort

---
 base/base/BitSetSort.h | 715 -----------------------------------------
 base/base/sort.h       |   6 +-
 2 files changed, 3 insertions(+), 718 deletions(-)
 delete mode 100644 base/base/BitSetSort.h

diff --git a/base/base/BitSetSort.h b/base/base/BitSetSort.h
deleted file mode 100644
index 7fda69747de..00000000000
--- a/base/base/BitSetSort.h
+++ /dev/null
@@ -1,715 +0,0 @@
-/** https://github.com/minjaehwang/bitsetsort
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * Bitset Sort is a variant of quick sort, specifically BlockQuickSort.
- * Bitset Sort uses a carefully written partition function to let the compiler generates
- * SIMD instructions without actually writing SIMD intrinsics in the loop.
- * Bitset Sort is 3.4x faster (or spends 71% less time) than libc++ std::sort when sorting uint64s and 1.58x faster (or spends 37% less time)
- * when sorting std::string.
- * Bitset Sort uses multiple techniques to improve runtime performance of sort. This includes sorting networks,
- * a variant of merge sort called Bitonic Order Merge Sort that is faster for small N, and pattern recognitions.
- */ - -#pragma clang diagnostic ignored "-Wreserved-identifier" -#pragma clang diagnostic ignored "-Wreserved-macro-identifier" -#pragma clang diagnostic ignored "-Wunused-local-typedef" - -#ifndef _LIBCPP___BITSETSORT -#define _LIBCPP___BITSETSORT - -#include -#include -#include - -namespace stdext { //_LIBCPP_BEGIN_NAMESPACE_STD - -/// Implementation from LLVM Path https://reviews.llvm.org/D93233 - -namespace __sorting_network { - -template -class __conditional_swap { -public: - typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - - _LIBCPP_CONSTEXPR_AFTER_CXX11 _Comp_ref get() const { return comp_; } - _LIBCPP_CONSTEXPR_AFTER_CXX11 __conditional_swap(const _Comp_ref __comp) : comp_(__comp) {} - _LIBCPP_CONSTEXPR_AFTER_CXX11 inline void operator()(_RandomAccessIterator __x, _RandomAccessIterator __y) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - bool __result = comp_(*__y, *__x); - // Expect a compiler would short-circuit the following if-block. - // 4 * sizeof(size_t) is a magic number. Expect a compiler to use SIMD - // instruction on them. - if (_VSTD::is_trivially_copy_constructible::value && - _VSTD::is_trivially_copy_assignable::value && sizeof(value_type) <= 4 * sizeof(size_t)) { - value_type __min = __result ? _VSTD::move(*__y) : _VSTD::move(*__x); - *__y = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); - *__x = _VSTD::move(__min); - } else { - if (__result) { - _VSTD::iter_swap(__x, __y); - } - } - } - -private: - _Comp_ref comp_; -}; - -template -class __reverse_conditional_swap { - typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - _Comp_ref comp_; - -public: - _LIBCPP_CONSTEXPR_AFTER_CXX11 _Comp_ref get() const { return comp_; } - _LIBCPP_CONSTEXPR_AFTER_CXX11 - __reverse_conditional_swap(const _Comp_ref __comp) : comp_(__comp) {} - inline void operator()(_RandomAccessIterator __x, _RandomAccessIterator __y) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - bool __result = !comp_(*__x, *__y); - // Expect a compiler would short-circuit the following if-block. - if (_VSTD::is_trivially_copy_constructible::value && - _VSTD::is_trivially_copy_assignable::value && sizeof(value_type) <= 4 * sizeof(size_t)) { - value_type __min = __result ? _VSTD::move(*__x) : _VSTD::move(*__y); - *__y = __result ? 
_VSTD::move(*__y) : _VSTD::move(*__x); - *__x = _VSTD::move(__min); - } else { - if (!__result) { - _VSTD::iter_swap(__x, __y); - } - } - } -}; - -template -_LIBCPP_HIDE_FROM_ABI void __sort2(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 0, __a + 1); -} - -template -_LIBCPP_HIDE_FROM_ABI void __sort3(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 0, __a + 1); - __cond_swap(__a + 1, __a + 2); -} - -template -_LIBCPP_HIDE_FROM_ABI void __sort4(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 0, __a + 1); - __cond_swap(__a + 2, __a + 3); - __cond_swap(__a + 0, __a + 2); - __cond_swap(__a + 1, __a + 3); - __cond_swap(__a + 1, __a + 2); -} - -template -_LIBCPP_HIDE_FROM_ABI void __sort5(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 0, __a + 1); - __cond_swap(__a + 3, __a + 4); - __cond_swap(__a + 2, __a + 3); - __cond_swap(__a + 3, __a + 4); - __cond_swap(__a + 0, __a + 3); - __cond_swap(__a + 1, __a + 4); - __cond_swap(__a + 0, __a + 2); - __cond_swap(__a + 1, __a + 3); - __cond_swap(__a + 1, __a + 2); -} - -template -_LIBCPP_HIDE_FROM_ABI void __sort6(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 4, __a + 5); - __cond_swap(__a + 0, __a + 1); - __cond_swap(__a + 3, __a + 4); - __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 4, __a + 5); - __cond_swap(__a + 0, __a + 3); - __cond_swap(__a + 1, __a + 4); - __cond_swap(__a + 2, __a + 5); - __cond_swap(__a + 2, __a + 4); - __cond_swap(__a + 1, __a + 3); - __cond_swap(__a + 2, __a + 3); -} -template -_LIBCPP_HIDE_FROM_ABI void __sort7(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 3, __a + 4); - __cond_swap(__a + 5, __a + 6); - __cond_swap(__a + 0, __a + 1); - __cond_swap(__a + 3, __a + 5); - __cond_swap(__a + 4, __a + 6); - __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 4, __a + 5); - __cond_swap(__a + 0, __a + 4); - __cond_swap(__a + 1, __a + 5); - __cond_swap(__a + 2, __a + 6); - __cond_swap(__a + 0, __a + 3); - __cond_swap(__a + 2, __a + 5); - __cond_swap(__a + 1, __a + 3); - __cond_swap(__a + 2, __a + 4); - __cond_swap(__a + 2, __a + 3); -} - -template -_LIBCPP_HIDE_FROM_ABI void __sort8(_RandomAccessIterator __a, _ConditionalSwap __cond_swap) { - __cond_swap(__a + 0, __a + 1); - __cond_swap(__a + 2, __a + 3); - __cond_swap(__a + 4, __a + 5); - __cond_swap(__a + 6, __a + 7); - __cond_swap(__a + 0, __a + 2); - __cond_swap(__a + 1, __a + 3); - __cond_swap(__a + 4, __a + 6); - __cond_swap(__a + 5, __a + 7); - __cond_swap(__a + 1, __a + 2); - __cond_swap(__a + 5, __a + 6); - __cond_swap(__a + 0, __a + 4); - __cond_swap(__a + 1, __a + 5); - __cond_swap(__a + 2, __a + 6); - __cond_swap(__a + 3, __a + 7); - __cond_swap(__a + 1, __a + 4); - __cond_swap(__a + 3, __a + 6); - __cond_swap(__a + 2, __a + 4); - __cond_swap(__a + 3, __a + 5); - __cond_swap(__a + 3, __a + 4); -} - -template -_LIBCPP_HIDE_FROM_ABI void __sort1to8(_RandomAccessIterator __a, - typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, - _ConditionalSwap __cond_swap) { - switch (__len) { - case 0: - case 1: - return; - case 2: - __sort2(__a, __cond_swap); - return; - case 3: - __sort3(__a, __cond_swap); - return; - case 4: - __sort4(__a, __cond_swap); - return; - case 5: - __sort5(__a, __cond_swap); - return; - case 6: - __sort6(__a, __cond_swap); - return; - case 7: 
- __sort7(__a, __cond_swap); - return; - case 8: - __sort8(__a, __cond_swap); - return; - } - // ignore -} -template -_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_HIDE_FROM_ABI void __sort3(_RandomAccessIterator __a0, _RandomAccessIterator __a1, - _RandomAccessIterator __a2, - _ConditionalSwap __cond_swap) { - __cond_swap(__a1, __a2); - __cond_swap(__a0, __a2); - __cond_swap(__a0, __a1); -} - -// stable, 2-3 compares, 0-2 swaps - -template -_LIBCPP_CONSTEXPR_AFTER_CXX11 _LIBCPP_HIDE_FROM_ABI unsigned -__sort3_with_number_of_swaps(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - swap(*__y, *__z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - swap(*__x, *__y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - swap(*__x, *__z); // x < y && y < z - __r = 1; - return __r; - } - swap(*__x, *__y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - swap(*__y, *__z); // x <= y && y < z - __r = 2; - } - return __r; -} - -} // namespace __sorting_network - -namespace __bitonic { -class __detail { -public: - enum { - __batch = 8, - __bitonic_batch = __batch * 2, - __small_sort_max = __bitonic_batch * 2, - }; -}; - -template -_LIBCPP_HIDE_FROM_ABI void __enforce_order(_RandomAccessIterator __first, _RandomAccessIterator __last, - _ConditionalSwap __cond_swap, _ReverseConditionalSwap __reverse_cond_swap) { - _RandomAccessIterator __i = __first; - while (__detail::__bitonic_batch <= __last - __i) { - __sorting_network::__sort8(__i, __cond_swap); - __sorting_network::__sort8(__i + __detail::__batch, __reverse_cond_swap); - __i += __detail::__bitonic_batch; - } - if (__detail::__batch <= __last - __i) { - __sorting_network::__sort8(__i, __cond_swap); - __i += __detail::__batch; - __sorting_network::__sort1to8(__i, __last - __i, __reverse_cond_swap); - } else { - __sorting_network::__sort1to8(__i, __last - __i, __cond_swap); - } -} - -class __construct { -public: - template - static inline void __op(_Type1* __result, _Type2&& __val) { - new (static_cast(__result)) _Type1(_VSTD::move(__val)); - } -}; - -class __move_assign { -public: - template - static inline void __op(_Type1 __result, _Type2&& __val) { - *__result = _VSTD::move(__val); - } -}; - -template -_LIBCPP_HIDE_FROM_ABI void __forward_merge(_InputIterator __first, _InputIterator __last, _OutputIterator __result, - _Compare __comp) { - --__last; - // The len used here is one less than the actual length. This is so that the - // comparison is carried out against 0. The final move is done - // unconditionally at the end. - typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = __last - __first; - for (; __len > 0; __len--) { - if (__comp(*__last, *__first)) { - _Copy::__op(__result, _VSTD::move(*__last)); - --__last; - } else { - _Copy::__op(__result, _VSTD::move(*__first)); - ++__first; - } - ++__result; - } - _Copy::__op(__result, _VSTD::move(*__first)); -} - -template -_LIBCPP_HIDE_FROM_ABI void __backward_merge(_InputIterator __first, _InputIterator __last, _OutputIterator __result, - _Compare __comp) { - --__last; - __result += __last - __first; - // The len used here is one less than the actual length. This is so that the - // comparison is carried out against 0. The final move is done - // unconditionally at the end. 
- typename _VSTD::iterator_traits<_InputIterator>::difference_type __len = __last - __first; - for (; __len > 0; __len--) { - if (__comp(*__first, *__last)) { - _Copy::__op(__result, _VSTD::move(*__first)); - ++__first; - } else { - _Copy::__op(__result, _VSTD::move(*__last)); - --__last; - } - --__result; - } - _Copy::__op(__result, _VSTD::move(*__first)); -} - -template -inline _LIBCPP_HIDE_FROM_ABI bool -__small_sort(_RandomAccessIterator __first, - typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __len, - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, _ConditionalSwap __cond_swap, - _ReverseConditionalSwap __reverse_cond_swap) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - typedef typename _ConditionalSwap::_Comp_ref _Comp_ref; - if (__len > __detail::__small_sort_max) { - return false; - } - _RandomAccessIterator __last = __first + __len; - __enforce_order(__first, __last, __cond_swap, __reverse_cond_swap); - if (__len <= __detail::__batch) { - // sorted. - return true; - } - const _Comp_ref __comp = __cond_swap.get(); - if (__len <= __detail::__bitonic_batch) { - // single bitonic order merge. - __forward_merge<__construct, _Comp_ref>(__first, __last, __buff, _Comp_ref(__comp)); - _VSTD::copy(_VSTD::make_move_iterator(__buff), _VSTD::make_move_iterator(__buff + __len), __first); - for (auto __iter = __buff; __iter < __buff + __len; __iter++) { - (*__iter).~value_type(); - } - return true; - } - // double bitonic order merge. - __forward_merge<__construct, _Comp_ref>(__first, __first + __detail::__bitonic_batch, __buff, _Comp_ref(__comp)); - __backward_merge<__construct, _Comp_ref>(__first + __detail::__bitonic_batch, __last, - __buff + __detail::__bitonic_batch, _Comp_ref(__comp)); - __forward_merge<__move_assign, _Comp_ref>(__buff, __buff + __len, __first, _Comp_ref(__comp)); - for (auto __iter = __buff; __iter < __buff + __len; __iter++) { - (*__iter).~value_type(); - } - return true; -} -} // namespace __bitonic - -namespace __bitsetsort { -struct __64bit_set { - typedef uint64_t __storage_t; - enum { __block_size = 64 }; - static __storage_t __blsr(__storage_t x) { - // _blsr_u64 can be used here but it did not make any performance - // difference in practice. - return x ^ (x & -x); - } - static int __clz(__storage_t x) { return __builtin_clzll(x); } - static int __ctz(__storage_t x) { return __builtin_ctzll(x); } -}; - -struct __32bit_set { - typedef uint32_t __storage_t; - enum { __block_size = 32 }; - static __storage_t __blsr(__storage_t x) { - // _blsr_u32 can be used here but it did not make any performance - // difference in practice. 
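-    // x & -x isolates the lowest set bit, so the xor below clears exactly
-    // that bit (a branch-free "reset lowest set bit").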
- return x ^ (x & -x); - } - static int __clz(__storage_t x) { return __builtin_clzl(x); } - static int __ctz(__storage_t x) { return __builtin_ctzl(x); } -}; - -template -struct __set_selector { - typedef __64bit_set __set; -}; - -template <> -struct __set_selector<4> { - typedef __32bit_set __set; -}; - -template -inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(_RandomAccessIterator __first, _RandomAccessIterator __last, - typename _Bitset::__storage_t& __left_bitset, - typename _Bitset::__storage_t& __right_bitset) { - while (__left_bitset != 0 & __right_bitset != 0) { - int tz_left = _Bitset::__ctz(__left_bitset); - __left_bitset = _Bitset::__blsr(__left_bitset); - int tz_right = _Bitset::__ctz(__right_bitset); - __right_bitset = _Bitset::__blsr(__right_bitset); - _VSTD::iter_swap(__first + tz_left, __last - tz_right); - } -} - -template -_LIBCPP_HIDE_FROM_ABI _VSTD::pair<_RandomAccessIterator, bool> -__bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typedef typename _Bitset::__storage_t __storage_t; - _RandomAccessIterator __begin = __first; - value_type __pivot(_VSTD::move(*__first)); - - // Check if pivot is less than the last element. Checking this first avoids - // comparing the first and the last iterators on each iteration as done in the - // else part. - if (__comp(__pivot, *(__last - 1))) { - // Guarded. - while (!__comp(__pivot, *++__first)) { - } - } else { - while (++__first < __last && !__comp(__pivot, *__first)) { - } - } - - if (__first < __last) { - // It will be always guarded because __bitset_sort will do the - // median-of-three before calling this. - while (__comp(__pivot, *--__last)) { - } - } - bool __already_partitioned = __first >= __last; - if (!__already_partitioned) { - _VSTD::iter_swap(__first, __last); - ++__first; - } - - // In [__first, __last) __last is not inclusive. From now one, it uses last - // minus one to be inclusive on both sides. - _RandomAccessIterator __lm1 = __last - 1; - __storage_t __left_bitset = 0; - __storage_t __right_bitset = 0; - - // Reminder: length = __lm1 - __first + 1. - while (__lm1 - __first >= 2 * _Bitset::__block_size - 1) { - if (__left_bitset == 0) { - // Possible vectorization. With a proper "-march" flag, the following loop - // will be compiled into a set of SIMD instructions. - _RandomAccessIterator __iter = __first; - for (int __j = 0; __j < _Bitset::__block_size;) { - bool __comp_result = __comp(__pivot, *__iter); - __left_bitset |= (static_cast<__storage_t>(__comp_result) << __j); - __j++; - ++__iter; - } - } - if (__right_bitset == 0) { - // Possible vectorization. With a proper "-march" flag, the following loop - // will be compiled into a set of SIMD instructions. - _RandomAccessIterator __iter = __lm1; - for (int __j = 0; __j < _Bitset::__block_size;) { - bool __comp_result = __comp(*__iter, __pivot); - __right_bitset |= (static_cast<__storage_t>(__comp_result) << __j); - __j++; - --__iter; - } - } - __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); - __first += (__left_bitset == 0) ? _Bitset::__block_size : 0; - __lm1 -= (__right_bitset == 0) ? _Bitset::__block_size : 0; - } - // Now, we have a less-than a block on each side. 
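-  // Fewer than two full blocks remain: classify the leftovers with partial
-  // bitsets of __l_size / __r_size entries, swap matching pairs as before,
-  // and compact any bits still set toward the partition boundary.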
- difference_type __remaining_len = __lm1 - __first + 1; - difference_type __l_size; - difference_type __r_size; - if (__left_bitset == 0 && __right_bitset == 0) { - __l_size = __remaining_len / 2; - __r_size = __remaining_len - __l_size; - } else if (__left_bitset == 0) { - // We know at least one side is a full block. - __l_size = __remaining_len - _Bitset::__block_size; - __r_size = _Bitset::__block_size; - } else { // if (__right_bitset == 0) - __l_size = _Bitset::__block_size; - __r_size = __remaining_len - _Bitset::__block_size; - } - if (__left_bitset == 0) { - _RandomAccessIterator __iter = __first; - for (int j = 0; j < __l_size; j++) { - bool __comp_result = __comp(__pivot, *__iter); - __left_bitset |= (static_cast<__storage_t>(__comp_result) << j); - ++__iter; - } - } - if (__right_bitset == 0) { - _RandomAccessIterator __iter = __lm1; - for (int j = 0; j < __r_size; j++) { - bool __comp_result = __comp(*__iter, __pivot); - __right_bitset |= (static_cast<__storage_t>(__comp_result) << j); - --__iter; - } - } - __swap_bitmap_pos<_Bitset>(__first, __lm1, __left_bitset, __right_bitset); - __first += (__left_bitset == 0) ? __l_size : 0; - __lm1 -= (__right_bitset == 0) ? __r_size : 0; - - if (__left_bitset) { - // Swap within the left side. - // Need to find set positions in the reverse order. - while (__left_bitset != 0) { - int __tz_left = _Bitset::__block_size - 1 - _Bitset::__clz(__left_bitset); - __left_bitset &= (static_cast<__storage_t>(1) << __tz_left) - 1; - _RandomAccessIterator it = __first + __tz_left; - if (it != __lm1) { - _VSTD::iter_swap(it, __lm1); - } - --__lm1; - } - __first = __lm1 + 1; - } else if (__right_bitset) { - // Swap within the right side. - // Need to find set positions in the reverse order. - while (__right_bitset != 0) { - int __tz_right = _Bitset::__block_size - 1 - _Bitset::__clz(__right_bitset); - __right_bitset &= (static_cast<__storage_t>(1) << __tz_right) - 1; - _RandomAccessIterator it = __lm1 - __tz_right; - if (it != __first) { - _VSTD::iter_swap(it, __first); - } - ++__first; - } - } - - _RandomAccessIterator __pivot_pos = __first - 1; - if (__begin != __pivot_pos) { - *__begin = _VSTD::move(*__pivot_pos); - } - *__pivot_pos = _VSTD::move(__pivot); - return _VSTD::make_pair(__pivot_pos, __already_partitioned); -} - -template -inline _LIBCPP_HIDE_FROM_ABI bool __partial_insertion_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - if (__first == __last) - return true; - - const unsigned __limit = 8; - unsigned __count = 0; - _RandomAccessIterator __j = __first; - for (_RandomAccessIterator __i = __j + 1; __i != __last; ++__i) { - if (__comp(*__i, *__j)) { - value_type __t(_VSTD::move(*__i)); - _RandomAccessIterator __k = __j; - __j = __i; - do { - *__j = _VSTD::move(*__k); - __j = __k; - } while (__j != __first && __comp(__t, *--__k)); - *__j = _VSTD::move(__t); - if (++__count == __limit) - return ++__i == __last; - } - __j = __i; - } - return true; -} - -template -void __bitsetsort_loop(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type* __buff, - typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type __limit) { - _LIBCPP_CONSTEXPR_AFTER_CXX11 int __ninther_threshold = 128; - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typedef typename 
_VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - __sorting_network::__conditional_swap<_RandomAccessIterator, _Compare> __cond_swap(__comp); - __sorting_network::__reverse_conditional_swap<_RandomAccessIterator, _Compare> __reverse_cond_swap(__comp); - while (true) { - if (__limit == 0) { - // Fallback to heap sort as Introsort suggests. - _VSTD::make_heap<_RandomAccessIterator, _Comp_ref>(__first, __last, _Comp_ref(__comp)); - _VSTD::sort_heap<_RandomAccessIterator, _Comp_ref>(__first, __last, _Comp_ref(__comp)); - return; - } - __limit--; - difference_type __len = __last - __first; - if (__len <= __bitonic::__detail::__batch) { - __sorting_network::__sort1to8(__first, __len, __cond_swap); - return; - } else if (__len <= __bitonic::__detail::__small_sort_max) { - __bitonic::__small_sort(__first, __len, __buff, __cond_swap, __reverse_cond_swap); - return; - } - difference_type __half_len = __len / 2; - if (__len > __ninther_threshold) { - __sorting_network::__sort3(__first, __first + __half_len, __last - 1, __cond_swap); - __sorting_network::__sort3(__first + 1, __first + (__half_len - 1), __last - 2, __cond_swap); - __sorting_network::__sort3(__first + 2, __first + (__half_len + 1), __last - 3, __cond_swap); - __sorting_network::__sort3(__first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), - __cond_swap); - _VSTD::iter_swap(__first, __first + __half_len); - } else { - __sorting_network::__sort3(__first + __half_len, __first, __last - 1, __cond_swap); - } - auto __ret = __bitset_partition<__64bit_set, _RandomAccessIterator, _Comp_ref>(__first, __last, _Comp_ref(__comp)); - if (__ret.second) { - bool __left = __partial_insertion_sort<_Comp_ref>(__first, __ret.first, _Comp_ref(__comp)); - bool __right = __partial_insertion_sort<_Comp_ref>(__ret.first + 1, __last, _Comp_ref(__comp)); - if (__right) { - if (__left) - return; - __last = __ret.first; - continue; - } else { - if (__left) { - __first = ++__ret.first; - continue; - } - } - } - - // Sort smaller range with recursive call and larger with tail recursion - // elimination. - if (__ret.first - __first < __last - __ret.first) { - __bitsetsort_loop<_Compare>(__first, __ret.first, __comp, __buff, __limit); - __first = ++__ret.first; - } else { - __bitsetsort_loop<_Compare>(__ret.first + 1, __last, __comp, __buff, __limit); - __last = __ret.first; - } - } -} - -template -inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) { - _Number __log2 = 0; - while (__n > 1) { - __log2++; - __n >>= 1; - } - return __log2; -} - -template -inline _LIBCPP_HIDE_FROM_ABI void __bitsetsort_internal(_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::value_type value_type; - typedef typename _VSTD::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typename _VSTD::aligned_storage::type __buff[__bitonic::__detail::__small_sort_max]; - typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - - // 2*log2 comes from Introsort https://reviews.llvm.org/D36423. 
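-  // For example, __log2i(1000) == 9, so a 1000-element range may recurse at
-  // most 18 levels before __bitsetsort_loop falls back to heap sort.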
- difference_type __depth_limit = 2 * __log2i(__last - __first); - __bitsetsort_loop<_Comp_ref>(__first, __last, _Comp_ref(__comp), reinterpret_cast(&__buff[0]), - __depth_limit); -} -} // namespace __bitsetsort - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void bitsetsort(_RandomAccessIterator __first, - _RandomAccessIterator __last, _Compare __comp) { - typedef typename _VSTD::__comp_ref_type<_Compare>::type _Comp_ref; - if (_VSTD::__libcpp_is_constant_evaluated()) { - _VSTD::__partial_sort<_Comp_ref>(__first, __last, __last, _Comp_ref(__comp)); - } else { - __bitsetsort::__bitsetsort_internal<_Comp_ref>(_VSTD::__unwrap_iter(__first), _VSTD::__unwrap_iter(__last), - _Comp_ref(__comp)); - } -} - -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX17 void bitsetsort(_RandomAccessIterator __first, - _RandomAccessIterator __last) { - bitsetsort(__first, __last, __less::value_type>()); -} - -} // namespace stdext - -#endif // _LIBCPP___BITSETSORT diff --git a/base/base/sort.h b/base/base/sort.h index 114ad6f359a..592a899a291 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wold-style-cast" @@ -30,7 +30,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compar template void sort(RandomIt first, RandomIt last, Compare compare) { - ::stdext::bitsetsort(first, last, compare); + ::pdqsort(first, last, compare); } template @@ -38,5 +38,5 @@ void sort(RandomIt first, RandomIt last) { using value_type = typename std::iterator_traits::value_type; using comparator = std::less; - ::stdext::bitsetsort(first, last, comparator()); + ::pdqsort(first, last, comparator()); } From 2bf66379ee130f13e47d2ba4ca43f0345feb0088 Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 10:13:54 +0800 Subject: [PATCH 107/215] sync index: add sqlite --- docs/zh/engines/database-engines/index.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/zh/engines/database-engines/index.md b/docs/zh/engines/database-engines/index.md index 10be2e0f041..e4647da154d 100644 --- a/docs/zh/engines/database-engines/index.md +++ b/docs/zh/engines/database-engines/index.md @@ -26,4 +26,6 @@ toc_title: Introduction - [Replicated](../../engines/database-engines/replicated.md) +- [SQLite](../../engines/database-engines/sqlite.md) + [来源文章](https://clickhouse.com/docs/en/database_engines/) From 87fc42b2711b730ad32f27450feeb15753a67958 Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 10:34:16 +0800 Subject: [PATCH 108/215] Translate zh/engines/database-engines/sqlite: rename old file --- docs/zh/engines/database-engines/{sqlite.md => sqlite.md.bak} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/zh/engines/database-engines/{sqlite.md => sqlite.md.bak} (100%) diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md.bak similarity index 100% rename from docs/zh/engines/database-engines/sqlite.md rename to docs/zh/engines/database-engines/sqlite.md.bak From 2debfc922da002500db43683babeeb59394a211e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 7 Feb 2022 10:47:11 +0800 Subject: [PATCH 109/215] Better projection format and test fixes --- src/Parsers/ASTProjectionDeclaration.cpp | 2 +- src/Parsers/ASTProjectionSelectQuery.cpp | 12 +++++++++++- src/Parsers/ParserProjectionSelectQuery.cpp | 1 + src/Storages/ProjectionsDescription.cpp | 4 +++- .../01710_projection_aggregation_in_order.sql 
| 6 +++--- 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTProjectionDeclaration.cpp b/src/Parsers/ASTProjectionDeclaration.cpp index 740a2fe3efd..60050986161 100644 --- a/src/Parsers/ASTProjectionDeclaration.cpp +++ b/src/Parsers/ASTProjectionDeclaration.cpp @@ -20,7 +20,7 @@ void ASTProjectionDeclaration::formatImpl(const FormatSettings & settings, Forma settings.ostr << backQuoteIfNeed(name); std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); std::string nl_or_nothing = settings.one_line ? "" : "\n"; - settings.ostr << nl_or_nothing << indent_str << "(" << nl_or_nothing; + settings.ostr << settings.nl_or_ws << indent_str << "(" << nl_or_nothing; FormatStateStacked frame_nested = frame; frame_nested.need_parens = false; ++frame_nested.indent; diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 7a855eb2be2..8526c7aef26 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -72,8 +72,18 @@ void ASTProjectionSelectQuery::formatImpl(const FormatSettings & s, FormatState if (orderBy()) { + /// Let's convert the ASTFunction into ASTExpressionList, which generates consistent format + /// between GROUP BY and ORDER BY projection definition. s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "ORDER BY " << (s.hilite ? hilite_none : ""); - orderBy()->formatImpl(s, state, frame); + ASTPtr order_by; + if (auto * func = orderBy()->as()) + order_by = func->arguments; + else + { + order_by = std::make_shared(); + order_by->children.push_back(orderBy()); + } + s.one_line ? order_by->formatImpl(s, state, frame) : order_by->as().formatImplMultiline(s, state, frame); } } diff --git a/src/Parsers/ParserProjectionSelectQuery.cpp b/src/Parsers/ParserProjectionSelectQuery.cpp index 0467f84de2a..b2adb5cf154 100644 --- a/src/Parsers/ParserProjectionSelectQuery.cpp +++ b/src/Parsers/ParserProjectionSelectQuery.cpp @@ -55,6 +55,7 @@ bool ParserProjectionSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & return false; } + // ORDER BY needs to be an ASTFunction so that we can use it as a sorting key if (s_order_by.ignore(pos, expected)) { ASTPtr expr_list; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 5c9ae46dd60..7c340cda739 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -107,7 +107,9 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const auto external_storage_holder = std::make_shared(query_context, columns, ConstraintsDescription{}); StoragePtr storage = external_storage_holder->getTable(); InterpreterSelectQuery select( - result.query_ast, query_context, storage, {}, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias()); + result.query_ast, query_context, storage, {}, + /// Here we ignore ast optimizations because otherwise aggregation keys may be removed from result header as constants. 
+ SelectQueryOptions{QueryProcessingStage::WithMergeableState}.modify().ignoreAlias().ignoreASTOptimizationsAlias()); result.required_columns = select.getRequiredColumns(); result.sample_block = select.getSampleBlock(); diff --git a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql index af2a5dc8253..557bd297436 100644 --- a/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql +++ b/tests/queries/0_stateless/01710_projection_aggregation_in_order.sql @@ -11,7 +11,7 @@ CREATE TABLE normal ts, key, value - ORDER BY (ts, key) + ORDER BY ts, key ) ) ENGINE = MergeTree @@ -23,7 +23,7 @@ INSERT INTO normal SELECT number FROM numbers(100000); -SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection = 1; +SET allow_experimental_projection_optimization=1, optimize_aggregation_in_order=1, force_optimize_projection=1; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY a ORDER BY v LIMIT 5; WITH toStartOfHour(ts) AS a SELECT sum(value) v FROM normal WHERE ts > '2021-12-06 22:00:00' GROUP BY toStartOfHour(ts), a ORDER BY v LIMIT 5; @@ -41,7 +41,7 @@ CREATE TABLE agg ts, key, sum(value) - GROUP BY (ts, key) + GROUP BY ts, key ) ) ENGINE = MergeTree From 2244b7ea165162de4aba6cd4ea4779552f14b139 Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 11:32:31 +0800 Subject: [PATCH 110/215] Translate zh/engines/database-engines/sqlite: reimport files --- docs/zh/engines/database-engines/sqlite.md | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 docs/zh/engines/database-engines/sqlite.md diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md new file mode 100644 index 00000000000..ee9db90859f --- /dev/null +++ b/docs/zh/engines/database-engines/sqlite.md @@ -0,0 +1,80 @@ +--- +toc_priority: 32 +toc_title: SQLite +--- + +# SQLite {#sqlite} + +Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and SQLite. + +## Creating a Database {#creating-a-database} + +``` sql + CREATE DATABASE sqlite_database + ENGINE = SQLite('db_path') +``` + +**Engine Parameters** + +- `db_path` — Path to a file with SQLite database. + +## Data Types Support {#data_types-support} + +| SQLite | ClickHouse | +|---------------|---------------------------------------------------------| +| INTEGER | [Int32](../../sql-reference/data-types/int-uint.md) | +| REAL | [Float32](../../sql-reference/data-types/float.md) | +| TEXT | [String](../../sql-reference/data-types/string.md) | +| BLOB | [String](../../sql-reference/data-types/string.md) | + +## Specifics and Recommendations {#specifics-and-recommendations} + +SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked. +SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself. 
+ +## Usage Example {#usage-example} + +Database in ClickHouse, connected to the SQLite: + +``` sql +CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db'); +SHOW TABLES FROM sqlite_db; +``` + +``` text +┌──name───┐ +│ table1 │ +│ table2 │ +└─────────┘ +``` + +Shows the tables: + +``` sql +SELECT * FROM sqlite_db.table1; +``` + +``` text +┌─col1──┬─col2─┐ +│ line1 │ 1 │ +│ line2 │ 2 │ +│ line3 │ 3 │ +└───────┴──────┘ +``` +Inserting data into SQLite table from ClickHouse table: + +``` sql +CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2; +INSERT INTO clickhouse_table VALUES ('text',10); +INSERT INTO sqlite_db.table1 SELECT * FROM clickhouse_table; +SELECT * FROM sqlite_db.table1; +``` + +``` text +┌─col1──┬─col2─┐ +│ line1 │ 1 │ +│ line2 │ 2 │ +│ line3 │ 3 │ +│ text │ 10 │ +└───────┴──────┘ +``` From 40a22ded9d3d1f2c30e709352ab45416dc42c69d Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 11:38:43 +0800 Subject: [PATCH 111/215] Translate zh/engines/database-engines/sqlite: translate to zh --- docs/zh/engines/database-engines/sqlite.md | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md index ee9db90859f..9bc67438819 100644 --- a/docs/zh/engines/database-engines/sqlite.md +++ b/docs/zh/engines/database-engines/sqlite.md @@ -5,20 +5,20 @@ toc_title: SQLite # SQLite {#sqlite} -Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and perform `INSERT` and `SELECT` queries to exchange data between ClickHouse and SQLite. +允许连接到[SQLite](https://www.sqlite.org/index.html)数据库,并支持ClickHouse和SQLite交换数据, 执行 `INSERT` 和 `SELECT` 查询。 -## Creating a Database {#creating-a-database} +## 创建一个数据库 {#creating-a-database} ``` sql CREATE DATABASE sqlite_database ENGINE = SQLite('db_path') ``` -**Engine Parameters** +** 引擎参数 ** -- `db_path` — Path to a file with SQLite database. +- `db_path` — SQLite 数据库文件的路径. -## Data Types Support {#data_types-support} +## 数据类型的支持 {#data_types-support} | SQLite | ClickHouse | |---------------|---------------------------------------------------------| @@ -27,14 +27,14 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe | TEXT | [String](../../sql-reference/data-types/string.md) | | BLOB | [String](../../sql-reference/data-types/string.md) | -## Specifics and Recommendations {#specifics-and-recommendations} +## 技术细节和建议 {#specifics-and-recommendations} -SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked. -SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself. 
+SQLite将整个数据库(定义、表、索引和数据本身)存储为主机上的单个跨平台文件。在写入过程中,SQLite会锁定整个数据库文件,因此写入操作是顺序执行的。读操作可以是多任务的。 +SQLite不需要服务管理(如启动脚本)或基于“GRANT”和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。 -## Usage Example {#usage-example} +## 使用示例 {#usage-example} -Database in ClickHouse, connected to the SQLite: +数据库在ClickHouse,连接到SQLite: ``` sql CREATE DATABASE sqlite_db ENGINE = SQLite('sqlite.db'); @@ -48,7 +48,7 @@ SHOW TABLES FROM sqlite_db; └─────────┘ ``` -Shows the tables: +展示数据表中的内容: ``` sql SELECT * FROM sqlite_db.table1; @@ -61,7 +61,7 @@ SELECT * FROM sqlite_db.table1; │ line3 │ 3 │ └───────┴──────┘ ``` -Inserting data into SQLite table from ClickHouse table: +从ClickHouse表插入数据到SQLite表: ``` sql CREATE TABLE clickhouse_table(`col1` String,`col2` Int16) ENGINE = MergeTree() ORDER BY col2; From 02be572625dc2128af2ffc3d8c14e707eb03f70f Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 11:39:35 +0800 Subject: [PATCH 112/215] Translate zh/engines/database-engines/sqlite: remove old file --- docs/zh/engines/database-engines/sqlite.md.bak | 1 - 1 file changed, 1 deletion(-) delete mode 120000 docs/zh/engines/database-engines/sqlite.md.bak diff --git a/docs/zh/engines/database-engines/sqlite.md.bak b/docs/zh/engines/database-engines/sqlite.md.bak deleted file mode 120000 index 776734647c2..00000000000 --- a/docs/zh/engines/database-engines/sqlite.md.bak +++ /dev/null @@ -1 +0,0 @@ -../../../en/engines/database-engines/sqlite.md \ No newline at end of file From a584d962d69f2780f54e4dc2f4969977453eb5d4 Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 11:46:00 +0800 Subject: [PATCH 113/215] Translate zh/engines/database-engines/sqlite: fix engine parameters --- docs/zh/engines/database-engines/sqlite.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md index 9bc67438819..d8b91943b0c 100644 --- a/docs/zh/engines/database-engines/sqlite.md +++ b/docs/zh/engines/database-engines/sqlite.md @@ -14,7 +14,7 @@ toc_title: SQLite ENGINE = SQLite('db_path') ``` -** 引擎参数 ** +**引擎参数** - `db_path` — SQLite 数据库文件的路径. 
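The SQLite type mapping documented in the patches above is small enough to mirror in a single lookup. A minimal C++ sketch of that mapping, for illustration only (the helper name `mapSQLiteType` is hypothetical; the actual engine implementation under src/Databases/SQLite is not part of this patch series):

```cpp
#include <map>
#include <optional>
#include <string>

// Hypothetical sketch of the SQLite -> ClickHouse scalar type mapping
// described in the document above; unsupported types yield std::nullopt.
std::optional<std::string> mapSQLiteType(const std::string & sqlite_type)
{
    static const std::map<std::string, std::string> mapping = {
        {"INTEGER", "Int32"},
        {"REAL", "Float32"},
        {"TEXT", "String"},
        {"BLOB", "String"},
    };

    auto it = mapping.find(sqlite_type);
    if (it == mapping.end())
        return std::nullopt;
    return it->second;
}
```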
From 26f2a0ef5147fbb5b54b63cc91f035ce359be61e Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 7 Feb 2022 13:34:13 +0800 Subject: [PATCH 114/215] move clickhouse-format code from settings to format.cpp --- programs/format/Format.cpp | 6 +++++- src/Core/Settings.cpp | 10 ---------- src/Core/Settings.h | 8 -------- 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 9c756a27915..835afcdb2ed 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -58,7 +58,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ; Settings cmd_settings; - cmd_settings.addFormatOptions(desc); + for (const auto & field : cmd_settings.all()) + { + if (field.getName() == "max_parser_depth" || field.getName() == "max_query_size") + cmd_settings.addProgramOption(desc, field); + } boost::program_options::variables_map options; boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 772cadab3fc..87d7eee0daa 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -89,16 +89,6 @@ void Settings::addProgramOptions(boost::program_options::options_description & o } } -void Settings::addFormatOptions(boost::program_options::options_description & options) -{ - for (const auto & field : all()) - { - const auto & name = field.getName(); - if (formatSettingNames.count(name)) - addProgramOption(options, field); - } -} - void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field) { const std::string_view name = field.getName(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f46066a426f..430c7a194eb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -712,21 +712,13 @@ struct Settings : public BaseSettings, public IHints<2, Settings /// (Don't forget to call notify() on the `variables_map` after parsing it!) void addProgramOptions(boost::program_options::options_description & options); - /// Adds program options for clickhouse-format to set the settings from a command line. - /// (Don't forget to call notify() on the `variables_map` after parsing it!) - void addFormatOptions(boost::program_options::options_description & options); - /// Check that there is no user-level settings at the top level in config. /// This is a common source of mistake (user don't know where to write user-level setting). static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); std::vector getAllRegisteredNames() const override; -private: void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field); - - inline static const std::unordered_set formatSettingNames - = {"max_parser_depth", "max_query_size"}; }; /* From b2ba0c4320044bca6c98aa304450c869843b6c8a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 7 Feb 2022 03:20:53 +0700 Subject: [PATCH 115/215] Fix inserting to temporary tables via gRPC. 
--- src/Server/GRPCServer.cpp | 34 +------------------- tests/integration/test_grpc_protocol/test.py | 25 ++++++++++---- 2 files changed, 20 insertions(+), 39 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index f252561d63b..8aa729b8883 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -990,41 +989,10 @@ namespace assert(!pipeline); auto source = query_context->getInputFormat( input_format, *read_buffer, header, query_context->getSettings().max_insert_block_size); + QueryPipelineBuilder builder; builder.init(Pipe(source)); - /// Add default values if necessary. - if (ast) - { - if (insert_query) - { - auto table_id = StorageID::createEmpty(); - - if (insert_query->table_id) - { - table_id = query_context->resolveStorageID(insert_query->table_id, Context::ResolveOrdinary); - } - else - { - StorageID local_table_id(insert_query->getDatabase(), insert_query->getTable()); - table_id = query_context->resolveStorageID(local_table_id, Context::ResolveOrdinary); - } - - if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields && table_id) - { - StoragePtr storage = DatabaseCatalog::instance().getTable(table_id, query_context); - const auto & columns = storage->getInMemoryMetadataPtr()->getColumns(); - if (!columns.empty()) - { - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(cur_header, columns, *source, query_context); - }); - } - } - } - } - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); pipeline_executor = std::make_unique(*pipeline); } diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 2a91ebcd94b..b6968575883 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -177,13 +177,13 @@ def test_insert_query_delimiter(): assert query("SELECT a FROM t ORDER BY a") == "1\n5\n234\n" def test_insert_default_column(): - query("CREATE TABLE t (a UInt8, b Int32 DEFAULT 100, c String DEFAULT 'c') ENGINE = Memory") + query("CREATE TABLE t (a UInt8, b Int32 DEFAULT 100 - a, c String DEFAULT 'c') ENGINE = Memory") query("INSERT INTO t (c, a) VALUES ('x',1),('y',2)") query("INSERT INTO t (a) FORMAT TabSeparated", input_data="3\n4\n") - assert query("SELECT * FROM t ORDER BY a") == "1\t100\tx\n" \ - "2\t100\ty\n" \ - "3\t100\tc\n" \ - "4\t100\tc\n" + assert query("SELECT * FROM t ORDER BY a") == "1\t99\tx\n" \ + "2\t98\ty\n" \ + "3\t97\tc\n" \ + "4\t96\tc\n" def test_insert_splitted_row(): query("CREATE TABLE t (a UInt8) ENGINE = Memory") @@ -257,7 +257,7 @@ def test_progress(): } ]""" -def test_session(): +def test_session_settings(): session_a = "session A" session_b = "session B" query("SET custom_x=1", session_id=session_a) @@ -267,9 +267,22 @@ def test_session(): assert query("SELECT getSetting('custom_x'), getSetting('custom_y')", session_id=session_a) == "1\t2\n" assert query("SELECT getSetting('custom_x'), getSetting('custom_y')", session_id=session_b) == "3\t4\n" +def test_session_temp_tables(): + session_a = "session A" + session_b = "session B" + query("CREATE TEMPORARY TABLE my_temp_table(a Int8)", session_id=session_a) + query("INSERT INTO my_temp_table VALUES (10)", session_id=session_a) + assert query("SELECT * FROM my_temp_table", session_id=session_a) == "10\n" + query("CREATE TEMPORARY TABLE my_temp_table(a Int8)", session_id=session_b) + 
query("INSERT INTO my_temp_table VALUES (20)", session_id=session_b) + assert query("SELECT * FROM my_temp_table", session_id=session_b) == "20\n" + assert query("SELECT * FROM my_temp_table", session_id=session_a) == "10\n" + def test_no_session(): e = query_and_get_error("SET custom_x=1") assert "There is no session" in e.display_text + e = query_and_get_error("CREATE TEMPORARY TABLE my_temp_table(a Int8)") + assert "There is no session" in e.display_text def test_input_function(): query("CREATE TABLE t (a UInt8) ENGINE = Memory") From eeec2478bace67b09a670236479685b953217a5d Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Sun, 6 Feb 2022 22:36:35 -0800 Subject: [PATCH 116/215] Fix clang-tidy issue --- src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index 6b2ee1c8039..af31ef01fcd 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -292,7 +292,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::initAggregateDescription AggregatingSortedAlgorithm::AggregatingSortedAlgorithm( const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) - : IMergingAlgorithmWithDelayedChunk(num_inputs, std::move(description_)) + : IMergingAlgorithmWithDelayedChunk(num_inputs, description_) , columns_definition(defineColumns(header, description_)) , merged_data(getMergedColumns(header, columns_definition), max_block_size, columns_definition) { From 80ed9d762729b2042c31bf4d1bce012831c48fa4 Mon Sep 17 00:00:00 2001 From: cnmade Date: Mon, 7 Feb 2022 14:45:16 +0800 Subject: [PATCH 117/215] Translate zh/engines/table-engines/hive: update index and sync hive translate to zh --- .../table-engines/integrations/hive.md | 416 ++++++++++++++++++ .../table-engines/integrations/index.md | 2 + 2 files changed, 418 insertions(+) create mode 100644 docs/zh/engines/table-engines/integrations/hive.md diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md new file mode 100644 index 00000000000..dff8a9a4d65 --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -0,0 +1,416 @@ +--- +toc_priority: 4 +toc_title: Hive +--- + +# Hive {#hive} + +Hive引擎允许对HDFS Hive表执行SELECT查询。目前它支持如下输入格式: + +-文本:只支持简单的标量列类型,除了 `Binary` + +- ORC:支持简单的标量列类型,除了`char`; 只支持 `array` 这样的复杂类型 + +- Parquet:支持所有简单标量列类型;只支持 `array` 这样的复杂类型 + +## 创建表 {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [ALIAS expr1], + name2 [type2] [ALIAS expr2], + ... +) ENGINE = Hive('thrift://host:port', 'database', 'table'); +PARTITION BY expr +``` +查看[CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query)查询的详细描述。 + +表的结构可以与原来的Hive表结构有所不同: +- 列名应该与原来的Hive表相同,但你可以使用这些列中的一些,并以任何顺序,你也可以使用一些从其他列计算的别名列。 +- 列类型与原Hive表的列类型保持一致。 +- “Partition by expression”应与原Hive表保持一致,“Partition by expression”中的列应在表结构中。 + +**引擎参数** + +- `thrift://host:port` — Hive Metastore 地址 + +- `database` — 远程数据库名. + +- `table` — 远程数据表名. 
+
+## 使用示例 {#usage-example}
+
+### 如何使用HDFS文件系统的本地缓存
+我们强烈建议您为远程文件系统启用本地缓存。基准测试显示,如果使用缓存,它的速度会快两倍。
+
+在使用缓存之前,请将其添加到 `config.xml`
+``` xml
+<local_cache_for_remote_fs>
+    <enable>true</enable>
+    <root_dir>local_cache</root_dir>
+    <limit_size>559096952</limit_size>
+    <bytes_read_before_flush>1048576</bytes_read_before_flush>
+</local_cache_for_remote_fs>
+```
+
+
+- enable: true:开启后,ClickHouse将为HDFS (远程文件系统)维护本地缓存。
+- root_dir: 必需的。用于存储远程文件系统的本地缓存文件的根目录。
+- limit_size: 必需的。本地缓存文件的最大大小(单位为字节)。
+- bytes_read_before_flush: 从远程文件系统下载文件时,刷新到本地文件系统前的控制字节数。缺省值为1MB。
+
+当ClickHouse为远程文件系统启用了本地缓存时,用户仍然可以选择不使用缓存,并在查询中设置`use_local_cache_for_remote_fs = 0`, `use_local_cache_for_remote_fs` 默认为 `false`。
+
+### 查询 ORC 输入格式的Hive 表
+
+#### 在 Hive 中建表
+``` text
+hive > CREATE TABLE `test`.`test_orc`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array<int>,
+  `f_array_string` array<string>,
+  `f_array_float` array<float>,
+  `f_array_array_int` array<array<int>>,
+  `f_array_array_string` array<array<string>>,
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_orc'
+
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_orc partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_orc;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-11-05 12:38:16.314 2021-11-05 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.295 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+ClickHouse中的表,从上面创建的Hive表中获取数据:
+
+``` sql
+CREATE TABLE test.test_orc
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+PARTITION BY day
+
+```
+
+``` sql
+SELECT * FROM test.test_orc settings input_format_orc_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test.test_orc
+SETTINGS input_format_orc_allow_missing_columns = 1
+
+Query id: c3eaffdc-78ab-43cd-96a4-4acc5b480658
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-04 04:00:44
+f_date: 2021-12-03
+f_string: hello world
+f_varchar: hello world
+f_bool: true
+f_binary: hello world
+f_array_int: [1,2,3]
+f_array_string: ['hello world','hello world']
+f_array_float: [1.1,1.2]
+f_array_array_int: [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float: [[1.11,2.22],[3.33,4.44]]
+day: 2021-09-18
+
+
+1 rows in set. Elapsed: 0.078 sec.
+```
+
+### 查询 Parquet 输入格式的Hive 表
+
+#### 在 Hive 中建表
+``` text
+hive >
+CREATE TABLE `test`.`test_parquet`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_char` char(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array<int>,
+  `f_array_string` array<string>,
+  `f_array_float` array<float>,
+  `f_array_array_int` array<array<int>>,
+  `f_array_array_string` array<array<string>>,
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_parquet'
+OK
+Time taken: 0.51 seconds
+
+hive > insert into test.test_parquet partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_parquet;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-12-14 17:54:56.743 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.766 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+ClickHouse 中的表, 从上面创建的Hive表中获取数据:
+
+``` sql
+CREATE TABLE test.test_parquet
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `f_binary` String,
+    `f_array_int` Array(Int32),
+    `f_array_string` Array(String),
+    `f_array_float` Array(Float32),
+    `f_array_array_int` Array(Array(Int32)),
+    `f_array_array_string` Array(Array(String)),
+    `f_array_array_float` Array(Array(Float32)),
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_parquet')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_parquet settings input_format_parquet_allow_missing_columns = 1\G
+```
+
+``` text
+SELECT *
+FROM test_parquet
+SETTINGS input_format_parquet_allow_missing_columns = 1
+
+Query id: 4e35cf02-c7b2-430d-9b81-16f438e5fca9
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-14 17:54:56
+f_date: 2021-12-14
+f_string: hello world
+f_varchar: hello world
+f_char: hello world
+f_bool: true
+f_binary: hello world
+f_array_int: [1,2,3]
+f_array_string: ['hello world','hello world']
+f_array_float: [1.1,1.2]
+f_array_array_int: [[1,2],[3,4]]
+f_array_array_string: [['a','b'],['c','d']]
+f_array_array_float: [[1.11,2.22],[3.33,4.44]]
+day: 2021-09-18
+
+1 rows 
in set. Elapsed: 0.357 sec.
+```
+
+### 查询文本输入格式的Hive表
+
+#### 在Hive 中建表
+
+``` text
+hive >
+CREATE TABLE `test`.`test_text`(
+  `f_tinyint` tinyint,
+  `f_smallint` smallint,
+  `f_int` int,
+  `f_integer` int,
+  `f_bigint` bigint,
+  `f_float` float,
+  `f_double` double,
+  `f_decimal` decimal(10,0),
+  `f_timestamp` timestamp,
+  `f_date` date,
+  `f_string` string,
+  `f_varchar` varchar(100),
+  `f_char` char(100),
+  `f_bool` boolean,
+  `f_binary` binary,
+  `f_array_int` array<int>,
+  `f_array_string` array<string>,
+  `f_array_float` array<float>,
+  `f_array_array_int` array<array<int>>,
+  `f_array_array_string` array<array<string>>,
+  `f_array_array_float` array<array<float>>)
+PARTITIONED BY (
+  `day` string)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.mapred.TextInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
+LOCATION
+  'hdfs://testcluster/data/hive/test.db/test_text'
+Time taken: 0.1 seconds, Fetched: 34 row(s)
+
+
+hive > insert into test.test_text partition(day='2021-09-18') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, current_timestamp(), current_date(), 'hello world', 'hello world', 'hello world', true, 'hello world', array(1, 2, 3), array('hello world', 'hello world'), array(float(1.1), float(1.2)), array(array(1, 2), array(3, 4)), array(array('a', 'b'), array('c', 'd')), array(array(float(1.11), float(2.22)), array(float(3.33), float(4.44)));
+OK
+Time taken: 36.025 seconds
+
+hive > select * from test.test_text;
+OK
+1 2 3 4 5 6.11 7.22 8 2021-12-14 18:11:17.239 2021-12-14 hello world hello world hello world true hello world [1,2,3] ["hello world","hello world"] [1.1,1.2] [[1,2],[3,4]] [["a","b"],["c","d"]] [[1.11,2.22],[3.33,4.44]] 2021-09-18
+Time taken: 0.624 seconds, Fetched: 1 row(s)
+```
+
+#### 在 ClickHouse 中建表
+
+
+ClickHouse中的表, 从上面创建的Hive表中获取数据:
+``` sql
+CREATE TABLE test.test_text
+(
+    `f_tinyint` Int8,
+    `f_smallint` Int16,
+    `f_int` Int32,
+    `f_integer` Int32,
+    `f_bigint` Int64,
+    `f_float` Float32,
+    `f_double` Float64,
+    `f_decimal` Float64,
+    `f_timestamp` DateTime,
+    `f_date` Date,
+    `f_string` String,
+    `f_varchar` String,
+    `f_char` String,
+    `f_bool` Bool,
+    `day` String
+)
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_text')
+PARTITION BY day
+```
+
+``` sql
+SELECT * FROM test.test_text settings input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'\G
+```
+
+``` text
+SELECT *
+FROM test.test_text
+SETTINGS input_format_skip_unknown_fields = 1, input_format_with_names_use_header = 1, date_time_input_format = 'best_effort'
+
+Query id: 55b79d35-56de-45b9-8be6-57282fbf1f44
+
+Row 1:
+──────
+f_tinyint: 1
+f_smallint: 2
+f_int: 3
+f_integer: 4
+f_bigint: 5
+f_float: 6.11
+f_double: 7.22
+f_decimal: 8
+f_timestamp: 2021-12-14 18:11:17
+f_date: 2021-12-14
+f_string: hello world
+f_varchar: hello world
+f_char: hello world
+f_bool: true
+day: 2021-09-18
+```
diff --git a/docs/zh/engines/table-engines/integrations/index.md b/docs/zh/engines/table-engines/integrations/index.md
index 0c34ae078a0..5ed4a555f9c 100644
--- a/docs/zh/engines/table-engines/integrations/index.md
+++ b/docs/zh/engines/table-engines/integrations/index.md
@@ -19,3 +19,5 @@ ClickHouse 提供了多种方式来与外部系统集成,包括表引擎。像
 - [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md)
 - [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md)
 - [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md)
+- 
[SQLite](../../../engines/table-engines/integrations/sqlite.md) +- [Hive](../../../engines/table-engines/integrations/hive.md) From b35f479dea29bbd58d581406431860b76080247c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 7 Feb 2022 12:26:50 +0300 Subject: [PATCH 118/215] Delete empty file DateOrDateTimeFunctionsConvertion.cpp --- src/Functions/DateOrDateTimeFunctionsConvertion.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/Functions/DateOrDateTimeFunctionsConvertion.cpp diff --git a/src/Functions/DateOrDateTimeFunctionsConvertion.cpp b/src/Functions/DateOrDateTimeFunctionsConvertion.cpp deleted file mode 100644 index e69de29bb2d..00000000000 From 542889d627793dd9e1043417b4dc5f74e15b049d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Feb 2022 13:01:42 +0300 Subject: [PATCH 119/215] Update clickhouse-keeper.md --- docs/en/operations/clickhouse-keeper.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md index 48eb590aca2..35ec5d858f5 100644 --- a/docs/en/operations/clickhouse-keeper.md +++ b/docs/en/operations/clickhouse-keeper.md @@ -108,8 +108,13 @@ Examples of configuration for quorum with three nodes can be found in [integrati ClickHouse Keeper is bundled into the ClickHouse server package, just add configuration of `` and start ClickHouse server as always. If you want to run standalone ClickHouse Keeper you can start it in a similar way with: ```bash -clickhouse keeper --config /etc/your_path_to_config/config.xml --daemon -example: clickhouse keeper --config /etc/clickhouse-server/config.d/keeper_config.xml +clickhouse-keeper --config /etc/your_path_to_config/config.xml +``` + +If you don't have the symlink (`clickhouse-keeper`) you can create it or specify `keeper` as argument: + +```bash +clickhouse keeper --config /etc/your_path_to_config/config.xml ``` ## Four Letter Word Commands {#four-letter-word-commands} From 89e471924c5f94d55bbf63c028a18906720279a6 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 28 Jan 2022 19:27:53 +0300 Subject: [PATCH 120/215] Add table function format(format_name, data) --- src/TableFunctions/TableFunctionFormat.cpp | 107 ++++++++++++++++++ src/TableFunctions/TableFunctionFormat.h | 33 ++++++ src/TableFunctions/TableFunctionInput.cpp | 2 - src/TableFunctions/registerTableFunctions.cpp | 2 + src/TableFunctions/registerTableFunctions.h | 2 + .../02188_table_function_file.reference | 52 +++++++++ .../0_stateless/02188_table_function_file.sql | 68 +++++++++++ 7 files changed, 264 insertions(+), 2 deletions(-) create mode 100644 src/TableFunctions/TableFunctionFormat.cpp create mode 100644 src/TableFunctions/TableFunctionFormat.h create mode 100644 tests/queries/0_stateless/02188_table_function_file.reference create mode 100644 tests/queries/0_stateless/02188_table_function_file.sql diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp new file mode 100644 index 00000000000..4bffdd4fc89 --- /dev/null +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -0,0 +1,107 @@ +#include + +#include + +#include +#include + +#include + +#include +#include + +#include +#include + +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +void TableFunctionFormat::parseArguments(const ASTPtr 
& ast_function, ContextPtr context)
+{
+    ASTs & args_func = ast_function->children;
+
+    if (args_func.size() != 1)
+        throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR);
+
+    ASTs & args = args_func.at(0)->children;
+
+    if (args.size() != 2)
+        throw Exception("Table function '" + getName() + "' requires 2 arguments: format and data", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    for (auto & arg : args)
+        arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
+
+    format = args[0]->as<ASTLiteral &>().value.safeGet<String>();
+    data = args[1]->as<ASTLiteral &>().value.safeGet<String>();
+}
+
+ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const
+{
+    auto read_buffer_creator = [&]()
+    {
+        return std::make_unique<ReadBufferFromString>(data);
+    };
+
+    return readSchemaFromFormat(format, std::nullopt, read_buffer_creator, context);
+}
+
+Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr context) const
+{
+    Block result;
+    for (const auto & name_and_type : columns.getAllPhysical())
+        result.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name});
+
+    auto read_buf = std::make_unique<ReadBufferFromString>(data);
+    auto input_format = context->getInputFormat(format, *read_buf, result, context->getSettingsRef().max_block_size);
+    QueryPipelineBuilder builder;
+    builder.init(Pipe(input_format));
+    auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
+    auto reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
+    reader->pull(result);
+
+    /// In case when data contains more than one block, we read all blocks
+    /// and combine them into one big block (this is considered a rare case).
+    Block new_block;
+    while (reader->pull(new_block))
+    {
+        for (size_t i = 0; i != result.columns(); ++i)
+        {
+            auto & result_column = result.getByPosition(i);
+            const auto & new_column = new_block.getByPosition(i);
+            auto mutable_column = IColumn::mutate(result_column.column);
+            mutable_column->insertRangeFrom(*new_column.column, 0, new_column.column->size());
+            result_column.column = std::move(mutable_column);
+        }
+    }
+
+    return result;
+}
+
+StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
+{
+    auto columns = getActualTableStructure(context);
+    Block res_block = parseData(columns, context);
+    auto res = StorageValues::create(StorageID(getDatabaseName(), table_name), columns, res_block);
+    res->startup();
+    return res;
+}
+
+void registerTableFunctionFormat(TableFunctionFactory & factory)
+{
+    factory.registerFunction<TableFunctionFormat>(TableFunctionFactory::CaseInsensitive);
+}
+
+}
diff --git a/src/TableFunctions/TableFunctionFormat.h b/src/TableFunctions/TableFunctionFormat.h
new file mode 100644
index 00000000000..c6db322343b
--- /dev/null
+++ b/src/TableFunctions/TableFunctionFormat.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <TableFunctions/ITableFunction.h>
+
+
+namespace DB
+{
+
+class Context;
+
+/* format(format_name, data) - ...
+ */ +class TableFunctionFormat : public ITableFunction +{ +public: + static constexpr auto name = "format"; + std::string getName() const override { return name; } + bool hasStaticStructure() const override { return false; } + +private: + StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + const char * getStorageTypeName() const override { return "Values"; } + + ColumnsDescription getActualTableStructure(ContextPtr context) const override; + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + + Block parseData(ColumnsDescription columns, ContextPtr context) const; + + String format; + String data; +}; + +} diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index 677a6ff3ce4..cba145ee87b 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -5,11 +5,9 @@ #include #include #include -#include #include #include #include -#include #include "registerTableFunctions.h" diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ea5c2c75f94..ed08972e74d 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -49,6 +49,8 @@ void registerTableFunctions() #endif registerTableFunctionDictionary(factory); + + registerTableFunctionFormat(factory); } } diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 8ddd9b7c8ab..72ca185f656 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -48,6 +48,8 @@ void registerTableFunctionSQLite(TableFunctionFactory & factory); void registerTableFunctionDictionary(TableFunctionFactory & factory); +void registerTableFunctionFormat(TableFunctionFactory & factory); + void registerTableFunctions(); } diff --git a/tests/queries/0_stateless/02188_table_function_file.reference b/tests/queries/0_stateless/02188_table_function_file.reference new file mode 100644 index 00000000000..40d5846eb07 --- /dev/null +++ b/tests/queries/0_stateless/02188_table_function_file.reference @@ -0,0 +1,52 @@ +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +111 Hello +123 World +123 World +123 World +123 World +123 World +111 Hello +111 Hello +111 Hello +111 Hello +111 Hello +123 World +123 World +123 World +123 World +123 World +1 2 [1,2,3] [['abc'],[],['d','e']] +c1 Nullable(Float64) +c2 Nullable(Float64) +c3 Array(Nullable(Float64)) +c4 Array(Array(Nullable(String))) +111 Hello +123 World +111 Hello +131 Hello +123 World +b Nullable(Float64) +a Nullable(String) diff --git a/tests/queries/0_stateless/02188_table_function_file.sql b/tests/queries/0_stateless/02188_table_function_file.sql new file mode 100644 index 00000000000..6d4d6a69b59 --- /dev/null +++ b/tests/queries/0_stateless/02188_table_function_file.sql @@ -0,0 +1,68 @@ +select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", 
"b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +$$); + +set max_block_size=5; + +select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +$$); + +select * from format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); +desc format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); + +drop table if exists test; + +create table test as format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "Hello", "b": 131} +{"a": "World", "b": 123} +$$); + +select * from test; +desc table test; +drop table test; + From f8cd51917be0bb62ae35239e4495246bff470c7a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 28 Jan 2022 20:46:37 +0300 Subject: [PATCH 121/215] Fix test --- .../02188_table_function_format.sql | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 tests/queries/0_stateless/02188_table_function_format.sql diff --git a/tests/queries/0_stateless/02188_table_function_format.sql b/tests/queries/0_stateless/02188_table_function_format.sql new file mode 100644 index 00000000000..ff8e2a0d53c --- /dev/null +++ b/tests/queries/0_stateless/02188_table_function_format.sql @@ -0,0 +1,70 @@ +-- Tags: no-fasttest + +select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +$$); + +set max_block_size=5; + +select * from format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +$$); + +select * from format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); +desc format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); + +drop table if exists test; + +create table test as format(JSONEachRow, +$$ +{"a": "Hello", "b": 111} +{"a": "World", "b": 123} +{"a": "Hello", "b": 111} +{"a": "Hello", "b": 131} +{"a": "World", "b": 123} +$$); + +select * from test; +desc table test; +drop table test; + From 523d1059ed071c6313356316e56e5641040563ae Mon Sep 17 00:00:00 2001 
From: alesapin Date: Mon, 7 Feb 2022 13:59:03 +0300 Subject: [PATCH 122/215] Update KeyDescription.cpp --- src/Storages/KeyDescription.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index 24b4b13bc21..f100f129cda 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -171,7 +171,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio return result; ParserExpression parser; - ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); FunctionNameNormalizer().visit(ast.get()); return getKeyFromAST(ast, columns, context); From 975ba9146f1066c0fc59842d1f5e9e674b905f65 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 7 Feb 2022 14:12:19 +0300 Subject: [PATCH 123/215] Fix tests naming, remove code duplication --- src/Core/Block.cpp | 25 +++++++ src/Core/Block.h | 2 + src/Interpreters/SortedBlocksWriter.cpp | 20 +----- src/TableFunctions/TableFunctionFormat.cpp | 31 ++++----- .../0_stateless/02188_table_function_file.sql | 68 ------------------- ... => 02188_table_function_format.reference} | 0 6 files changed, 40 insertions(+), 106 deletions(-) delete mode 100644 tests/queries/0_stateless/02188_table_function_file.sql rename tests/queries/0_stateless/{02188_table_function_file.reference => 02188_table_function_format.reference} (100%) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index dce737483c3..2615083491d 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -755,4 +755,29 @@ void materializeBlockInplace(Block & block) block.getByPosition(i).column = recursiveRemoveSparse(block.getByPosition(i).column->convertToFullColumnIfConst()); } +Block concatenateBlocks(const std::vector & blocks) +{ + if (blocks.empty()) + return {}; + + size_t num_rows = 0; + for (const auto & block : blocks) + num_rows += block.rows(); + + Block out = blocks[0].cloneEmpty(); + MutableColumns columns = out.mutateColumns(); + + for (size_t i = 0; i < columns.size(); ++i) + { + columns[i]->reserve(num_rows); + for (const auto & block : blocks) + { + const auto & tmp_column = *block.getByPosition(i).column; + columns[i]->insertRangeFrom(tmp_column, 0, block.rows()); + } + } + + out.setColumns(std::move(columns)); +} + } diff --git a/src/Core/Block.h b/src/Core/Block.h index efa5ce7c326..2624b57880c 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -203,4 +203,6 @@ ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & column Block materializeBlock(const Block & block); void materializeBlockInplace(Block & block); +Block concatenateBlocks(const std::vector & blocks); + } diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index 1945824636f..3caf144d9a8 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -320,25 +320,7 @@ Block SortedBlocksBuffer::mergeBlocks(Blocks && blocks) const if (blocks.size() == 1) return blocks[0]; - Block out = blocks[0].cloneEmpty(); - - { /// Concatenate blocks - MutableColumns columns = out.mutateColumns(); - - for (size_t i = 0; i < columns.size(); ++i) - { - columns[i]->reserve(num_rows); - for (const auto & block : blocks) - { - const auto & tmp_column = *block.getByPosition(i).column; - columns[i]->insertRangeFrom(tmp_column, 0, block.rows()); - } - } - - out.setColumns(std::move(columns)); - } - - 
return out;
+    return concatenateBlocks(blocks);
 }
 
 }
diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp
index 4bffdd4fc89..9b361285cc1 100644
--- a/src/TableFunctions/TableFunctionFormat.cpp
+++ b/src/TableFunctions/TableFunctionFormat.cpp
@@ -60,34 +60,27 @@ ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr conte
 
 Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr context) const
 {
-    Block result;
+    Block block;
     for (const auto & name_and_type : columns.getAllPhysical())
-        result.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name});
+        block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name});
 
     auto read_buf = std::make_unique<ReadBufferFromString>(data);
-    auto input_format = context->getInputFormat(format, *read_buf, result, context->getSettingsRef().max_block_size);
+    auto input_format = context->getInputFormat(format, *read_buf, block, context->getSettingsRef().max_block_size);
     QueryPipelineBuilder builder;
     builder.init(Pipe(input_format));
     auto pipeline = std::make_unique<QueryPipeline>(QueryPipelineBuilder::getPipeline(std::move(builder)));
     auto reader = std::make_unique<PullingPipelineExecutor>(*pipeline);
 
-    reader->pull(result);
-    /// In case when data contains more then 1 block we read all blocks
-    /// and combine them all to one big block (this is considered a rare case).
-    Block new_block;
-    while (reader->pull(new_block))
-    {
-        for (size_t i = 0; i != result.columns(); ++i)
-        {
-            auto & result_column = result.getByPosition(i);
-            const auto & new_column = new_block.getByPosition(i);
-            auto mutable_column = IColumn::mutate(result_column.column);
-            mutable_column->insertManyFrom(*new_column.column, 0, new_column.column->size());
-            result_column.column = std::move(mutable_column);
-        }
-    }
+    std::vector<Block> blocks;
+    while (reader->pull(block))
+        blocks.push_back(std::move(block));
 
-    return result;
+    if (blocks.size() == 1)
+        return blocks[0];
+
+    /// In case when data contains more than 1 block we combine
+    /// them all to one big block (this is considered a rare case).
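+    /// (For example, the 20-row JSONEachRow inputs above read with
+    /// max_block_size = 5 arrive as four 5-row blocks; a standalone sketch of
+    /// this squashing pattern appears further below.)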
+ return concatenateBlocks(blocks); } StoragePtr TableFunctionFormat::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const diff --git a/tests/queries/0_stateless/02188_table_function_file.sql b/tests/queries/0_stateless/02188_table_function_file.sql deleted file mode 100644 index 6d4d6a69b59..00000000000 --- a/tests/queries/0_stateless/02188_table_function_file.sql +++ /dev/null @@ -1,68 +0,0 @@ -select * from format(JSONEachRow, -$$ -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -$$); - -set max_block_size=5; - -select * from format(JSONEachRow, -$$ -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -$$); - -select * from format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); -desc format(CSV, '1,2,"[1,2,3]","[[\'abc\'], [], [\'d\', \'e\']]"'); - -drop table if exists test; - -create table test as format(JSONEachRow, -$$ -{"a": "Hello", "b": 111} -{"a": "World", "b": 123} -{"a": "Hello", "b": 111} -{"a": "Hello", "b": 131} -{"a": "World", "b": 123} -$$); - -select * from test; -desc table test; -drop table test; - diff --git a/tests/queries/0_stateless/02188_table_function_file.reference b/tests/queries/0_stateless/02188_table_function_format.reference similarity index 100% rename from tests/queries/0_stateless/02188_table_function_file.reference rename to tests/queries/0_stateless/02188_table_function_format.reference From 67ea3eeb14417e2b44dfc411103a95c36f5cce3d Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 7 Feb 2022 14:16:32 +0300 Subject: [PATCH 124/215] Fix --- src/Core/Block.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 2615083491d..26c883b308d 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -778,6 +778,7 @@ Block concatenateBlocks(const std::vector & blocks) } out.setColumns(std::move(columns)); + return out; } } From 4fa05cf07ecc94b7e8091f64bf7d7569b7b619dd Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 7 Feb 2022 14:31:21 +0300 Subject: [PATCH 125/215] Fix style and test --- src/TableFunctions/TableFunctionFormat.cpp | 1 - .../02188_table_function_format.reference | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index 9b361285cc1..a66edd0003c 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -24,7 +24,6 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; 
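// A minimal standalone illustration of the reserve-then-append pattern that
// patches 123/124 factor out of SortedBlocksBuffer::mergeBlocks into
// concatenateBlocks: reserve once for the total row count, then append every
// input's rows. Plain std::vector columns stand in for IColumn here; all
// names are hypothetical, not ClickHouse API.
#include <cstddef>
#include <iostream>
#include <vector>

using Column = std::vector<int>;   // toy one-column "block"

Column concatenateColumns(const std::vector<Column> & parts)
{
    if (parts.empty())
        return {};

    size_t num_rows = 0;
    for (const auto & part : parts)
        num_rows += part.size();

    Column out;
    out.reserve(num_rows);                         // single allocation up front
    for (const auto & part : parts)
        out.insert(out.end(), part.begin(), part.end());

    return out;                                    // the 'return out;' patch 124 restores
}

int main()
{
    std::vector<Column> parts{{1, 2}, {3}, {4, 5}};
    for (int v : concatenateColumns(parts))
        std::cout << v << ' ';                     // prints: 1 2 3 4 5
    std::cout << '\n';
}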
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } diff --git a/tests/queries/0_stateless/02188_table_function_format.reference b/tests/queries/0_stateless/02188_table_function_format.reference index 40d5846eb07..ab568fb9fe5 100644 --- a/tests/queries/0_stateless/02188_table_function_format.reference +++ b/tests/queries/0_stateless/02188_table_function_format.reference @@ -24,19 +24,19 @@ 123 World 111 Hello 123 World -123 World -123 World -123 World -123 World -111 Hello -111 Hello -111 Hello -111 Hello 111 Hello 123 World +111 Hello 123 World +111 Hello 123 World +111 Hello 123 World +111 Hello +123 World +111 Hello +123 World +111 Hello 123 World 1 2 [1,2,3] [['abc'],[],['d','e']] c1 Nullable(Float64) From 4a857d7d18810c7cb4d42d7518f8b91496c0a7ca Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Feb 2022 12:35:32 +0100 Subject: [PATCH 126/215] Use UTC in docker images --- docker/test/base/Dockerfile | 2 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/Dockerfile | 2 +- docker/test/integration/base/Dockerfile | 2 +- docker/test/integration/runner/Dockerfile | 2 +- docker/test/performance-comparison/Dockerfile | 2 +- docker/test/stateless/Dockerfile | 2 +- docker/test/testflows/runner/Dockerfile | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 6beab2e5bb7..7d7c3e28087 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -73,7 +73,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone CMD sleep 1 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 46b74d89e13..a625ab316f0 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -87,7 +87,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index eb4b09c173f..659b53bfd7e 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index 91b26735fe5..b6f2bdace01 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -60,5 +60,5 @@ clientPort=2181 \n\ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg RUN mkdir /zookeeper && chmod -R 777 /zookeeper -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 1aad2ae6770..a7a9230748f 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -40,7 +40,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean 
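The reason the test images pin a timezone at all: anything inside the container that formats local time follows the TZ variable, so test output is only reproducible if every image agrees on one zone. A short standalone C++ illustration of that dependency (not part of the patch; setenv is POSIX):

#include <cstdlib>
#include <ctime>
#include <iostream>

// Renders the same instant (the Unix epoch) under two TZ values; tzset()
// rereads TZ, so local-time output differs unless the image pins one zone.
static void printLocal(const char * tz)
{
    setenv("TZ", tz, 1);
    tzset();
    std::time_t t = 0;     // 1970-01-01 00:00:00 UTC
    char buf[32];
    std::strftime(buf, sizeof(buf), "%F %T", std::localtime(&t));
    std::cout << tz << ": " << buf << '\n';
}

int main()
{
    printLocal("UTC");             // 1970-01-01 00:00:00
    printLocal("Europe/Moscow");   // 1970-01-01 03:00:00
}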
-ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index eddaf969f33..5037739be36 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -6,7 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 9b7fde7d542..24ca13e4acc 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -42,7 +42,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index d15f237587b..f789a5e8b62 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=Europe/Moscow +ENV TZ=UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy From 4a1e84306ad37879eb4511857344c94fc8c3213b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Feb 2022 12:57:58 +0100 Subject: [PATCH 127/215] Update performance comparison OS version --- docker/test/performance-comparison/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index 5037739be36..d50bfce1e8f 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/performance-comparison . -FROM ubuntu:18.04 +FROM ubuntu:20.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" From c7ba5204b3fa3d8120a6111730902c7a7aaa9e0c Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Mon, 7 Feb 2022 13:17:21 +0100 Subject: [PATCH 128/215] Make testflows a multiarch image --- docker/test/testflows/runner/Dockerfile | 33 ++++++++++++++----------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index f789a5e8b62..69b3affd0e7 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -43,24 +43,27 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 -RUN set -eux; \ - \ -# this "case" statement is generated via "update.sh" - \ - if ! 
wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \
-        echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \
-        exit 1; \
-    fi; \
-    \
-    tar --extract \
+# Architecture of the image when BuildKit/buildx is used
+ARG TARGETARCH
+
+# Install docker static binaries for the target architecture
+RUN arch=${TARGETARCH:-amd64} \
+    && case $arch in \
+        amd64) rarch=x86_64 ;; \
+        arm64) rarch=aarch64 ;; \
+    esac \
+    && set -eux \
+    && if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \
+        echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \
+        && exit 1; \
+    fi \
+    && tar --extract \
         --file docker.tgz \
         --strip-components 1 \
         --directory /usr/local/bin/ \
-    ; \
-    rm docker.tgz; \
-    \
-    dockerd --version; \
-    docker --version
+    && rm docker.tgz \
+    && dockerd --version \
+    && docker --version
 
 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/
From 8d8ff05fba9376d46da6162c2f9776222bf2efd9 Mon Sep 17 00:00:00 2001
From: tesw yew isal <278153+cnmade@users.noreply.github.com>
Date: Mon, 7 Feb 2022 20:54:22 +0800
Subject: [PATCH 129/215] Translate zh/engines/database-engines/sqlite: fix symbol

---
 docs/zh/engines/database-engines/sqlite.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/engines/database-engines/sqlite.md b/docs/zh/engines/database-engines/sqlite.md
index d8b91943b0c..48dca38e4af 100644
--- a/docs/zh/engines/database-engines/sqlite.md
+++ b/docs/zh/engines/database-engines/sqlite.md
@@ -30,7 +30,7 @@ toc_title: SQLite
 ## 技术细节和建议 {#specifics-and-recommendations}
 
 SQLite将整个数据库(定义、表、索引和数据本身)存储为主机上的单个跨平台文件。在写入过程中,SQLite会锁定整个数据库文件,因此写入操作是顺序执行的。读操作可以是多任务的。
-SQLite不需要服务管理(如启动脚本)或基于“GRANT”和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。
+SQLite不需要服务管理(如启动脚本)或基于`GRANT`和密码的访问控制。访问控制是通过授予数据库文件本身的文件系统权限来处理的。
 
 ## 使用示例 {#usage-example}
From c3d7f021e28384033b7fc0870a2fb8231bc864d0 Mon Sep 17 00:00:00 2001
From: cnmade <278153+cnmade@users.noreply.github.com>
Date: Mon, 7 Feb 2022 20:55:31 +0800
Subject: [PATCH 130/215] Update docs/zh/engines/table-engines/integrations/hive.md

Co-authored-by: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
---
 docs/zh/engines/table-engines/integrations/hive.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md
index dff8a9a4d65..eaf3e777714 100644
--- a/docs/zh/engines/table-engines/integrations/hive.md
+++ b/docs/zh/engines/table-engines/integrations/hive.md
@@ -5,7 +5,7 @@ toc_title: Hive
 
 # Hive {#hive}
 
-Hive引擎允许对HDFS Hive表执行SELECT查询。目前它支持如下输入格式:
+Hive引擎允许对HDFS Hive表执行 `SELECT` 查询。目前它支持如下输入格式:
 
 -文本:只支持简单的标量列类型,除了 `Binary`
From fe4b73ec4b32c1f712a0609058782f8883f5f4b3 Mon Sep 17 00:00:00 2001
From: tesw yew isal <278153+cnmade@users.noreply.github.com>
Date: Mon, 7 Feb 2022 21:00:46 +0800
Subject: [PATCH 131/215] Translate zh/engines/database-engines/sqlite: true is not needed

---
 docs/zh/engines/table-engines/integrations/hive.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md
index eaf3e777714..aa2c82d902a 100644
--- a/docs/zh/engines/table-engines/integrations/hive.md
+++ b/docs/zh/engines/table-engines/integrations/hive.md
@@ -55,7 
+55,7 @@ PARTITION BY expr ``` -- enable: true:开启后,ClickHouse将为HDFS (远程文件系统)维护本地缓存。 +- enable: 开启后,ClickHouse将为HDFS (远程文件系统)维护本地缓存。 - root_dir: 必需的。用于存储远程文件系统的本地缓存文件的根目录。 - limit_size: 必需的。本地缓存文件的最大大小(单位为字节)。 - bytes_read_before_flush: 从远程文件系统下载文件时,刷新到本地文件系统前的控制字节数。缺省值为1MB。 From 1f63cea082e084386787d44ee05a0099e7302746 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 26 Jan 2022 16:29:33 +0300 Subject: [PATCH 132/215] Add schema inference for values() table function --- src/TableFunctions/TableFunctionValues.cpp | 62 +++++++++++++------ src/TableFunctions/TableFunctionValues.h | 5 +- .../parseColumnsListForTableFunction.cpp | 20 ++++++ .../parseColumnsListForTableFunction.h | 2 + .../02185_values_schema_inference.reference | 14 +++++ .../02185_values_schema_inference.sh | 14 +++++ 6 files changed, 96 insertions(+), 21 deletions(-) create mode 100644 tests/queries/0_stateless/02185_values_schema_inference.reference create mode 100755 tests/queries/0_stateless/02185_values_schema_inference.sh diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index c66ebe7322e..6531b1c7478 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -4,12 +4,11 @@ #include #include #include +#include #include #include -#include -#include #include #include #include @@ -28,13 +27,14 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; } -static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args, const Block & sample_block, ContextPtr context) +static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args, const Block & sample_block, size_t start, ContextPtr context) { if (res_columns.size() == 1) /// Parsing arguments as Fields { - for (size_t i = 1; i < args.size(); ++i) + for (size_t i = start; i < args.size(); ++i) { const auto & [value_field, value_type_ptr] = evaluateConstantExpression(args[i], context); @@ -44,7 +44,7 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args } else /// Parsing arguments as Tuples { - for (size_t i = 1; i < args.size(); ++i) + for (size_t i = start; i < args.size(); ++i) { const auto & [value_field, value_type_ptr] = evaluateConstantExpression(args[i], context); @@ -68,7 +68,17 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args } } -void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) +DataTypes TableFunctionValues::getTypesFromArgument(const ASTPtr & arg, ContextPtr context) +{ + const auto & [value_field, value_type_ptr] = evaluateConstantExpression(arg, context); + DataTypes types; + if (const DataTypeTuple * type_tuple = typeid_cast(value_type_ptr.get())) + return type_tuple->getElements(); + + return {value_type_ptr}; +} + +void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr context) { ASTs & args_func = ast_function->children; @@ -77,25 +87,37 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args = args_func.at(0)->children; - if (args.size() < 2) - throw Exception("Table function '" + getName() + "' requires 2 or more arguments: structure and values.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - /// Parsing first argument as table structure and creating a sample block - if (!args[0]->as()) + const auto & literal = 
args[0]->as<ASTLiteral>();
+    String value;
+    if (args.size() > 1 && literal && literal->value.tryGet<String>(value) && tryParseColumnsListFromString(value, structure, context))
     {
-        throw Exception(fmt::format(
-            "The first argument of table function '{}' must be a literal. "
-            "Got '{}' instead", getName(), args[0]->formatForErrorMessage()),
-            ErrorCodes::BAD_ARGUMENTS);
+        has_structure_in_arguments = true;
+        return;
     }
 
-    structure = args[0]->as<ASTLiteral &>().value.safeGet<String>();
+    has_structure_in_arguments = false;
+    DataTypes data_types = getTypesFromArgument(args[0], context);
+    for (size_t i = 1; i < args.size(); ++i)
+    {
+        auto arg_types = getTypesFromArgument(args[i], context);
+        if (data_types.size() != arg_types.size())
+            throw Exception(
+                ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE,
+                "Cannot determine common structure for {} function arguments: the number of columns differs between arguments",
+                getName());
+        for (size_t j = 0; j != arg_types.size(); ++j)
+            data_types[j] = getLeastSupertype({data_types[j], arg_types[j]});
+    }
+
+    NamesAndTypesList names_and_types;
+    for (size_t i = 0; i != data_types.size(); ++i)
+        names_and_types.emplace_back("c" + std::to_string(i + 1), data_types[i]);
+    structure = ColumnsDescription(names_and_types);
 }
 
-ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr context) const
+ColumnsDescription TableFunctionValues::getActualTableStructure(ContextPtr /*context*/) const
 {
-    return parseColumnsListFromString(structure, context);
+    return structure;
 }
 
 StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
@@ -111,7 +133,7 @@ StoragePtr TableFunctionValues::executeImpl(const ASTPtr & ast_function, Context
     ASTs & args = ast_function->children.at(0)->children;
 
     /// Parsing other arguments as values and inserting them into columns
-    parseAndInsertValues(res_columns, args, sample_block, context);
+    parseAndInsertValues(res_columns, args, sample_block, has_structure_in_arguments ? 
1 : 0, context); Block res_block = sample_block.cloneWithColumns(std::move(res_columns)); diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h index 058f5f1d2ed..ab1bca5aeb3 100644 --- a/src/TableFunctions/TableFunctionValues.h +++ b/src/TableFunctions/TableFunctionValues.h @@ -20,7 +20,10 @@ private: ColumnsDescription getActualTableStructure(ContextPtr context) const override; void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; - String structure; + DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context); +// String structure; + ColumnsDescription structure; + bool has_structure_in_arguments; }; diff --git a/src/TableFunctions/parseColumnsListForTableFunction.cpp b/src/TableFunctions/parseColumnsListForTableFunction.cpp index 08e80ef425a..bff888abc07 100644 --- a/src/TableFunctions/parseColumnsListForTableFunction.cpp +++ b/src/TableFunctions/parseColumnsListForTableFunction.cpp @@ -28,4 +28,24 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, Con return InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); } +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, ContextPtr context) +{ + ParserColumnDeclarationList parser; + const Settings & settings = context->getSettingsRef(); + + String error; + const char * start = structure.data(); + const char * end = structure.data() + structure.size(); + ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); + if (!columns_list_raw) + return false; + + auto * columns_list = dynamic_cast(columns_list_raw.get()); + if (!columns_list) + return false; + + columns = InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); + return true; +} + } diff --git a/src/TableFunctions/parseColumnsListForTableFunction.h b/src/TableFunctions/parseColumnsListForTableFunction.h index e0130a2618d..e82a32f3d23 100644 --- a/src/TableFunctions/parseColumnsListForTableFunction.h +++ b/src/TableFunctions/parseColumnsListForTableFunction.h @@ -12,4 +12,6 @@ class Context; /// Parses a common argument for table functions such as table structure given in string ColumnsDescription parseColumnsListFromString(const std::string & structure, ContextPtr context); +bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescription & columns, ContextPtr context); + } diff --git a/tests/queries/0_stateless/02185_values_schema_inference.reference b/tests/queries/0_stateless/02185_values_schema_inference.reference new file mode 100644 index 00000000000..5353d083b9e --- /dev/null +++ b/tests/queries/0_stateless/02185_values_schema_inference.reference @@ -0,0 +1,14 @@ +String +abc +def +1 +-1 +10000 +-10000 +1000000 +1 string [1,2,-1] +-10 def [10,20,10000] +1 \N [1,2,-1] +\N def [10,NULL,10000] +(1,'1') 10 +(-1,'-1') 1000000 diff --git a/tests/queries/0_stateless/02185_values_schema_inference.sh b/tests/queries/0_stateless/02185_values_schema_inference.sh new file mode 100755 index 00000000000..3cfcd0bd4e6 --- /dev/null +++ b/tests/queries/0_stateless/02185_values_schema_inference.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_CLIENT -q "select * from values('String', 'abc', 'def')" +$CLICKHOUSE_CLIENT -q "select * from values(1, -1, 10000, -10000, 1000000)" +$CLICKHOUSE_CLIENT -q "select * from values((1, 'string', [1, 2, -1]), (-10, 'def', [10, 20, 10000]))" +$CLICKHOUSE_CLIENT -q "select * from values((1, NULL, [1, 2, -1]), (NULL, 'def', [10, NULL, 10000]))" +$CLICKHOUSE_CLIENT -q "select * from values(((1, '1'), 10), ((-1, '-1'), 1000000))" + From bc95f2d31330321a41a1c918b434796d0f3d3b12 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 26 Jan 2022 16:31:09 +0300 Subject: [PATCH 133/215] Remove commented line --- src/TableFunctions/TableFunctionValues.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h index ab1bca5aeb3..34335eb4bd1 100644 --- a/src/TableFunctions/TableFunctionValues.h +++ b/src/TableFunctions/TableFunctionValues.h @@ -21,7 +21,7 @@ private: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context); -// String structure; + ColumnsDescription structure; bool has_structure_in_arguments; }; From 8a196a553c0c6c6500ac434dc8fbeb130eb046cd Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 26 Jan 2022 16:48:21 +0300 Subject: [PATCH 134/215] Fix style --- src/TableFunctions/parseColumnsListForTableFunction.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/parseColumnsListForTableFunction.cpp b/src/TableFunctions/parseColumnsListForTableFunction.cpp index bff888abc07..911f2ae80f4 100644 --- a/src/TableFunctions/parseColumnsListForTableFunction.cpp +++ b/src/TableFunctions/parseColumnsListForTableFunction.cpp @@ -36,7 +36,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip String error; const char * start = structure.data(); const char * end = structure.data() + structure.size(); - ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); + ASTPtr columns_list_raw = tryParseQuery(parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); if (!columns_list_raw) return false; From 85e1c4595f90df257c7ea941bf87c6b21703e4a4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 7 Feb 2022 16:17:01 +0300 Subject: [PATCH 135/215] Fix tests --- src/TableFunctions/TableFunctionValues.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 6531b1c7478..74d0a04abae 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -83,10 +83,13 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); ASTs & args = args_func.at(0)->children; + if (args.empty()) + throw Exception("Table function '" + getName() + "' requires at least 1 argument"); + const auto & literal = args[0]->as(); String value; if (args.size() > 1 && literal && literal->value.tryGet(value) && tryParseColumnsListFromString(value, 
structure, context)) From a78187f37b39de03cc3d8422bd426416f8e0f8d8 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 7 Feb 2022 16:23:32 +0300 Subject: [PATCH 136/215] Fix --- src/TableFunctions/TableFunctionValues.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 74d0a04abae..07019d26067 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -88,7 +88,7 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args = args_func.at(0)->children; if (args.empty()) - throw Exception("Table function '" + getName() + "' requires at least 1 argument"); + throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto & literal = args[0]->as(); String value; From 77b42bb9ff9df8819952a35669062896693b8f56 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 27 Jan 2022 19:54:15 +0300 Subject: [PATCH 137/215] Support UUID in MsgPack format --- src/Core/Settings.h | 1 + src/Core/SettingsEnums.cpp | 6 ++ src/Core/SettingsEnums.h | 2 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 8 +++ .../Formats/Impl/MsgPackRowInputFormat.cpp | 56 +++++++++++++++++-- .../Formats/Impl/MsgPackRowInputFormat.h | 1 + .../Formats/Impl/MsgPackRowOutputFormat.cpp | 47 ++++++++++++++-- .../Formats/Impl/MsgPackRowOutputFormat.h | 3 +- .../0_stateless/02187_msg_pack_uuid.reference | 4 ++ .../0_stateless/02187_msg_pack_uuid.sh | 17 ++++++ 11 files changed, 135 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02187_msg_pack_uuid.reference create mode 100755 tests/queries/0_stateless/02187_msg_pack_uuid.sh diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 48dd637a943..6e31affe4a8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -610,6 +610,7 @@ class IColumn; M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \ M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \ + M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ \ M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. 
Possible values: 'basic' and 'best_effort'.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index b62575c9730..5d16f0a5c85 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -130,4 +130,10 @@ IMPLEMENT_SETTING_ENUM(EscapingRule, ErrorCodes::BAD_ARGUMENTS, {"JSON", FormatSettings::EscapingRule::JSON}, {"XML", FormatSettings::EscapingRule::XML}, {"Raw", FormatSettings::EscapingRule::Raw}}) + +IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, + {{"bin", FormatSettings::MsgPackUUIDRepresentation::BIN}, + {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, + {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 106589f5d24..d29e4f15c27 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -172,4 +172,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule) +DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation) + } diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f19d03dc8d0..be565a532bb 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -129,6 +129,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; format_settings.msgpack.number_of_columns = settings.input_format_msgpack_number_of_columns; + format_settings.msgpack.output_uuid_representation = settings.output_format_msgpack_uuid_representation; format_settings.max_rows_to_read_for_schema_inference = settings.input_format_max_rows_to_read_for_schema_inference; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b484d623944..265c879e768 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -231,9 +231,17 @@ struct FormatSettings EnumComparingMode enum_comparing_mode = EnumComparingMode::BY_VALUES; } capn_proto; + enum class MsgPackUUIDRepresentation + { + STR, // Output UUID as a string of 36 characters. + BIN, // Output UUID as 16-bytes binary. 
+ EXT, // Output UUID as ExtType = 2 + }; + struct { UInt64 number_of_columns = 0; + MsgPackUUIDRepresentation output_uuid_representation = MsgPackUUIDRepresentation::EXT; } msgpack; }; diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 2471a98f83d..46d7c775f90 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -12,6 +13,7 @@ #include #include #include +#include #include #include @@ -153,16 +155,29 @@ static void insertInteger(IColumn & column, DataTypePtr type, UInt64 value) } } -static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size) +static void insertString(IColumn & column, DataTypePtr type, const char * value, size_t size, bool bin) { auto insert_func = [&](IColumn & column_, DataTypePtr type_) { - insertString(column_, type_, value, size); + insertString(column_, type_, value, size, bin); }; if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func)) return; + if (isUUID(type)) + { + ReadBufferFromMemory buf(value, size); + UUID uuid; + if (bin) + readBinary(uuid, buf); + else + readUUIDText(uuid, buf); + + assert_cast(column).insertValue(uuid); + return; + } + if (!isStringOrFixedString(type)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack string into column with type {}.", type->getName()); @@ -218,6 +233,18 @@ static void insertNull(IColumn & column, DataTypePtr type) assert_cast(column).insertDefault(); } +static void insertUUID(IColumn & column, DataTypePtr /*type*/, const char * value, size_t size) +{ + ReadBufferFromMemory buf(value, size); + UInt64 first, second; + readBinaryBigEndian(first, buf); + readBinaryBigEndian(second, buf); + UUID uuid; + uuid.toUnderType().items[0] = first; + uuid.toUnderType().items[1] = second; + assert_cast(column).insertValue(uuid); +} + bool MsgPackVisitor::visit_positive_integer(UInt64 value) // NOLINT { insertInteger(info_stack.top().column, info_stack.top().type, value); @@ -232,13 +259,13 @@ bool MsgPackVisitor::visit_negative_integer(Int64 value) // NOLINT bool MsgPackVisitor::visit_str(const char * value, size_t size) // NOLINT { - insertString(info_stack.top().column, info_stack.top().type, value, size); + insertString(info_stack.top().column, info_stack.top().type, value, size, false); return true; } bool MsgPackVisitor::visit_bin(const char * value, size_t size) // NOLINT { - insertString(info_stack.top().column, info_stack.top().type, value, size); + insertString(info_stack.top().column, info_stack.top().type, value, size, true); return true; } @@ -324,6 +351,18 @@ bool MsgPackVisitor::visit_nil() return true; } +bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) +{ + uint8_t type = *value; + if (*value == 0x02) + { + insertUUID(info_stack.top().column, info_stack.top().type, value + 1, size - 1); + return true; + } + + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type); +} + void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT { throw Exception("Error occurred while parsing msgpack data.", ErrorCodes::INCORRECT_DATA); @@ -455,8 +494,13 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) } case msgpack::type::object_type::NIL: return nullptr; - default: - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack 
type is not supported"); + case msgpack::type::object_type::EXT: + { + msgpack::object_ext object_ext = object.via.ext; + if (object_ext.type() == 0x02) + return std::make_shared(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type()); + } } } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index c2ad31c7c4c..484bc7b3c29 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -42,6 +42,7 @@ public: bool end_map_key(); bool start_map_value(); bool end_map_value(); + bool visit_ext(const char * value, uint32_t size); /// This function will be called if error occurs in parsing [[noreturn]] void parse_error(size_t parsed_offset, size_t error_offset); diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index 36a8a62b39e..35b165c46db 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -5,6 +5,9 @@ #include #include +#include +#include + #include #include #include @@ -27,8 +30,8 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_) - : IRowOutputFormat(header_, out_, params_), packer(out_) {} +MsgPackRowOutputFormat::MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_) + : IRowOutputFormat(header_, out_, params_), packer(out_), format_settings(format_settings_) {} void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num) { @@ -164,6 +167,42 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr serializeField(*dict_column, dict_type, index); return; } + case TypeIndex::UUID: + { + const auto & uuid_column = assert_cast(column); + switch (format_settings.msgpack.output_uuid_representation) + { + case FormatSettings::MsgPackUUIDRepresentation::BIN: + { + WriteBufferFromOwnString buf; + writeBinary(uuid_column.getElement(row_num), buf); + StringRef uuid_bin = buf.stringRef(); + packer.pack_bin(uuid_bin.size); + packer.pack_bin_body(uuid_bin.data, uuid_bin.size); + return; + } + case FormatSettings::MsgPackUUIDRepresentation::STR: + { + WriteBufferFromOwnString buf; + writeText(uuid_column.getElement(row_num), buf); + StringRef uuid_text = buf.stringRef(); + packer.pack_str(uuid_text.size); + packer.pack_bin_body(uuid_text.data, uuid_text.size); + return; + } + case FormatSettings::MsgPackUUIDRepresentation::EXT: + { + WriteBufferFromOwnString buf; + UUID value = uuid_column.getElement(row_num); + writeBinaryBigEndian(value.toUnderType().items[0], buf); + writeBinaryBigEndian(value.toUnderType().items[1], buf); + StringRef uuid_ext = buf.stringRef(); + packer.pack_ext(16, 0x02); + packer.pack_ext_body(uuid_ext.data, uuid_ext.size); + return; + } + } + } default: break; } @@ -186,9 +225,9 @@ void registerOutputFormatMsgPack(FormatFactory & factory) WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, - const FormatSettings &) + const FormatSettings & settings) { - return std::make_shared(buf, sample, params); + return std::make_shared(buf, sample, params, settings); }); 
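// The EXT branch above emits msgpack fixext16: one 0xd8 marker byte, the
// extension type tag (0x02), then a 16-byte body holding both UInt64 halves
// of the UUID big-endian. A standalone sketch of that wire layout using only
// the standard library — the framing byte follows the msgpack spec, and the
// helper names here are illustrative:
#include <cstdint>
#include <cstdio>

static void putBigEndian64(uint8_t * dst, uint64_t v)
{
    for (int i = 7; i >= 0; --i, v >>= 8)
        dst[i] = static_cast<uint8_t>(v & 0xff);
}

int main()
{
    // Halves of the UUID 5e7084e0-019f-461f-9e70-84e0019f561f used in the test below.
    uint64_t hi = 0x5e7084e0019f461fULL;
    uint64_t lo = 0x9e7084e0019f561fULL;

    uint8_t frame[18];
    frame[0] = 0xd8;                   // fixext16 marker
    frame[1] = 0x02;                   // extension type tag chosen for UUID
    putBigEndian64(frame + 2, hi);
    putBigEndian64(frame + 10, lo);

    for (uint8_t b : frame)
        std::printf("%02x ", b);
    std::printf("\n");
}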
factory.markOutputFormatSupportsParallelFormatting("MsgPack"); } diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h index 19b37afed90..e2abbd588c4 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.h @@ -18,7 +18,7 @@ namespace DB class MsgPackRowOutputFormat final : public IRowOutputFormat { public: - MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_); + MsgPackRowOutputFormat(WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & format_settings_); String getName() const override { return "MsgPackRowOutputFormat"; } @@ -28,6 +28,7 @@ private: void serializeField(const IColumn & column, DataTypePtr data_type, size_t row_num); msgpack::packer packer; + const FormatSettings format_settings; }; } diff --git a/tests/queries/0_stateless/02187_msg_pack_uuid.reference b/tests/queries/0_stateless/02187_msg_pack_uuid.reference new file mode 100644 index 00000000000..c567cc14ad2 --- /dev/null +++ b/tests/queries/0_stateless/02187_msg_pack_uuid.reference @@ -0,0 +1,4 @@ +5e7084e0-019f-461f-9e70-84e0019f561f +5e7084e0-019f-461f-9e70-84e0019f561f +5e7084e0-019f-461f-9e70-84e0019f561f +5e7084e0-019f-461f-9e70-84e0019f561f UUID diff --git a/tests/queries/0_stateless/02187_msg_pack_uuid.sh b/tests/queries/0_stateless/02187_msg_pack_uuid.sh new file mode 100755 index 00000000000..9be92d66790 --- /dev/null +++ b/tests/queries/0_stateless/02187_msg_pack_uuid.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_str.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='str'" +$CLICKHOUSE_CLIENT -q "select * from file('uuid_str.msgpack', 'MsgPack', 'uuid UUID')" + +$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='bin'" +$CLICKHOUSE_CLIENT -q "select * from file('uuid_bin.msgpack', 'MsgPack', 'uuid UUID')" + +$CLICKHOUSE_CLIENT -q "insert into table function file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID') select toUUID('5e7084e0-019f-461f-9e70-84e0019f561f') settings output_format_msgpack_uuid_representation='ext'" +$CLICKHOUSE_CLIENT -q "select * from file('uuid_ext.msgpack', 'MsgPack', 'uuid UUID')" +$CLICKHOUSE_CLIENT -q "select c1, toTypeName(c1) from file('uuid_ext.msgpack') settings input_format_msgpack_number_of_columns=1" + From 34a17075d3f1140b6955de742c3d69e4beb66514 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 27 Jan 2022 20:06:23 +0300 Subject: [PATCH 138/215] FIx error messages --- src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 46d7c775f90..81effc0aa02 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -360,7 +360,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) return true; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {}", type); } void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT @@ -499,7 +499,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) msgpack::object_ext object_ext = object.via.ext; if (object_ext.type() == 0x02) return std::make_shared(); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {} is not supported", object_ext.type()); } } } From c3d30fd502d709db485646b54988f19ac2fb9529 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 7 Feb 2022 17:07:44 +0300 Subject: [PATCH 139/215] Fix comments --- .../Formats/Impl/MsgPackRowInputFormat.cpp | 13 +++++-------- src/Processors/Formats/Impl/MsgPackRowInputFormat.h | 5 +++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 81effc0aa02..e19067996e7 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -236,12 +236,9 @@ static void insertNull(IColumn & column, DataTypePtr type) static void insertUUID(IColumn & column, DataTypePtr /*type*/, const char * value, size_t size) { ReadBufferFromMemory buf(value, size); - UInt64 first, second; - readBinaryBigEndian(first, buf); - readBinaryBigEndian(second, buf); UUID uuid; - uuid.toUnderType().items[0] = first; - uuid.toUnderType().items[1] = second; + readBinaryBigEndian(uuid.toUnderType().items[0], buf); + 
readBinaryBigEndian(uuid.toUnderType().items[1], buf); assert_cast(column).insertValue(uuid); } @@ -353,8 +350,8 @@ bool MsgPackVisitor::visit_nil() bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) { - uint8_t type = *value; - if (*value == 0x02) + int8_t type = *value; + if (*value == int8_t(MsgPackExtensionTypes::UUID)) { insertUUID(info_stack.top().column, info_stack.top().type, value + 1, size - 1); return true; @@ -497,7 +494,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) case msgpack::type::object_type::EXT: { msgpack::object_ext object_ext = object.via.ext; - if (object_ext.type() == 0x02) + if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUID)) return std::make_shared(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {} is not supported", object_ext.type()); } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h index 484bc7b3c29..0d25eb4bed0 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h @@ -17,6 +17,11 @@ namespace DB class ReadBuffer; +enum class MsgPackExtensionTypes +{ + UUID = 0x02, +}; + class MsgPackVisitor : public msgpack::null_visitor { public: From 1b16db72c3a54dd74a985f35c24294f2aa4f661a Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 7 Feb 2022 17:20:26 +0300 Subject: [PATCH 140/215] fix consecutive backward seeks in seekable read buffers --- src/Compression/CachedCompressedReadBuffer.cpp | 2 +- src/Compression/CompressedReadBufferFromFile.cpp | 2 +- .../AsynchronousReadIndirectBufferFromRemoteFS.cpp | 2 +- src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp | 2 +- .../AsynchronousReadBufferFromFileDescriptor.cpp | 4 ++-- src/IO/BufferBase.h | 9 +++++++++ src/IO/ReadBufferFromEncryptedFile.cpp | 2 +- src/IO/ReadBufferFromFileDescriptor.cpp | 14 ++++---------- src/IO/ReadBufferFromFileDescriptor.h | 3 --- src/IO/ReadBufferFromS3.cpp | 2 +- src/IO/ReadWriteBufferFromHTTP.h | 2 +- src/Storages/HDFS/ReadBufferFromHDFS.cpp | 2 +- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index f942f81f5e9..bda86f8c616 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -105,7 +105,7 @@ void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t o /// We will discard our working_buffer, but have to account rest bytes bytes += offset(); /// No data, everything discarded - pos = working_buffer.end(); + resetWorkingBuffer(); owned_cell.reset(); /// Remember required offset in decompressed block which will be set in diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index b8ce485abc5..cf08d68a7aa 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -80,7 +80,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t /// We will discard our working_buffer, but have to account rest bytes bytes += offset(); /// No data, everything discarded - pos = working_buffer.end(); + resetWorkingBuffer(); size_compressed = 0; /// Remember required offset in decompressed block which will be set in /// the next ReadBuffer::next() call diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp 
From 1b16db72c3a54dd74a985f35c24294f2aa4f661a Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Mon, 7 Feb 2022 17:20:26 +0300
Subject: [PATCH 140/215] fix consecutive backward seeks in seekable read buffers

---
 src/Compression/CachedCompressedReadBuffer.cpp         |  2 +-
 src/Compression/CompressedReadBufferFromFile.cpp       |  2 +-
 .../AsynchronousReadIndirectBufferFromRemoteFS.cpp     |  2 +-
 src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp        |  2 +-
 .../AsynchronousReadBufferFromFileDescriptor.cpp       |  4 ++--
 src/IO/BufferBase.h                                    |  9 +++++++++
 src/IO/ReadBufferFromEncryptedFile.cpp                 |  2 +-
 src/IO/ReadBufferFromFileDescriptor.cpp                | 14 ++++----------
 src/IO/ReadBufferFromFileDescriptor.h                  |  3 ---
 src/IO/ReadBufferFromS3.cpp                            |  2 +-
 src/IO/ReadWriteBufferFromHTTP.h                       |  2 +-
 src/Storages/HDFS/ReadBufferFromHDFS.cpp               |  2 +-
 12 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp
index f942f81f5e9..bda86f8c616 100644
--- a/src/Compression/CachedCompressedReadBuffer.cpp
+++ b/src/Compression/CachedCompressedReadBuffer.cpp
@@ -105,7 +105,7 @@ void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t o
         /// We will discard our working_buffer, but have to account rest bytes
         bytes += offset();
         /// No data, everything discarded
-        pos = working_buffer.end();
+        resetWorkingBuffer();
         owned_cell.reset();

         /// Remember required offset in decompressed block which will be set in
diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp
index b8ce485abc5..cf08d68a7aa 100644
--- a/src/Compression/CompressedReadBufferFromFile.cpp
+++ b/src/Compression/CompressedReadBufferFromFile.cpp
@@ -80,7 +80,7 @@ void CompressedReadBufferFromFile::seek(size_t offset_in_compressed_file, size_t
         /// We will discard our working_buffer, but have to account rest bytes
         bytes += offset();
         /// No data, everything discarded
-        pos = working_buffer.end();
+        resetWorkingBuffer();
         size_compressed = 0;
         /// Remember required offset in decompressed block which will be set in
         /// the next ReadBuffer::next() call
diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
index c8484e6088d..184fcfe6f8c 100644
--- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
+++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp
@@ -243,7 +243,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence
         prefetch_future = {};
     }

-    pos = working_buffer.end();
+    resetWorkingBuffer();

     /**
      * Lazy ignore. Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer.
diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp
index c21a55d68ac..cbf265ce741 100644
--- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp
+++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp
@@ -64,7 +64,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence)
         throw Exception("Only SEEK_SET or SEEK_CUR modes are allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

     impl->reset();
-    pos = working_buffer.end();
+    resetWorkingBuffer();

     return impl->file_offset_of_buffer_end;
 }
diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp
index 9c92201b3a1..877702f9705 100644
--- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp
+++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp
@@ -181,8 +181,8 @@ off_t AsynchronousReadBufferFromFileDescriptor::seek(off_t offset, int whence)

     off_t offset_after_seek_pos = new_pos - seek_pos;

-    /// First put position at the end of the buffer so the next read will fetch new data to the buffer.
-    pos = working_buffer.end();
+    /// First reset the buffer so the next read will fetch new data to the buffer.
+    resetWorkingBuffer();

     /// Just update the info about the next position in file.
diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h
index 198441d8bc1..df384a3f051 100644
--- a/src/IO/BufferBase.h
+++ b/src/IO/BufferBase.h
@@ -97,6 +97,15 @@ public:
     bool isPadded() const { return padded; }

 protected:
+    void resetWorkingBuffer()
+    {
+        /// Move position to the end of buffer to trigger call of 'next' on next reading.
+        /// Discard all data in current working buffer to prevent wrong assumtions on content
+        /// of buffer, e.g. for optimizations of seeks in seekable buffers.
+        working_buffer.resize(0);
+        pos = working_buffer.end();
+    }
+
     /// Read/write position.
     Position pos;

diff --git a/src/IO/ReadBufferFromEncryptedFile.cpp b/src/IO/ReadBufferFromEncryptedFile.cpp
index 445c55ac269..7aec6dcde02 100644
--- a/src/IO/ReadBufferFromEncryptedFile.cpp
+++ b/src/IO/ReadBufferFromEncryptedFile.cpp
@@ -56,7 +56,7 @@ off_t ReadBufferFromEncryptedFile::seek(off_t off, int whence)
         offset = new_pos;

         /// No more reading from the current working buffer until next() is called.
-        pos = working_buffer.end();
+        resetWorkingBuffer();
         assert(!hasPendingData());
     }
diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp
index a5e75ba5f83..d266fb86e0f 100644
--- a/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/src/IO/ReadBufferFromFileDescriptor.cpp
@@ -111,7 +111,6 @@ bool ReadBufferFromFileDescriptor::nextImpl()
             ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read);
         working_buffer = internal_buffer;
         working_buffer.resize(bytes_read);
-        buffer_is_dirty = false;
     }
     else
         return false;
@@ -153,10 +152,10 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
     }

     /// Position is unchanged.
-    if (!buffer_is_dirty && (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end))
+    if (new_pos + (working_buffer.end() - pos) == file_offset_of_buffer_end)
         return new_pos;

-    if (!buffer_is_dirty && file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
+    if (file_offset_of_buffer_end - working_buffer.size() <= static_cast<size_t>(new_pos)
         && new_pos <= file_offset_of_buffer_end)
     {
         /// Position is still inside the buffer.
@@ -177,12 +176,8 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)

     off_t offset_after_seek_pos = new_pos - seek_pos;

-    /// First put position at the end of the buffer so the next read will fetch new data to the buffer.
-    pos = working_buffer.end();
-
-    /// Mark buffer as dirty to disallow further seek optimizations, because fetching data to the buffer
-    /// is delayed to the next call of 'nextImpl', but it may be not called before next seek.
-    buffer_is_dirty = true;
+    /// First reset the buffer so the next read will fetch new data to the buffer.
+    resetWorkingBuffer();

     /// In case of using 'pread' we just update the info about the next position in file.
     /// In case of using 'read' we call 'lseek'.
@@ -234,7 +229,6 @@ void ReadBufferFromFileDescriptor::rewind()
     working_buffer.resize(0);
     pos = working_buffer.begin();
     file_offset_of_buffer_end = 0;
-    buffer_is_dirty = true;
 }

diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h
index 48acd5d323e..188cdd709b5 100644
--- a/src/IO/ReadBufferFromFileDescriptor.h
+++ b/src/IO/ReadBufferFromFileDescriptor.h
@@ -62,9 +62,6 @@ public:
 private:
     /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
     bool poll(size_t timeout_microseconds);
-
-    /// If it's true then we cannot assume on content of buffer to optimize seek calls.
-    bool buffer_is_dirty = true;
 };

diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp
index f01640cb95b..869432b9484 100644
--- a/src/IO/ReadBufferFromS3.cpp
+++ b/src/IO/ReadBufferFromS3.cpp
@@ -187,7 +187,7 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
         }
     }

-    pos = working_buffer.end();
+    resetWorkingBuffer();

     if (impl)
     {
         ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h
index 0314fa33f11..ce4d83105c0 100644
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@@ -498,7 +498,7 @@ namespace detail
                 impl.reset();
             }

-            pos = working_buffer.end();
+            resetWorkingBuffer();
             read_range.begin = offset_;
             read_range.end = std::nullopt;
             offset_from_begin_pos = 0;
diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp
index 15f6c73bc51..0ad55162fb2 100644
--- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp
+++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp
@@ -173,7 +173,7 @@ off_t ReadBufferFromHDFS::seek(off_t offset_, int whence)
         return getPosition();
     }

-    pos = working_buffer.end();
+    resetWorkingBuffer();
     impl->seek(offset_, whence);
     return impl->getPosition();
 }

From d32d6453d3c72fd39e70bbc6b6a2ad96ed7963a4 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Mon, 7 Feb 2022 17:49:53 +0300
Subject: [PATCH 141/215] Update BufferBase.h

---
 src/IO/BufferBase.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/BufferBase.h b/src/IO/BufferBase.h
index df384a3f051..7a59687fa56 100644
--- a/src/IO/BufferBase.h
+++ b/src/IO/BufferBase.h
@@ -100,7 +100,7 @@ protected:
     void resetWorkingBuffer()
     {
         /// Move position to the end of buffer to trigger call of 'next' on next reading.
-        /// Discard all data in current working buffer to prevent wrong assumtions on content
+        /// Discard all data in current working buffer to prevent wrong assumptions on content
         /// of buffer, e.g. for optimizations of seeks in seekable buffers.
         working_buffer.resize(0);
         pos = working_buffer.end();
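For context on the two buffer patches above: previously seek() only parked pos at the buffer's end, so working_buffer still reported its old size, and a later "position is still inside the buffer" fast path could be answered from stale data — exactly what the removed buffer_is_dirty flag guarded against. resetWorkingBuffer() makes the flag unnecessary by also shrinking the buffer to zero. A toy model under those assumptions (hypothetical, not the real ReadBuffer classes):

    #include <cassert>
    #include <cstddef>

    // Toy model of the invariant the patch establishes.
    struct ToyBuffer
    {
        size_t size = 0;   // bytes currently valid in the working buffer
        size_t pos = 0;    // read cursor within [0, size]

        void resetWorkingBuffer()
        {
            // Empty the buffer *and* exhaust the cursor: any "seek inside
            // the buffer" optimization now sees size == 0 and cannot match,
            // so no extra dirty flag is needed.
            size = 0;
            pos = 0;
        }

        bool seekWithinBuffer(size_t new_pos) const
        {
            return new_pos < size;  // only succeeds on genuinely valid data
        }
    };

    int main()
    {
        ToyBuffer buf;
        buf.size = 16;
        buf.resetWorkingBuffer();
        assert(!buf.seekWithinBuffer(8));  // stale data cannot be reused
        return 0;
    }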
From a4c7ecde871fed7262c901cf24b02c757196605f Mon Sep 17 00:00:00 2001
From: avogar
Date: Mon, 7 Feb 2022 17:51:16 +0300
Subject: [PATCH 142/215] Make better

---
 src/Formats/MsgPackExtensionTypes.h                    | 9 +++++++++
 src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp  | 6 ++++--
 src/Processors/Formats/Impl/MsgPackRowInputFormat.h    | 7 +------
 src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp | 4 +++-
 4 files changed, 17 insertions(+), 9 deletions(-)
 create mode 100644 src/Formats/MsgPackExtensionTypes.h

diff --git a/src/Formats/MsgPackExtensionTypes.h b/src/Formats/MsgPackExtensionTypes.h
new file mode 100644
index 00000000000..daf9e37910c
--- /dev/null
+++ b/src/Formats/MsgPackExtensionTypes.h
@@ -0,0 +1,9 @@
+namespace DB
+{
+
+enum class MsgPackExtensionTypes
+{
+    UUID = 0x02,
+};
+
+}
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
index e19067996e7..56fc5d7857b 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
@@ -22,6 +22,8 @@
 #include
 #include

+#include <Formats/MsgPackExtensionTypes.h>
+
 namespace DB
 {

@@ -357,7 +359,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size)
         return true;
     }

-    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {}", type);
+    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported MsgPack extension type: {%x}", type);
 }

 void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT
@@ -496,7 +498,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
             msgpack::object_ext object_ext = object.via.ext;
             if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUID))
                 return std::make_shared<DataTypeUUID>();
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {} is not supported", object_ext.type());
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type());
         }
     }
 }
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
index 0d25eb4bed0..2298e35fed5 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
@@ -17,11 +17,6 @@ namespace DB

 class ReadBuffer;

-enum class MsgPackExtensionTypes
-{
-    UUID = 0x02,
-};
-
 class MsgPackVisitor : public msgpack::null_visitor
 {
 public:
@@ -61,7 +56,7 @@ private:
     std::stack<Info> info_stack;
 };

-class MsgPackRowInputFormat final : public IRowInputFormat
+class MsgPackRowInputFormat : public IRowInputFormat
 {
 public:
     MsgPackRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_);
diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp
index 35b165c46db..edec9774b5f 100644
--- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp
@@ -22,6 +22,8 @@
 #include
 #include

+#include <Formats/MsgPackExtensionTypes.h>
+
 namespace DB
 {

@@ -197,7 +199,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr
             writeBinaryBigEndian(value.toUnderType().items[0], buf);
             writeBinaryBigEndian(value.toUnderType().items[1], buf);
             StringRef uuid_ext = buf.stringRef();
-            packer.pack_ext(16, 0x02);
+            packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUID));
             packer.pack_ext_body(uuid_ext.data, uuid_ext.size);
             return;
         }
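The patch above moves extension type 0x02 into a shared enum and packs sizeof(UUID) == 16 bytes of extension body. A hedged sketch of the same two msgpack-c calls in isolation (the payload bytes are dummies, not real UUID data):

    #include <msgpack.hpp>

    int main()
    {
        msgpack::sbuffer out;
        msgpack::packer<msgpack::sbuffer> packer(&out);

        // A 16-byte big-endian UUID payload would go here; zeros for now.
        char uuid_bytes[16] = {};

        // Extension header (length + application-defined type 0x02),
        // then the body -- the same two calls the output format uses.
        packer.pack_ext(sizeof(uuid_bytes), 0x02);
        packer.pack_ext_body(uuid_bytes, sizeof(uuid_bytes));
        return 0;
    }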
Nikolay Degterinsky
Date: Mon, 7 Feb 2022 15:02:19 +0000
Subject: [PATCH 143/215] Better test

---
 tests/queries/0_stateless/01852_cast_operator_4.reference | 6 ++++++
 tests/queries/0_stateless/01852_cast_operator_4.sql       | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/tests/queries/0_stateless/01852_cast_operator_4.reference b/tests/queries/0_stateless/01852_cast_operator_4.reference
index beadc0cd15a..ecfe62b1db4 100644
--- a/tests/queries/0_stateless/01852_cast_operator_4.reference
+++ b/tests/queries/0_stateless/01852_cast_operator_4.reference
@@ -8,6 +8,12 @@ SELECT CAST(CAST(\'[3,4,5]\', \'Array(Int64)\')[2], \'Int8\')
 SELECT CAST(CAST(\'[1,2,3]\', \'Array(UInt64)\')[CAST(CAST([number, number], \'Array(UInt8)\')[number], \'UInt64\')], \'UInt8\')
 FROM numbers(3)
 3
+WITH [3, 4, 5] AS x
+SELECT CAST(x[1], \'Int32\')
+3
 SELECT CAST((3, 4, 5).1, \'Int32\')
 4
 SELECT CAST(CAST((3, 4, 5), \'Tuple(UInt64, UInt64, UInt64)\').1, \'Int32\')
+3
+WITH (3, 4, 5) AS x
+SELECT CAST(x.1, \'Int32\')
diff --git a/tests/queries/0_stateless/01852_cast_operator_4.sql b/tests/queries/0_stateless/01852_cast_operator_4.sql
index 9b75bf84c35..5c33191222b 100644
--- a/tests/queries/0_stateless/01852_cast_operator_4.sql
+++ b/tests/queries/0_stateless/01852_cast_operator_4.sql
@@ -7,8 +7,14 @@ EXPLAIN SYNTAX SELECT [3,4,5]::Array(Int64)[2]::Int8;
 SELECT [1,2,3]::Array(UInt64)[[number, number]::Array(UInt8)[number]::UInt64]::UInt8 from numbers(3);
 EXPLAIN SYNTAX SELECT [1,2,3]::Array(UInt64)[[number, number]::Array(UInt8)[number]::UInt64]::UInt8 from numbers(3);

+WITH [3,4,5] AS x SELECT x[1]::Int32;
+EXPLAIN SYNTAX WITH [3,4,5] AS x SELECT x[1]::Int32;
+
 SELECT tuple(3,4,5).1::Int32;
 EXPLAIN SYNTAX SELECT tuple(3,4,5).1::Int32;

 SELECT tuple(3,4,5)::Tuple(UInt64, UInt64, UInt64).2::Int32;
 EXPLAIN SYNTAX SELECT tuple(3,4,5)::Tuple(UInt64, UInt64, UInt64).1::Int32;
+
+WITH tuple(3,4,5) AS x SELECT x.1::Int32;
+EXPLAIN SYNTAX WITH tuple(3,4,5) AS x SELECT x.1::Int32;

From aa76bda415be90f1e7407e4e89012620e66a9cfd Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Mon, 7 Feb 2022 18:28:24 +0300
Subject: [PATCH 144/215] Fix special build

---
 src/TableFunctions/TableFunctionValues.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/TableFunctions/TableFunctionValues.h b/src/TableFunctions/TableFunctionValues.h
index 34335eb4bd1..f01bcf6e20e 100644
--- a/src/TableFunctions/TableFunctionValues.h
+++ b/src/TableFunctions/TableFunctionValues.h
@@ -20,7 +20,7 @@ private:
     ColumnsDescription getActualTableStructure(ContextPtr context) const override;
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;

-    DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context);
+    static DataTypes getTypesFromArgument(const ASTPtr & arg, ContextPtr context);

     ColumnsDescription structure;
     bool has_structure_in_arguments;

From 15d85682e82ffd4c835f4923920e07430543eb16 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Mon, 7 Feb 2022 18:29:22 +0300
Subject: [PATCH 145/215] Fix style

---
 src/Formats/MsgPackExtensionTypes.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/Formats/MsgPackExtensionTypes.h b/src/Formats/MsgPackExtensionTypes.h
index daf9e37910c..139d2f9047b 100644
--- a/src/Formats/MsgPackExtensionTypes.h
+++ b/src/Formats/MsgPackExtensionTypes.h
@@ -1,3 +1,5 @@
+#pragma once
+
 namespace DB
 {
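A one-line reminder of why the style check behind the patch above demands a guard: without #pragma once (or an equivalent #ifndef guard), a second inclusion of the header in the same translation unit would redefine the enum. A hypothetical minimal illustration (the file name is made up):

    // extension_types.h (hypothetical)
    #pragma once

    namespace DB
    {

    enum class MsgPackExtensionTypes
    {
        UUID = 0x02,
    };

    }

    // Including the header twice is now harmless:
    //   #include "extension_types.h"
    //   #include "extension_types.h"  // guarded, no redefinition error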
Blinkov
Date: Mon, 7 Feb 2022 21:21:05 +0300
Subject: [PATCH 146/215] [website] replace Bastian's photo (#34384)

* Add files via upload

* Add files via upload
---
 website/images/photos/bastian-spanneberg.jpg | Bin 9108 -> 40832 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/website/images/photos/bastian-spanneberg.jpg b/website/images/photos/bastian-spanneberg.jpg
index 66212cb5abde212a4f0551fd19c0b78493cc5701..95c9e5b73ac263cf99bc095a965d4bfadfb89bb0 100644
GIT binary patch
literal 40832
[base85-encoded JPEG payload omitted]

Date: Tue, 8 Feb 2022 02:32:23 +0800
Subject: [PATCH 147/215] Fix tests

---
 .../01705_normalize_create_alter_function_names.reference | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference
index 00c77f1500c..b6f5fe99ca1 100644
--- a/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference
+++ b/tests/queries/0_stateless/01705_normalize_create_alter_function_names.reference
@@ -1,2 +1,2 @@
 CREATE TABLE default.x\n(\n `i` Int32,\n INDEX mm rand() TYPE minmax GRANULARITY 1,\n INDEX nn rand() TYPE minmax GRANULARITY 1,\n PROJECTION p\n (\n SELECT max(i)\n ),\n PROJECTION p2\n (\n SELECT min(i)\n )\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/x\', \'r\')\nORDER BY i\nSETTINGS index_granularity = 8192
-metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p(SELECT max(i)), p2(SELECT min(i))\ngranularity bytes: 10485760\n
+metadata format version: 1\ndate column: \nsampling expression: \nindex granularity: 8192\nmode: 0\nsign column: \nprimary key: i\ndata format version: 1\npartition key: \nindices: mm rand() TYPE minmax GRANULARITY 1, nn rand() TYPE minmax GRANULARITY 1\nprojections: p (SELECT max(i)), p2 (SELECT min(i))\ngranularity bytes: 10485760\n

From a73135ce599c969bb3aa3ff3bf878d243b473462 Mon Sep 17 00:00:00 2001
From: HeenaBansal2009
Date: Mon, 7 Feb 2022 11:27:57 -0800
Subject: [PATCH 148/215] updated .clang-tidy as per Alexey's Suggestion

---
 .clang-tidy | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.clang-tidy b/.clang-tidy
index ecb8ac6dcbf..ddd0ee6d911 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -142,6 +142,7 @@ Checks: '-*,
     clang-analyzer-cplusplus.PlacementNewChecker,
     clang-analyzer-cplusplus.SelfAssignment,
     clang-analyzer-deadcode.DeadStores,
+    clang-analyzer-cplusplus.Move,
     clang-analyzer-optin.cplusplus.VirtualCall,
     clang-analyzer-security.insecureAPI.UncheckedReturn,
     clang-analyzer-security.insecureAPI.bcmp,
From eba3011adacd91955cc84d24b893d5224448aefa Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 7 Feb 2022 20:40:47 +0100
Subject: [PATCH 149/215] Fix

---
 src/Core/Settings.h              |   1 +
 src/IO/ReadSettings.h            |   1 +
 src/IO/ReadWriteBufferFromHTTP.h |  68 +++++++--
 src/Interpreters/Context.cpp     |   1 +
 src/Storages/StorageURL.cpp      | 229 +++++++++++++++++++------------
 5 files changed, 202 insertions(+), 98 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 48dd637a943..c9019acf9f1 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -262,6 +262,7 @@ class IColumn;
     M(UInt64, http_max_fields, 1000000, "Maximum number of fields in HTTP header", 0) \
     M(UInt64, http_max_field_name_size, 1048576, "Maximum length of field name in HTTP header", 0) \
     M(UInt64, http_max_field_value_size, 1048576, "Maximum length of field value in HTTP header", 0) \
+    M(Bool, http_skip_not_found_url_for_globs, true, "Skip url's for globs with HTTP_NOT_FOUND error", 0) \
     M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \
     M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
    M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \
diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h
index f6c1158a896..e290cbab36b 100644
--- a/src/IO/ReadSettings.h
+++ b/src/IO/ReadSettings.h
@@ -82,6 +82,7 @@ struct ReadSettings
     size_t http_max_tries = 1;
     size_t http_retry_initial_backoff_ms = 100;
     size_t http_retry_max_backoff_ms = 1600;
+    bool http_skip_not_found_url_for_globs = true;

     /// Set to true for MergeTree tables to make sure
     /// that last position (offset in compressed file) is always passed.
diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h
index 0314fa33f11..a9ab7a8348e 100644
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include

 namespace ProfileEvents
@@ -129,6 +130,8 @@ namespace detail
         /// In case of redirects, save result uri to use it if we retry the request.
         std::optional<Poco::URI> saved_uri_redirect;

+        bool http_skip_not_found_url;
+
         ReadSettings settings;
         Poco::Logger * log;
@@ -146,7 +149,7 @@ namespace detail
             return read_range.begin + offset_from_begin_pos;
         }

-        std::istream * call(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
+        std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
         {
             // With empty path poco will send "POST  HTTP/1.1" its bug.
             if (uri_.getPath().empty())
@@ -211,7 +214,7 @@
         {
             try
             {
-                call(uri, response, Poco::Net::HTTPRequest::HTTP_HEAD);
+                call(response, Poco::Net::HTTPRequest::HTTP_HEAD);

                 while (isRedirect(response.getStatus()))
                 {
@@ -220,7 +223,7 @@

                     session->updateSession(uri_redirect);

-                    istr = call(uri_redirect, response, method);
+                    istr = callImpl(uri_redirect, response, method);
                 }

                 break;
@@ -253,7 +256,8 @@
             Range read_range_ = {},
             const RemoteHostFilter & remote_host_filter_ = {},
             bool delay_initialization = false,
-            bool use_external_buffer_ = false)
+            bool use_external_buffer_ = false,
+            bool glob_url = false)
             : SeekableReadBufferWithSize(nullptr, 0)
             , uri {uri_}
Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} @@ -265,6 +269,7 @@ namespace detail , buffer_size {buffer_size_} , use_external_buffer {use_external_buffer_} , read_range(read_range_) + , http_skip_not_found_url(settings_.http_skip_not_found_url_for_globs && glob_url) , settings {settings_} , log(&Poco::Logger::get("ReadWriteBufferFromHTTP")) { @@ -280,14 +285,46 @@ namespace detail initialize(); } + enum class InitializeError + { + NON_RETRIABLE_ERROR, + SKIP_NOT_FOUND_URL, + NONE, + }; + + InitializeError call(Poco::Net::HTTPResponse & response, const String & method_) + { + try + { + istr = callImpl(saved_uri_redirect ? *saved_uri_redirect : uri, response, method_); + } + catch (...) + { + if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND + && http_skip_not_found_url) + { + return InitializeError::SKIP_NOT_FOUND_URL; + } + else + { + throw; + } + } + + return InitializeError::NONE; + } + /** * Note: In case of error return false if error is not retriable, otherwise throw. */ - bool initialize() + InitializeError initialize() { Poco::Net::HTTPResponse response; - istr = call(saved_uri_redirect ? *saved_uri_redirect : uri, response, method); + auto error = call(response, method); + if (error == InitializeError::SKIP_NOT_FOUND_URL) + return error; + assert(error == InitializeError::NONE); while (isRedirect(response.getStatus())) { @@ -296,7 +333,7 @@ namespace detail session->updateSession(uri_redirect); - istr = call(uri_redirect, response, method); + istr = callImpl(uri_redirect, response, method); saved_uri_redirect = uri_redirect; } @@ -310,7 +347,7 @@ namespace detail Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-')); - return false; + return InitializeError::NON_RETRIABLE_ERROR; } else if (read_range.end) { @@ -346,7 +383,7 @@ namespace detail throw; } - return true; + return InitializeError::NONE; } bool nextImpl() override @@ -394,12 +431,16 @@ namespace detail { /// If error is not retriable -- false is returned and exception is set. /// Otherwise the error is thrown and retries continue. 
- bool initialized = initialize(); - if (!initialized) + auto error = initialize(); + if (error == InitializeError::NON_RETRIABLE_ERROR) { assert(exception); break; } + else if (error == InitializeError::SKIP_NOT_FOUND_URL) + { + return false; + } if (use_external_buffer) { @@ -570,11 +611,12 @@ public: Range read_range_ = {}, const RemoteHostFilter & remote_host_filter_ = {}, bool delay_initialization_ = true, - bool use_external_buffer_ = false) + bool use_external_buffer_ = false, + bool glob_url_ = false) : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, credentials_, method_, out_stream_callback_, buffer_size_, settings_, http_header_entries_, read_range_, remote_host_filter_, - delay_initialization_, use_external_buffer_) + delay_initialization_, use_external_buffer_, glob_url_) { } }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0cb046b9de4..822f1dcb534 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3148,6 +3148,7 @@ ReadSettings Context::getReadSettings() const res.http_max_tries = settings.http_max_tries; res.http_retry_initial_backoff_ms = settings.http_retry_initial_backoff_ms; res.http_retry_max_backoff_ms = settings.http_retry_max_backoff_ms; + res.http_skip_not_found_url_for_globs = settings.http_skip_not_found_url_for_globs; res.mmap_cache = getMMappedFileCache().get(); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 32ab126faa9..ee401ad4153 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -39,6 +39,13 @@ namespace ErrorCodes } +static bool urlWithGlobs(const String & uri) +{ + return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) + || uri.find('|') != std::string::npos; +} + + IStorageURLBase::IStorageURLBase( const String & uri_, ContextPtr context_, @@ -69,41 +76,12 @@ IStorageURLBase::IStorageURLBase( } else storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment); setInMemoryMetadata(storage_metadata); } -ColumnsDescription IStorageURLBase::getTableStructureFromData( - const String & format, - const String & uri, - const String & compression_method, - const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, - const std::optional & format_settings, - ContextPtr context) -{ - auto read_buffer_creator = [&]() - { - auto parsed_uri = Poco::URI(uri); - return wrapReadBufferWithCompressionMethod( - std::make_unique( - parsed_uri, - Poco::Net::HTTPRequest::HTTP_GET, - nullptr, - ConnectionTimeouts::getHTTPTimeouts(context), - Poco::Net::HTTPBasicCredentials{}, - context->getSettingsRef().max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - headers, - ReadWriteBufferFromHTTP::Range{}, - context->getRemoteHostFilter()), - chooseCompressionMethod(parsed_uri.getPath(), compression_method)); - }; - - return readSchemaFromFormat(format, format_settings, read_buffer_creator, context); -} - namespace { ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders( @@ -165,7 +143,8 @@ namespace const ConnectionTimeouts & timeouts, const String & compression_method, const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}, - const URIParams & params = {}) + const URIParams & params = {}, + bool glob_url = false) : SourceWithProgress(sample_block), name(std::move(name_)) , uri_info(uri_info_) { @@ -174,54 +153,10 @@ namespace /// Lazy initialization. 
We should not perform requests in constructor, because we need to do it in query pipeline. initialize = [=, this](const URIInfo::FailoverOptions & uri_options) { - WriteBufferFromOwnString error_message; - for (auto option = uri_options.begin(); option < uri_options.end(); ++option) - { - auto request_uri = Poco::URI(*option); - for (const auto & [param, value] : params) - request_uri.addQueryParameter(param, value); - - try - { - std::string user_info = request_uri.getUserInfo(); - if (!user_info.empty()) - { - std::size_t n = user_info.find(':'); - if (n != std::string::npos) - { - credentials.setUsername(user_info.substr(0, n)); - credentials.setPassword(user_info.substr(n + 1)); - } - } - - /// Get first alive uri. - read_buf = wrapReadBufferWithCompressionMethod( - std::make_unique( - request_uri, - http_method, - callback, - timeouts, - credentials, - context->getSettingsRef().max_http_get_redirects, - DBMS_DEFAULT_BUFFER_SIZE, - context->getReadSettings(), - headers, - ReadWriteBufferFromHTTP::Range{}, - context->getRemoteHostFilter()), - chooseCompressionMethod(request_uri.getPath(), compression_method)); - } - catch (...) - { - if (uri_options.size() == 1) - throw; - - if (option == uri_options.end() - 1) - throw Exception(ErrorCodes::NETWORK_ERROR, "All uri options are unreachable. {}", error_message.str()); - - error_message << *option << " error: " << getCurrentExceptionMessage(false) << "\n"; - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } + read_buf = getFirstAvailableURLReadBuffer( + uri_options, context, params, http_method, + callback, timeouts, compression_method, headers, + /* skip_url_not_found_error */glob_url); auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -271,6 +206,72 @@ namespace } } + static std::unique_ptr getFirstAvailableURLReadBuffer( + const std::vector & urls, + ContextPtr context, + const URIParams & params, + const String & http_method, + std::function callback, + const ConnectionTimeouts & timeouts, + const String & compression_method, + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, + bool skip_url_not_found_error) + { + Poco::Net::HTTPBasicCredentials credentials{}; + String first_exception_message; + + for (auto option = urls.begin(); option < urls.end(); ++option) + { + auto request_uri = Poco::URI(*option); + for (const auto & [param, value] : params) + request_uri.addQueryParameter(param, value); + + try + { + std::string user_info = request_uri.getUserInfo(); + if (!user_info.empty()) + { + std::size_t n = user_info.find(':'); + if (n != std::string::npos) + { + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n + 1)); + } + } + + return wrapReadBufferWithCompressionMethod( + std::make_unique( + request_uri, + http_method, + callback, + timeouts, + credentials, + context->getSettingsRef().max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + context->getReadSettings(), + headers, + ReadWriteBufferFromHTTP::Range{}, + context->getRemoteHostFilter(), + /* delay_initiliazation */urls.size() == 1, + /* use_external_buffer */false, + skip_url_not_found_error), + chooseCompressionMethod(request_uri.getPath(), compression_method)); + } + catch (...) 
+ { + if (first_exception_message.empty()) + first_exception_message = getCurrentExceptionMessage(false); + + if (urls.size() == 1) + throw; + + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", urls.size(), first_exception_message); + } + private: using InitializeFunc = std::function; InitializeFunc initialize; @@ -284,8 +285,6 @@ namespace /// onCancell and generate can be called concurrently and both of them /// have R/W access to reader pointer. std::mutex reader_mutex; - - Poco::Net::HTTPBasicCredentials credentials{}; }; } @@ -395,6 +394,68 @@ std::function IStorageURLBase::getReadPOSTDataCallback( } +ColumnsDescription IStorageURLBase::getTableStructureFromData( + const String & format, + const String & uri, + const String & compression_method, + const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, + const std::optional & format_settings, + ContextPtr context) +{ + ReadBufferCreator read_buffer_creator; + + if (urlWithGlobs(uri)) + { + std::vector urls_to_check; + + size_t max_addresses = context->getSettingsRef().glob_expansion_max_elements; + auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); + for (const auto & description : uri_descriptions) + { + auto options = parseRemoteDescription(description, 0, description.size(), '|', max_addresses); + urls_to_check.insert(urls_to_check.end(), options.begin(), options.end()); + } + + read_buffer_creator = [&, urls_to_check]() + { + return StorageURLSource::getFirstAvailableURLReadBuffer( + urls_to_check, + context, + {}, + Poco::Net::HTTPRequest::HTTP_GET, + {}, + ConnectionTimeouts::getHTTPTimeouts(context), + compression_method, + headers, + /* skip_url_not_found_error */true); + }; + } + else + { + read_buffer_creator = [&]() + { + auto parsed_uri = Poco::URI(uri); + return wrapReadBufferWithCompressionMethod( + std::make_unique( + parsed_uri, + Poco::Net::HTTPRequest::HTTP_GET, + nullptr, + ConnectionTimeouts::getHTTPTimeouts(context), + Poco::Net::HTTPBasicCredentials{}, + context->getSettingsRef().max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + context->getReadSettings(), + headers, + ReadWriteBufferFromHTTP::Range{}, + context->getRemoteHostFilter(), + /* delay_initiliazation */true), + chooseCompressionMethod(parsed_uri.getPath(), compression_method)); + }; + } + + return readSchemaFromFormat(format, format_settings, read_buffer_creator, context); +} + Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -405,10 +466,8 @@ Pipe IStorageURLBase::read( unsigned num_streams) { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); - bool with_globs = (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) - || uri.find('|') != std::string::npos; - if (with_globs) + if (urlWithGlobs(uri)) { size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); @@ -440,7 +499,7 @@ Pipe IStorageURLBase::read( metadata_snapshot->getColumns(), max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), - compression_method, headers, params)); + compression_method, headers, params, /* glob_url */true)); } return Pipe::unitePipes(std::move(pipes)); } From 19cf4e6dec75a261c6bc6ea71e19db413e73c18f Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 13 Dec 2021 
17:35:27 +0300 Subject: [PATCH 150/215] round robin connection on fail --- programs/client/Client.cpp | 83 +++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 29 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d630f2106cb..191cb5bd23c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -492,48 +492,73 @@ catch (...) void Client::connect() { + bool is_secure = config().getBool("secure", false); connection_parameters = ConnectionParameters(config()); - if (is_interactive) - std::cout << "Connecting to " - << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " - : "") - << connection_parameters.host << ":" << connection_parameters.port - << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; - String server_name; UInt64 server_version_major = 0; UInt64 server_version_minor = 0; UInt64 server_version_patch = 0; - try + String tcp_port_config_key = is_secure ? "tcp_port_secure" : "tcp_port"; + UInt16 default_port = config().getInt(tcp_port_config_key, is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT); + + for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index) { - connection = Connection::createConnection(connection_parameters, global_context); + connection_parameters.host = hosts_ports[attempted_address_index].host; + connection_parameters.port = hosts_ports[attempted_address_index].port.value_or(default_port); - if (max_client_network_bandwidth) + if (is_interactive) + std::cout << "Connecting to " + << (!connection_parameters.default_database.empty() ? "database " + connection_parameters.default_database + " at " + : "") + << connection_parameters.host << ":" << connection_parameters.port + << (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl; + + try { - ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); - connection->setThrottler(throttler); - } + connection = Connection::createConnection(connection_parameters, global_context); - connection->getServerVersion( - connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); - } - catch (const Exception & e) - { - /// It is typical when users install ClickHouse, type some password and instantly forget it. - if ((connection_parameters.user.empty() || connection_parameters.user == "default") - && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) + if (max_client_network_bandwidth) + { + ThrottlerPtr throttler = std::make_shared(max_client_network_bandwidth, 0, ""); + connection->setThrottler(throttler); + } + + connection->getServerVersion( + connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); + break; + } + catch (const Exception & e) { - std::cerr << std::endl - << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl - << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl - << "and deleting this file will reset the password." << std::endl - << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." 
<< std::endl - << std::endl; - } + /// It is typical when users install ClickHouse, type some password and instantly forget it. + /// This problem can't be fixed with reconnection so it is not attempted + if ((connection_parameters.user.empty() || connection_parameters.user == "default") + && e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED) + { + std::cerr << std::endl + << "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl + << "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl + << "and deleting this file will reset the password." << std::endl + << "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl + << std::endl; + throw; + } + else + { + if (attempted_address_index == hosts_ports.size() - 1) + throw; - throw; + std::cerr << "Connection attempt to database at " + << connection_parameters.host << ":" << connection_parameters.port + << " resulted in failure" + << std::endl + << getExceptionMessage(e, false) + << std::endl + << "Attempting connection to the next provided address" + << std::endl; + } + } } server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch); From 03b494570490fa8a1fe1a18afa7cd4375bf8ac9d Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Fri, 31 Dec 2021 14:52:26 +0300 Subject: [PATCH 151/215] Fix codestyle --- src/Client/ClientBase.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9e3e03d4820..7345dd47ea3 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -240,15 +240,19 @@ protected: { String host; std::optional port{}; - friend std::istream & operator>>(std::istream & in, HostPort & hostPort) { - String - host_with_port, - delimiter = ":"; + friend std::istream & operator>>(std::istream & in, HostPort & hostPort) + { + String host_with_port; + String delimiter = ":"; in >> host_with_port; size_t delimiter_pos = host_with_port.find(delimiter); - hostPort.host = host_with_port.substr(0, delimiter_pos); - if (delimiter_pos < host_with_port.length()) + if (delimiter_pos != String::npos) + { + hostPort.host = host_with_port.substr(0, delimiter_pos); hostPort.port = std::stoi(host_with_port.substr(delimiter_pos + 1, host_with_port.length())); + } + else + hostPort.host = host_with_port; return in; } }; From e780c1292d837b03365f4e210d3a270e66257c4f Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Tue, 18 Jan 2022 01:57:05 +0300 Subject: [PATCH 152/215] Add tests, remove saving host in config --- programs/client/Client.cpp | 64 +++++++++---------- src/Client/ClientBase.h | 2 +- src/Client/ConnectionParameters.cpp | 27 +++++--- src/Client/ConnectionParameters.h | 1 + ..._multiple_hosts_command_line_set.reference | 8 +++ .../02100_multiple_hosts_command_line_set.sh | 35 ++++++++++ 6 files changed, 92 insertions(+), 45 deletions(-) create mode 100644 tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference create mode 100755 tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 191cb5bd23c..1ab47472b22 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1,17 +1,12 @@ #include #include -#include #include #include -#include #include -#include -#include #include #include #include #include -#include #include 
#include #include "Client.h" @@ -24,6 +19,7 @@ #include #include #include +#include #include #include @@ -33,13 +29,13 @@ #include #include #include -#include #include #include #include #include -#include +#include +#include #include #include @@ -77,12 +73,6 @@ void Client::processError(const String & query) const fmt::print(stderr, "Received exception from server (version {}):\n{}\n", server_version, getExceptionMessage(*server_exception, print_stack_trace, true)); - bool print_stack_trace = config().getBool("stacktrace", false); - fmt::print( - stderr, - "Received exception from server (version {}):\n{}\n", - server_version, - getExceptionMessage(*server_exception, print_stack_trace, true)); if (is_interactive) { fmt::print(stderr, "\n"); @@ -492,17 +482,23 @@ catch (...) void Client::connect() { + for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index) + { + DB::DNSResolver::instance().resolveHost(hosts_ports[attempted_address_index].host); + } bool is_secure = config().getBool("secure", false); - connection_parameters = ConnectionParameters(config()); + String tcp_port_config_key = is_secure ? "tcp_port_secure" : "tcp_port"; + UInt16 default_port = config().getInt("port", + config().getInt(tcp_port_config_key, + is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); + connection_parameters = ConnectionParameters(config(), hosts_ports[0].host, + hosts_ports[0].port.value_or(default_port)); String server_name; UInt64 server_version_major = 0; UInt64 server_version_minor = 0; UInt64 server_version_patch = 0; - String tcp_port_config_key = is_secure ? "tcp_port_secure" : "tcp_port"; - UInt16 default_port = config().getInt(tcp_port_config_key, is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT); - for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index) { connection_parameters.host = hosts_ports[attempted_address_index].host; @@ -527,6 +523,8 @@ void Client::connect() connection->getServerVersion( connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision); + config().setString("host", connection_parameters.host); + config().setInt("port", connection_parameters.port); break; } catch (const Exception & e) @@ -549,14 +547,16 @@ void Client::connect() if (attempted_address_index == hosts_ports.size() - 1) throw; - std::cerr << "Connection attempt to database at " - << connection_parameters.host << ":" << connection_parameters.port - << " resulted in failure" - << std::endl - << getExceptionMessage(e, false) - << std::endl - << "Attempting connection to the next provided address" - << std::endl; + if (is_interactive) { + std::cerr << "Connection attempt to database at " + << connection_parameters.host << ":" << connection_parameters.port + << " resulted in failure" + << std::endl + << getExceptionMessage(e, false) + << std::endl + << "Attempting connection to the next provided address" + << std::endl; + } } } } @@ -1003,9 +1003,10 @@ void Client::addOptions(OptionsDescription & options_description) options_description.main_description->add_options() ("config,c", po::value(), "config-file path (another shorthand)") ("host,h", po::value>()->multitoken()->default_value({{"localhost"}}, "localhost"), - "list of server hosts with optionally assigned port to connect. Every argument looks like '[:] for example" - "'localhost:port'. 
If port isn't assigned, connection is made by port from '--port' param")
-    ("port", po::value()->default_value(9000), "server port")
+        "list of server hosts with optionally assigned port to connect. List elements are separated by a space."
+        "Every list element looks like '<host>[:<port>]'. If port isn't assigned, connection is made by port from '--port' param. "
+        "Example of usage: '-h host1:1 host2, host3:3'")
+    ("port", po::value()->default_value(9000), "server port, which is default port for every host from '--host' param")
     ("secure,s", "Use TLS connection")
     ("user,u", po::value()->default_value("default"), "user")
     /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
@@ -1112,13 +1113,8 @@ void Client::processOptions(const OptionsDescription & options_description,
     if (options.count("config"))
         config().setString("config-file", options["config"].as());
-    if (options.count("host") && !options["host"].defaulted())
-    {
+    if (options.count("host"))
         hosts_ports = options["host"].as>();
-        config().setString("host", hosts_ports[0].host);
-        if (hosts_ports[0].port.has_value())
-            config().setInt("port", hosts_ports[0].port.value());
-    }
     if (options.count("interleave-queries-file"))
         interleave_queries_files = options["interleave-queries-file"].as>();
     if (options.count("port") && !options["port"].defaulted())
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h
index 7345dd47ea3..b160417340f 100644
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@@ -249,7 +249,7 @@ protected:
             if (delimiter_pos != String::npos)
             {
                 hostPort.host = host_with_port.substr(0, delimiter_pos);
-                hostPort.port = std::stoi(host_with_port.substr(delimiter_pos + 1, host_with_port.length()));
+                hostPort.port = boost::lexical_cast(host_with_port.substr(delimiter_pos + 1, host_with_port.length()));
             }
             else
                 hostPort.host = host_with_port;
diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp
index dbd463583f5..18da169fae8 100644
--- a/src/Client/ConnectionParameters.cpp
+++ b/src/Client/ConnectionParameters.cpp
@@ -23,15 +23,13 @@ namespace ErrorCodes
     extern const int BAD_ARGUMENTS;
 }
 
-ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config)
+ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config,
+    std::string connection_host,
+    int connection_port)
+    : host(connection_host), port(connection_port)
 {
     bool is_secure = config.getBool("secure", false);
     security = is_secure ? Protocol::Secure::Enable : Protocol::Secure::Disable;
 
-    host = config.getString("host", "localhost");
-    port = config.getInt(
-        "port", config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
-
     default_database = config.getString("database", "");
     /// changed the default value to "default" to fix the issue when the user in the prompt is blank
@@ -61,12 +59,21 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
     /// By default compression is disabled if address looks like localhost.
     compression = config.getBool("compression", !isLocalAddress(DNSResolver::instance().resolveHost(host)))
-        ? Protocol::Compression::Enable : Protocol::Compression::Disable;
+        ?
Protocol::Compression::Enable : Protocol::Compression::Disable; timeouts = ConnectionTimeouts( - Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); + Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), + Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); +} + +ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) +{ + bool is_secure = config.getBool("secure", false); + std::string connection_host = config.getString("host", "localhost"); + int connection_port = config.getInt("port", + config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); + ConnectionParameters(config, connection_host, connection_port); } } diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index a169df8390a..28758ad36d8 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -24,6 +24,7 @@ struct ConnectionParameters ConnectionParameters() {} ConnectionParameters(const Poco::Util::AbstractConfiguration & config); + ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, int port); }; } diff --git a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference new file mode 100644 index 00000000000..c18b4e9b082 --- /dev/null +++ b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference @@ -0,0 +1,8 @@ +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh new file mode 100755 index 00000000000..00ebbd78e3b --- /dev/null +++ b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# default values test +${CLICKHOUSE_CLIENT} --query "SELECT 1" + +# backward compatibility test +${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" --port "${CLICKHOUSE_PORT_TCP}" --query "SELECT 1"; + +not_resolvable_host="notlocalhost" +exception_msg="Cannot resolve host (${not_resolvable_host}), error 0: ${not_resolvable_host}. +Code: 198. DB::Exception: Not found address of host: ${not_resolvable_host}. (DNS_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" "${not_resolvable_host}" --query "SELECT 1" 2>&1 > /dev/null)"; +[ "${error}" == "${exception_msg}" ]; echo "$?" + +not_number_port="abc" +exception_msg="Bad arguments: the argument ('${CLICKHOUSE_HOST}:${not_number_port}') for option '--host' is invalid." +error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${not_number_port}" --query "SELECT 1" 2>&1 > /dev/null)"; +[ "${error}" == "${exception_msg}" ]; echo "$?" + +not_alive_host="10.100.0.0" +${CLICKHOUSE_CLIENT} --host "${not_alive_host}" "${CLICKHOUSE_HOST}" --query "SELECT 1"; + +not_alive_port="1" +exception_msg="Code: 210. 
DB::NetException: Connection refused (${CLICKHOUSE_HOST}:${not_alive_port}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" --port "${not_alive_port}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" +${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${not_alive_port}" "${CLICKHOUSE_HOST}" --query "SELECT 1"; +${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_TCP}" --port "${not_alive_port}" --query "SELECT 1"; + From a9ff296960abd158f158f84823544496d674ecf9 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Tue, 25 Jan 2022 17:30:05 +0300 Subject: [PATCH 153/215] Fix codestyle --- programs/client/Client.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 1ab47472b22..fc7edd8a1b2 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -547,7 +547,8 @@ void Client::connect() if (attempted_address_index == hosts_ports.size() - 1) throw; - if (is_interactive) { + if (is_interactive) + { std::cerr << "Connection attempt to database at " << connection_parameters.host << ":" << connection_parameters.port << " resulted in failure" From 18a777cc8b35ca6268b51f9505510205838c7b44 Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Thu, 27 Jan 2022 09:19:02 +0300 Subject: [PATCH 154/215] Add function which gets port from configuration --- programs/client/Client.cpp | 8 ++------ src/Client/ConnectionParameters.cpp | 12 ++++++++---- src/Client/ConnectionParameters.h | 2 ++ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index fc7edd8a1b2..dcb6aedabd2 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -486,13 +486,9 @@ void Client::connect() { DB::DNSResolver::instance().resolveHost(hosts_ports[attempted_address_index].host); } - bool is_secure = config().getBool("secure", false); - String tcp_port_config_key = is_secure ? "tcp_port_secure" : "tcp_port"; - UInt16 default_port = config().getInt("port", - config().getInt(tcp_port_config_key, - is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); + UInt16 default_port = ConnectionParameters::getPortFromConfig(config()); connection_parameters = ConnectionParameters(config(), hosts_ports[0].host, - hosts_ports[0].port.value_or(default_port)); + hosts_ports[0].port.value_or(default_port)); String server_name; UInt64 server_version_major = 0; diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 18da169fae8..7dbdc5d7f9c 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -69,11 +69,15 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config) +{ + ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config)); +} + +int ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config) { bool is_secure = config.getBool("secure", false); - std::string connection_host = config.getString("host", "localhost"); - int connection_port = config.getInt("port", - config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", is_secure ? DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT)); - ConnectionParameters(config, connection_host, connection_port); + return config.getInt("port", + config.getInt(is_secure ? "tcp_port_secure" : "tcp_port", + is_secure ? 
DBMS_DEFAULT_SECURE_PORT : DBMS_DEFAULT_PORT));
 }
 
 }
diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h
index 28758ad36d8..dc509049c83 100644
--- a/src/Client/ConnectionParameters.h
+++ b/src/Client/ConnectionParameters.h
@@ -25,6 +25,8 @@ struct ConnectionParameters
     ConnectionParameters() {}
     ConnectionParameters(const Poco::Util::AbstractConfiguration & config);
     ConnectionParameters(const Poco::Util::AbstractConfiguration & config, std::string host, int port);
+
+    static int getPortFromConfig(const Poco::Util::AbstractConfiguration & config);
 };
 
 }

From 226759b6748ef4232c92554f1a24142bed822607 Mon Sep 17 00:00:00 2001
From: Filippov Denis <55152556+DF5HSE@users.noreply.github.com>
Date: Wed, 26 Jan 2022 20:28:22 +0300
Subject: [PATCH 155/215] Update programs/client/Client.cpp

Fix `--host` parameter description

Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
---
 programs/client/Client.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index fc7edd8a1b2..27b482e1ff3 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -1006,7 +1006,7 @@ void Client::addOptions(OptionsDescription & options_description)
     ("host,h", po::value>()->multitoken()->default_value({{"localhost"}}, "localhost"),
         "list of server hosts with optionally assigned port to connect. List elements are separated by a space."
         "Every list element looks like '<host>[:<port>]'. If port isn't assigned, connection is made by port from '--port' param. "
-        "Example of usage: '-h host1:1 host2, host3:3'")
+        "Example of usage: '-h host1:1 host2 host3:3'")
     ("port", po::value()->default_value(9000), "server port, which is default port for every host from '--host' param")
     ("secure,s", "Use TLS connection")
     ("user,u", po::value()->default_value("default"), "user")

From 3cb433762b5cf938c2281fbf3dacd61916a8792d Mon Sep 17 00:00:00 2001
From: DF5HSE
Date: Thu, 27 Jan 2022 13:44:39 +0300
Subject: [PATCH 156/215] Fix errors in build check

---
 programs/client/Client.cpp          | 4 ++--
 src/Client/ConnectionParameters.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 44ae96d5521..07e0b70443e 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -482,9 +482,9 @@ catch (...)
 void Client::connect()
 {
-    for (size_t attempted_address_index = 0; attempted_address_index < hosts_ports.size(); ++attempted_address_index)
+    for (auto host_port : hosts_ports)
     {
-        DB::DNSResolver::instance().resolveHost(hosts_ports[attempted_address_index].host);
+        DB::DNSResolver::instance().resolveHost(host_port.host);
     }
     UInt16 default_port = ConnectionParameters::getPortFromConfig(config());
     connection_parameters = ConnectionParameters(config(), hosts_ports[0].host,
diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp
index 7dbdc5d7f9c..55569f080f6 100644
--- a/src/Client/ConnectionParameters.cpp
+++ b/src/Client/ConnectionParameters.cpp
@@ -69,8 +69,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
 }
 
 ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfiguration & config)
+    : ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config))
 {
-    ConnectionParameters(config, config.getString("host", "localhost"), getPortFromConfig(config));
 }
 
 int ConnectionParameters::getPortFromConfig(const Poco::Util::AbstractConfiguration & config)

From b035ad784387e3c8c6dcfc3cd4011fc259f7f68d Mon Sep 17 00:00:00 2001
From: DF5HSE
Date: Thu, 27 Jan 2022 15:28:54 +0300
Subject: [PATCH 157/215] Fix error with IPv6 in host param and improve
 HostPort reading from input stream

---
 programs/client/Client.cpp |  4 ----
 src/Client/ClientBase.h    | 24 ++++++++++++++++---------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 07e0b70443e..1a39ce0a676 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -482,10 +482,6 @@ catch (...)
 
 void Client::connect()
 {
-    for (auto host_port : hosts_ports)
-    {
-        DB::DNSResolver::instance().resolveHost(host_port.host);
-    }
     UInt16 default_port = ConnectionParameters::getPortFromConfig(config());
     connection_parameters = ConnectionParameters(config(), hosts_ports[0].host,
         hosts_ports[0].port.value_or(default_port));
diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h
index b160417340f..befe7be9c2c 100644
--- a/src/Client/ClientBase.h
+++ b/src/Client/ClientBase.h
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -239,20 +240,27 @@ protected:
     struct HostPort
     {
         String host;
-        std::optional port{};
+        std::optional port{};
         friend std::istream & operator>>(std::istream & in, HostPort & hostPort)
         {
             String host_with_port;
-            String delimiter = ":";
             in >> host_with_port;
-            size_t delimiter_pos = host_with_port.find(delimiter);
+            DB::DNSResolver & resolver = DB::DNSResolver::instance();
+            try
             {
-                hostPort.host = host_with_port.substr(0, delimiter_pos);
-                hostPort.port = boost::lexical_cast(host_with_port.substr(delimiter_pos + 1, host_with_port.length()));
+                Poco::Net::SocketAddress address = resolver.resolveAddress(host_with_port);
+                hostPort.host = address.host().toString();
+                hostPort.port = address.port();
+            }
+            catch (const Exception & e)
+            {
+                if (e.message() == "Missing port number") {
+                    hostPort.host = resolver.resolveHost(host_with_port).toString();
+                    hostPort.port = std::nullopt;
+                    return in;
+                }
+                throw;
             }
-            else
-                hostPort.host = host_with_port;
             return in;
         }
     };

From 87933e44d67908c08375a427e574893a498e748d Mon Sep 17 00:00:00 2001
From: DF5HSE
Date: Thu, 27 Jan 2022 15:35:40 +0300
Subject: [PATCH 158/215] Fix codestyle

---
 src/Client/ClientBase.h | 3 ++-
 1 file changed, 2
insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index befe7be9c2c..9eb39d9face 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -254,7 +254,8 @@ protected: } catch (const Exception & e) { - if (e.message() == "Missing port number") { + if (e.message() == "Missing port number") + { hostPort.host = resolver.resolveHost(host_with_port).toString(); hostPort.port = std::nullopt; return in; From c1df291b18f212b49fa0053e072ced644032787e Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Mon, 7 Feb 2022 22:50:18 +0300 Subject: [PATCH 159/215] Fix error with IPv6 in host param --- src/Client/ClientBase.h | 21 +++------- src/Common/DNSResolver.cpp | 39 +++++++++++++++++++ src/Common/DNSResolver.h | 4 ++ ..._multiple_hosts_command_line_set.reference | 3 ++ .../02100_multiple_hosts_command_line_set.sh | 16 ++++++++ 5 files changed, 67 insertions(+), 16 deletions(-) diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9eb39d9face..0e5c1590c7f 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -246,22 +246,11 @@ protected: String host_with_port; in >> host_with_port; DB::DNSResolver & resolver = DB::DNSResolver::instance(); - try - { - Poco::Net::SocketAddress address = resolver.resolveAddress(host_with_port); - hostPort.host = address.host().toString(); - hostPort.port = address.port(); - } - catch (const Exception & e) - { - if (e.message() == "Missing port number") - { - hostPort.host = resolver.resolveHost(host_with_port).toString(); - hostPort.port = std::nullopt; - return in; - } - throw; - } + std::pair> + host_and_port = resolver.resolveHostOrAddress(host_with_port); + hostPort.host = host_and_port.first.toString(); + hostPort.port = host_and_port.second; + return in; } }; diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index 36d0c13b153..47ca514cd3d 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -202,6 +202,45 @@ Poco::Net::SocketAddress DNSResolver::resolveAddress(const std::string & host, U return Poco::Net::SocketAddress(impl->cache_host(host).front(), port); } +std::pair> DNSResolver::resolveHostOrAddress(const std::string & host_and_port) +{ + Poco::Net::IPAddress ip; + + size_t number_of_colons = std::count(host_and_port.begin(), host_and_port.end(), ':'); + if (number_of_colons > 1) + { + /// IPv6 host + if (host_and_port.starts_with('[')) + { + size_t close_bracket_pos = host_and_port.find(']'); + assert(close_bracket_pos != std::string::npos); + ip = resolveHost(host_and_port.substr(0, close_bracket_pos)); + + if (close_bracket_pos == host_and_port.size() - 1) + return {ip, std::nullopt}; + if (host_and_port[close_bracket_pos + 1] != ':') + throw Exception("Missing delimiter between host and port", ErrorCodes::BAD_ARGUMENTS); + + unsigned int port; + if (!Poco::NumberParser::tryParseUnsigned(host_and_port.substr(close_bracket_pos + 2), port)) + throw Exception("Port must be numeric", ErrorCodes::BAD_ARGUMENTS); + if (port > 0xFFFF) + throw Exception("Port must be less 0xFFFF", ErrorCodes::BAD_ARGUMENTS); + return {ip, port}; + } + return {resolveHost(host_and_port), std::nullopt}; + } + else if (number_of_colons == 1) + { + /// IPv4 host with port + Poco::Net::SocketAddress socket = resolveAddress(host_and_port); + return {socket.host(), socket.port()}; + } + + /// IPv4 host + return {resolveHost(host_and_port), std::nullopt}; +} + String DNSResolver::reverseResolve(const Poco::Net::IPAddress & address) { if (impl->disable_cache) diff --git 
a/src/Common/DNSResolver.h b/src/Common/DNSResolver.h index 3cefa37fd70..4ab422ab4ec 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -34,6 +34,10 @@ public: Poco::Net::SocketAddress resolveAddress(const std::string & host, UInt16 port); + /// Accepts host names like 'example.com'/'example.com:port' or '127.0.0.1'/'127.0.0.1:port' or '::1'/'[::1]:port' + /// and resolves its IP and port, if port is set + std::pair> resolveHostOrAddress(const std::string & host_and_port); + /// Accepts host IP and resolves its host name String reverseResolve(const Poco::Net::IPAddress & address); diff --git a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference index c18b4e9b082..993dd9b1cde 100644 --- a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference +++ b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.reference @@ -6,3 +6,6 @@ 1 1 1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh index 00ebbd78e3b..61244b80cf6 100755 --- a/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh +++ b/tests/queries/0_stateless/02100_multiple_hosts_command_line_set.sh @@ -33,3 +33,19 @@ error="$(${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}" --port "${not_alive_po ${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${not_alive_port}" "${CLICKHOUSE_HOST}" --query "SELECT 1"; ${CLICKHOUSE_CLIENT} --host "${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_TCP}" --port "${not_alive_port}" --query "SELECT 1"; +ipv6_host_without_brackets="2001:3984:3989::1:1000" +exception_msg="Code: 210. DB::NetException: Connection refused (${ipv6_host_without_brackets}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${ipv6_host_without_brackets}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" + +ipv6_host_with_brackets="[2001:3984:3989::1:1000]" +exception_msg="Code: 210. DB::NetException: Connection refused (${ipv6_host_with_brackets}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${ipv6_host_with_brackets}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" + +exception_msg="Code: 210. DB::NetException: Connection refused (${ipv6_host_with_brackets}:${not_alive_port}). (NETWORK_ERROR) +" +error="$(${CLICKHOUSE_CLIENT} --host "${ipv6_host_with_brackets}:${not_alive_port}" --query "SELECT 1" 2>&1 > /dev/null)" +[ "${error}" == "${exception_msg}" ]; echo "$?" 
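The bracket- and colon-handling added in `DNSResolver::resolveHostOrAddress` above is what lets a single `--host` argument carry all three spellings the new tests exercise: `host:port`, a bare IPv6 address, and `[ipv6]:port`. A minimal standalone sketch of just that splitting decision (a hypothetical `splitHostPort` helper, with no Poco types and no DNS resolution, so the names and error messages here are illustrative rather than the real API) could look like this:

```cpp
#include <algorithm>
#include <cassert>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>

/// Hypothetical helper mirroring the decision logic of resolveHostOrAddress:
/// it only splits host and port, it does not resolve anything.
std::pair<std::string, std::optional<unsigned short>> splitHostPort(const std::string & s)
{
    size_t colons = std::count(s.begin(), s.end(), ':');
    if (colons > 1)
    {
        /// More than one colon: IPv6, either bare or bracketed with an optional port.
        if (!s.empty() && s.front() == '[')
        {
            size_t close = s.find(']');
            if (close == std::string::npos)
                throw std::invalid_argument("Missing closing bracket");
            std::string host = s.substr(1, close - 1);
            if (close == s.size() - 1)
                return {host, std::nullopt};
            if (s[close + 1] != ':')
                throw std::invalid_argument("Missing delimiter between host and port");
            unsigned long port = std::stoul(s.substr(close + 2)); /// Throws if not numeric.
            if (port > 0xFFFF)
                throw std::invalid_argument("Port must be less than 0xFFFF");
            return {host, static_cast<unsigned short>(port)};
        }
        return {s, std::nullopt}; /// Bare IPv6 address, no port possible.
    }
    if (colons == 1)
    {
        /// Exactly one colon: IPv4 address or host name followed by a port.
        size_t pos = s.find(':');
        unsigned long port = std::stoul(s.substr(pos + 1));
        if (port > 0xFFFF)
            throw std::invalid_argument("Port must be less than 0xFFFF");
        return {s.substr(0, pos), static_cast<unsigned short>(port)};
    }
    return {s, std::nullopt}; /// No colon: bare IPv4 address or host name.
}

int main()
{
    assert(splitHostPort("localhost:9000").second.value() == 9000);
    assert(!splitHostPort("2001:db8::1").second.has_value());
    assert(splitHostPort("[2001:db8::1]:9440").first == "2001:db8::1");
}
```

The invariant the tests rely on is that a port can only follow either a closing `]` or the sole colon of the IPv4/host-name form; everything else containing multiple colons is treated as a bare IPv6 address.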
From 1d13c68c9c7e9c3a6e500c58fb535cfc21f6fab3 Mon Sep 17 00:00:00 2001 From: cmsxbc Date: Tue, 8 Feb 2022 08:58:01 +0800 Subject: [PATCH 160/215] use `max` instead of `not any` to check all rows --- tests/queries/0_stateless/02161_addressToLineWithInlines.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index 29be9ae85f6..baddea30ae3 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -20,6 +20,6 @@ WITH SELECT query_id FROM system.query_log WHERE current_database = currentDatabase() AND log_comment='02161_test_case' ORDER BY event_time DESC LIMIT 1 ) ) -SELECT 'has inlines:', or(max(length(lineWithInlines)) > 1, not any(locate(lineWithInlines[1], ':') != 0)) FROM lineWithInlines SETTINGS short_circuit_function_evaluation='enable'; +SELECT 'has inlines:', or(max(length(lineWithInlines)) > 1, max(locate(lineWithInlines[1], ':')) = 0) FROM lineWithInlines SETTINGS short_circuit_function_evaluation='enable'; -- `max(length(lineWithInlines)) > 1` check there is any inlines. --- `not any(locate(lineWithInlines[1], ':') != 0)` check whether none could get a symbol. +-- `max(locate(lineWithInlines[1], ':')) = 0` check whether none could get a symbol. From 6b3adbb0deaf1aea62f7d7cb4aa24debbb2d0443 Mon Sep 17 00:00:00 2001 From: Rajkumar Date: Mon, 7 Feb 2022 19:50:34 -0800 Subject: [PATCH 161/215] Method called on already moved --- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 8a56c2ed5c7..1cd18087f56 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) { const auto & sample_block = getPort().getHeader(); size_t num_columns = sample_block.columns(); From 8da78c7decece08fe2fb37cc83d38416ca5dc78a Mon Sep 17 00:00:00 2001 From: Jake Liu <13583702+LittleJake@users.noreply.github.com> Date: Tue, 8 Feb 2022 17:06:07 +0800 Subject: [PATCH 162/215] Update query_thread_log.md Create Chinese version. --- .../system-tables/query_thread_log.md | 96 +++++++++---------- 1 file changed, 46 insertions(+), 50 deletions(-) diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md index 33583f3b730..114aa6819b9 100644 --- a/docs/zh/operations/system-tables/query_thread_log.md +++ b/docs/zh/operations/system-tables/query_thread_log.md @@ -1,67 +1,62 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- - # 系统。query_thread_log {#system_tables-query_thread_log} 包含有关执行查询的线程的信息,例如,线程名称、线程开始时间、查询处理的持续时间。 开始记录: -1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 科。 -2. 
设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 到1。 +1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 部分。 +2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 为1。 -数据的冲洗周期设置在 `flush_interval_milliseconds` 的参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 要强制冲洗,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询。 +数据的清理周期时间参数 `flush_interval_milliseconds` 位于 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 如果需要强制清理,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询请求。 -ClickHouse不会自动从表中删除数据。 看 [导言](../../operations/system-tables/index.md#system-tables-introduction) 欲了解更多详情。 +ClickHouse不会自动从表中删除数据。 欲了解更多详情,请参照 [介绍](../../operations/system-tables/index.md#system-tables-introduction)。 列: -- `event_date` ([日期](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. -- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. -- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — Start time of query execution. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. -- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询,写入的行数。 对于其他查询,列值为0。 -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` 查询时,写入的字节数。 对于其他查询,列值为0。 -- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. -- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — Name of the thread. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. -- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID. -- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. -- `query` ([字符串](../../sql-reference/data-types/string.md)) — Query string. -- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. -- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. 
-- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. -- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. -- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. -- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: +- `event_date` ([日期](../../sql-reference/data-types/date.md)) — 该查询线程执行完成的日期。 +- `event_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 该查询线程执行完成的时间。 +- `query_start_time` ([日期时间](../../sql-reference/data-types/datetime.md)) — 查询的开始时间。 +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询执行持续的时间。 +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的行数。 +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 读取的字节数。 +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的行数。 对于其他查询,为0。 +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 对于 `INSERT` 查询,写入的字节数。 对于其他查询,为0。 +- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在线程上下文,分配的内存和空闲内存之差。 +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — 在线程上下文,分配的内存和空闲内存之差的最大值。 +- `thread_name` ([字符串](../../sql-reference/data-types/string.md)) — 线程名。 +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 内部线程ID。 +- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — 线程ID。 +- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS初始线程的初始ID。 +- `query` ([字符串](../../sql-reference/data-types/string.md)) — 查询语句。 +- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 查询类型,可能的值: + - 1 — 由用户发起的查询。 + - 0 — 由其他查询发起的分布式查询。 +- `user` ([字符串](../../sql-reference/data-types/string.md)) — 发起查询的用户名。 +- `query_id` ([字符串](../../sql-reference/data-types/string.md)) — 查询的ID。 +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起查询的IP地址。 +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的端口。 +- `initial_user` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的用户名(对于分布式查询)。 +- `initial_query_id` ([字符串](../../sql-reference/data-types/string.md)) — 首次发起查询的ID(对于分布式查询)。 +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — 发起该查询的父查询IP地址。 +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起该查询的父查询端口。 +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的界面,可能的值: - 1 — TCP. - 2 — HTTP. -- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — OS's username who runs [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md). 
-- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或者运行另一个TCP客户端。 -- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — The [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端名称。 -- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端。 -- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [ツ环板clientョツ嘉ッツ偲](../../interfaces/cli.md) 或另一个TCP客户端版本。 -- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. +- `os_user` ([字符串](../../sql-reference/data-types/string.md)) — 使用 [clickhouse-client](../../interfaces/cli.md) 的系统用户名。 +- `client_hostname` ([字符串](../../sql-reference/data-types/string.md)) — 运行 [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主机名。 +- `client_name` ([字符串](../../sql-reference/data-types/string.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的名称。 +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的修订号。 +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的主版本号。 +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的次版本号。 +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — [clickhouse-client](../../interfaces/cli.md) 或另一个TCP客户端的补丁版本号。 +- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — 发起查询的HTTP方法,可能的值: + - 0 — 查询通过TCP界面发起。 - 1 — `GET` 方法被使用。 - 2 — `POST` 方法被使用。 -- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — The `UserAgent` http请求中传递的标头。 -- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — The “quota key” 在指定 [配额](../../operations/quotas.md) 设置(见 `keyed`). -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — Counters that measure different metrics for this thread. The description of them could be found in the table [系统。活动](#system_tables-events). +- `http_user_agent` ([字符串](../../sql-reference/data-types/string.md)) — `UserAgent` HTTP请求中传递的UA表头。 +- `quota_key` ([字符串](../../sql-reference/data-types/string.md)) — “quota key” 在 [配额](../../operations/quotas.md) 设置内(详见 `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse 修订版本号. +- `ProfileEvents` ([数组(字符串, UInt64)](../../sql-reference/data-types/array.md)) — 对于该线程的多个指标计数器。这一项可以参考 [system.events](#system_tables-events). 
**示例** @@ -113,4 +108,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr **另请参阅** -- [系统。query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` 系统表,其中包含有关查询执行的公共信息。 +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — `query_log` 系统表描述,其中包含有关查询执行的公共信息。 +- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — 这个表包含在查询线程中使用的各个视图的信息。 From c6a962359aeb4e05f6611e5c6e58cf19a8c35f22 Mon Sep 17 00:00:00 2001 From: Jake Liu <13583702+LittleJake@users.noreply.github.com> Date: Tue, 8 Feb 2022 17:15:39 +0800 Subject: [PATCH 163/215] Update query_thread_log.md Fix typo. --- docs/zh/operations/system-tables/query_thread_log.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/operations/system-tables/query_thread_log.md b/docs/zh/operations/system-tables/query_thread_log.md index 114aa6819b9..64f9ed27393 100644 --- a/docs/zh/operations/system-tables/query_thread_log.md +++ b/docs/zh/operations/system-tables/query_thread_log.md @@ -2,12 +2,12 @@ 包含有关执行查询的线程的信息,例如,线程名称、线程开始时间、查询处理的持续时间。 -开始记录: +开启日志功能: 1. 在配置参数 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 部分。 2. 设置 [log_query_threads](../../operations/settings/settings.md#settings-log-query-threads) 为1。 -数据的清理周期时间参数 `flush_interval_milliseconds` 位于 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。 如果需要强制清理,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询请求。 +数据从缓存写入数据表周期时间参数 `flush_interval_milliseconds` 位于 [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) 服务器设置部分。如果需要强制从缓存写入数据表,请使用 [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) 查询请求。 ClickHouse不会自动从表中删除数据。 欲了解更多详情,请参照 [介绍](../../operations/system-tables/index.md#system-tables-introduction)。 From e657219e01edf21aa2ba203c59982af38b57aa4e Mon Sep 17 00:00:00 2001 From: DF5HSE Date: Tue, 8 Feb 2022 12:40:44 +0300 Subject: [PATCH 164/215] Delete not used include --- programs/client/Client.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 48bb153d9ea..86bf4a007a8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include From 3834bdbae094f113058e06d5eced0b969317b44e Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 8 Feb 2022 10:59:20 +0100 Subject: [PATCH 165/215] Fixes --- src/IO/ReadWriteBufferFromHTTP.h | 57 ++++++++++++++++++-------------- src/Storages/StorageURL.cpp | 21 +++++++----- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index a9ab7a8348e..d7b75c405ba 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -240,6 +240,17 @@ namespace detail return read_range.end; } + enum class InitializeError + { + /// If error is not retriable, `exception` variable must be set. 
+ NON_RETRIABLE_ERROR, + /// Allows to skip not found urls for globs + SKIP_NOT_FOUND_URL, + NONE, + }; + + InitializeError initialization_error = InitializeError::NONE; + public: using NextCallback = std::function; using OutStreamCallback = std::function; @@ -282,17 +293,14 @@ namespace detail settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms); if (!delay_initialization) + { initialize(); + if (exception) + std::rethrow_exception(exception); + } } - enum class InitializeError - { - NON_RETRIABLE_ERROR, - SKIP_NOT_FOUND_URL, - NONE, - }; - - InitializeError call(Poco::Net::HTTPResponse & response, const String & method_) + void call(Poco::Net::HTTPResponse & response, const String & method_) { try { @@ -303,28 +311,27 @@ namespace detail if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url) { - return InitializeError::SKIP_NOT_FOUND_URL; + initialization_error = InitializeError::SKIP_NOT_FOUND_URL; } else { throw; } } - - return InitializeError::NONE; } /** - * Note: In case of error return false if error is not retriable, otherwise throw. + * Throws if error is not retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and + * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true, + * sets initialization_error = DKIP_NOT_FOUND_URL, otherwise throws. */ - InitializeError initialize() + void initialize() { Poco::Net::HTTPResponse response; - auto error = call(response, method); - if (error == InitializeError::SKIP_NOT_FOUND_URL) - return error; - assert(error == InitializeError::NONE); + call(response, method); + if (initialization_error != InitializeError::NONE) + return; while (isRedirect(response.getStatus())) { @@ -347,7 +354,7 @@ namespace detail Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, "Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-')); - return InitializeError::NON_RETRIABLE_ERROR; + initialization_error = InitializeError::NON_RETRIABLE_ERROR; } else if (read_range.end) { @@ -382,12 +389,14 @@ namespace detail sess->attachSessionData(e.message()); throw; } - - return InitializeError::NONE; } bool nextImpl() override { + if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) + return false; + assert(initialization_error == InitializeError::NONE); + if (next_callback) next_callback(count()); @@ -429,15 +438,13 @@ namespace detail { if (!impl) { - /// If error is not retriable -- false is returned and exception is set. - /// Otherwise the error is thrown and retries continue. 
- auto error = initialize(); - if (error == InitializeError::NON_RETRIABLE_ERROR) + initialize(); + if (initialization_error == InitializeError::NON_RETRIABLE_ERROR) { assert(exception); break; } - else if (error == InitializeError::SKIP_NOT_FOUND_URL) + else if (initialization_error == InitializeError::SKIP_NOT_FOUND_URL) { return false; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index ee401ad4153..574cc691e29 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -69,6 +69,7 @@ IStorageURLBase::IStorageURLBase( , partition_by(partition_by_) { StorageInMemoryMetadata storage_metadata; + if (columns_.empty()) { auto columns = getTableStructureFromData(format_name, uri, compression_method, headers, format_settings, context_); @@ -155,8 +156,8 @@ namespace { read_buf = getFirstAvailableURLReadBuffer( uri_options, context, params, http_method, - callback, timeouts, compression_method, headers, - /* skip_url_not_found_error */glob_url); + callback, timeouts, compression_method, credentials, headers, + /* glob_url */glob_url); auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -214,15 +215,16 @@ namespace std::function callback, const ConnectionTimeouts & timeouts, const String & compression_method, + Poco::Net::HTTPBasicCredentials & credentials, const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, - bool skip_url_not_found_error) + bool glob_url) { - Poco::Net::HTTPBasicCredentials credentials{}; String first_exception_message; + bool skip_url_not_found_error = urls.size() == 1 && glob_url; - for (auto option = urls.begin(); option < urls.end(); ++option) + for (const auto & option : urls) { - auto request_uri = Poco::URI(*option); + auto request_uri = Poco::URI(option); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); @@ -285,6 +287,8 @@ namespace /// onCancell and generate can be called concurrently and both of them /// have R/W access to reader pointer. 
std::mutex reader_mutex; + + Poco::Net::HTTPBasicCredentials credentials; }; } @@ -403,6 +407,7 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( ContextPtr context) { ReadBufferCreator read_buffer_creator; + Poco::Net::HTTPBasicCredentials credentials; if (urlWithGlobs(uri)) { @@ -426,8 +431,8 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( {}, ConnectionTimeouts::getHTTPTimeouts(context), compression_method, - headers, - /* skip_url_not_found_error */true); + credentials, + headers, /* glob_url */true); }; } else From e118c89bb26869d973c3ebc93b5d9539dbf18b40 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 8 Feb 2022 13:40:03 +0300 Subject: [PATCH 166/215] Fix segfault in schema inference from url --- src/Storages/StorageURL.cpp | 16 ++++++++++++++-- .../configs/named_collections.xml | 5 +++++ tests/integration/test_storage_s3/test.py | 13 +++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 32ab126faa9..dd2736613b3 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -82,16 +82,28 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( const std::optional & format_settings, ContextPtr context) { + auto parsed_uri = Poco::URI(uri); + Poco::Net::HTTPBasicCredentials credentials; + std::string user_info = parsed_uri.getUserInfo(); + if (!user_info.empty()) + { + std::size_t n = user_info.find(':'); + if (n != std::string::npos) + { + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n + 1)); + } + } + auto read_buffer_creator = [&]() { - auto parsed_uri = Poco::URI(uri); return wrapReadBufferWithCompressionMethod( std::make_unique( parsed_uri, Poco::Net::HTTPRequest::HTTP_GET, nullptr, ConnectionTimeouts::getHTTPTimeouts(context), - Poco::Net::HTTPBasicCredentials{}, + credentials, context->getSettingsRef().max_http_get_redirects, DBMS_DEFAULT_BUFFER_SIZE, context->getReadSettings(), diff --git a/tests/integration/test_storage_s3/configs/named_collections.xml b/tests/integration/test_storage_s3/configs/named_collections.xml index f22440d17c9..fcc8bcac555 100644 --- a/tests/integration/test_storage_s3/configs/named_collections.xml +++ b/tests/integration/test_storage_s3/configs/named_collections.xml @@ -30,5 +30,10 @@ minio minio123 + + http://minio1:9001/root/test.parquet + minio + minio123 + diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 2ed5ca51054..4366a1f034e 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -981,3 +981,16 @@ def test_format_detection(started_cluster): result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") assert(int(result) == 1) + + instance.query(f"create table parquet_table_s3 (x UInt64) engine=S3(s3_parquet2)") + instance.query(f"insert into parquet_table_s3 select 1") + result = instance.query(f"select * from s3(s3_parquet2)") + assert(int(result) == 1) + + result = instance.query(f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.parquet')") + assert(int(result) == 1) + + result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.parquet')") + assert(int(result) == 1) + + From 7e54dafdc15c84d49048bcbd715e3b381078eaca Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov 
Date: Tue, 8 Feb 2022 10:41:07 +0000 Subject: [PATCH 167/215] Fix wrong destruction order in CreatingSetsTransform. --- src/Processors/Transforms/CreatingSetsTransform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/CreatingSetsTransform.h b/src/Processors/Transforms/CreatingSetsTransform.h index 839ab0cac88..48a32ea8663 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.h +++ b/src/Processors/Transforms/CreatingSetsTransform.h @@ -44,8 +44,8 @@ public: private: SubqueryForSet subquery; - std::unique_ptr executor; QueryPipeline table_out; + std::unique_ptr executor; UInt64 read_rows = 0; Stopwatch watch; From daf431a0a0a628dc4473209d47464771e3c3c110 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Feb 2022 16:15:01 +0300 Subject: [PATCH 168/215] Fix yml tags_stable --- .github/workflows/tags_stable.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index 30b6bfb027e..c607c5cfa04 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -6,8 +6,8 @@ name: TagsStableWorkflow on: # yamllint disable-line rule:truthy push: tags: - - 'v*-stable' - - 'v*-lts' + - 'v*-stable' + - 'v*-lts' jobs: From dc5f035265a5130c07627dea0a36e5bd2463b5cc Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 8 Feb 2022 12:57:35 +0100 Subject: [PATCH 169/215] Fix --- src/IO/ReadWriteBufferFromHTTP.h | 13 ++--- src/Storages/StorageURL.cpp | 88 +++++++++++++++++++++++--------- 2 files changed, 71 insertions(+), 30 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index d7b75c405ba..c8e360ef867 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -268,7 +268,7 @@ namespace detail const RemoteHostFilter & remote_host_filter_ = {}, bool delay_initialization = false, bool use_external_buffer_ = false, - bool glob_url = false) + bool http_skip_not_found_url_ = false) : SeekableReadBufferWithSize(nullptr, 0) , uri {uri_} , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} @@ -280,7 +280,7 @@ namespace detail , buffer_size {buffer_size_} , use_external_buffer {use_external_buffer_} , read_range(read_range_) - , http_skip_not_found_url(settings_.http_skip_not_found_url_for_globs && glob_url) + , http_skip_not_found_url(http_skip_not_found_url_) , settings {settings_} , log(&Poco::Logger::get("ReadWriteBufferFromHTTP")) { @@ -321,9 +321,9 @@ namespace detail } /** - * Throws if error is not retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and + * Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true, - * sets initialization_error = DKIP_NOT_FOUND_URL, otherwise throws. + * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws. */ void initialize() { @@ -355,6 +355,7 @@ namespace detail "Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? 
*read_range.end : '-'));
 
                     initialization_error = InitializeError::NON_RETRIABLE_ERROR;
+                    return;
                 }
                 else if (read_range.end)
                 {
@@ -619,11 +620,11 @@ public:
         const RemoteHostFilter & remote_host_filter_ = {},
         bool delay_initialization_ = true,
         bool use_external_buffer_ = false,
-        bool glob_url_ = false)
+        bool skip_not_found_url_ = false)
         : Parent(std::make_shared(uri_, timeouts, max_redirects),
             uri_, credentials_, method_, out_stream_callback_, buffer_size_,
             settings_, http_header_entries_, read_range_, remote_host_filter_,
-            delay_initialization_, use_external_buffer_, glob_url_)
+            delay_initialization_, use_external_buffer_, skip_not_found_url_)
     {
     }
 };
diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index 574cc691e29..562a429f190 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -130,6 +130,20 @@ namespace
             reader->cancel();
         }
 
+        static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri)
+        {
+            const auto & user_info = request_uri.getUserInfo();
+            if (!user_info.empty())
+            {
+                std::size_t n = user_info.find(':');
+                if (n != std::string::npos)
+                {
+                    credentials.setUsername(user_info.substr(0, n));
+                    credentials.setPassword(user_info.substr(n + 1));
+                }
+            }
+        }
+
         StorageURLSource(
             URIInfoPtr uri_info_,
             const std::string & http_method,
@@ -154,10 +168,44 @@ namespace
             /// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline.
             initialize = [=, this](const URIInfo::FailoverOptions & uri_options)
             {
-                read_buf = getFirstAvailableURLReadBuffer(
-                    uri_options, context, params, http_method,
-                    callback, timeouts, compression_method, credentials, headers,
-                    /* glob_url */glob_url);
+                if (uri_options.empty())
+                    throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list");
+
+                if (uri_options.size() > 1)
+                {
+                    read_buf = getFirstAvailableURLReadBuffer(
+                        uri_options, context, params, http_method,
+                        callback, timeouts, compression_method, credentials, headers);
+                }
+                else
+                {
+                    ReadSettings read_settings = context->getReadSettings();
+                    bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs;
+                    auto request_uri = Poco::URI(uri_options[0]);
+
+                    for (const auto & [param, value] : params)
+                        request_uri.addQueryParameter(param, value);
+
+                    setCredentials(credentials, request_uri);
+
+                    read_buf = wrapReadBufferWithCompressionMethod(
+                        std::make_unique(
+                            request_uri,
+                            http_method,
+                            callback,
+                            timeouts,
+                            credentials,
+                            context->getSettingsRef().max_http_get_redirects,
+                            DBMS_DEFAULT_BUFFER_SIZE,
+                            read_settings,
+                            headers,
+                            ReadWriteBufferFromHTTP::Range{},
+                            context->getRemoteHostFilter(),
+                            /* delay_initialization */true,
+                            /* use_external_buffer */false,
+                            /* skip_url_not_found_error */skip_url_not_found_error),
+                        chooseCompressionMethod(request_uri.getPath(), compression_method));
+                }
 
                 auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings);
                 QueryPipelineBuilder builder;
@@ -216,31 +264,23 @@ namespace
             const ConnectionTimeouts & timeouts,
             const String & compression_method,
             Poco::Net::HTTPBasicCredentials & credentials,
-            const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers,
-            bool glob_url)
+            const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers)
         {
             String first_exception_message;
+            ReadSettings read_settings = context->getReadSettings();
 
-            for (const auto & option : urls)
+            for (auto option = urls.begin(); option != urls.end(); ++option)
             {
-                auto request_uri = Poco::URI(option);
+                bool skip_url_not_found_error = read_settings.http_skip_not_found_url_for_globs && option == std::prev(urls.end());
+                auto request_uri = Poco::URI(*option);
+
                 for (const auto & [param, value] : params)
                     request_uri.addQueryParameter(param, value);
 
+                setCredentials(credentials, request_uri);
+
                 try
                 {
-                    std::string user_info = request_uri.getUserInfo();
-                    if (!user_info.empty())
-                    {
-                        std::size_t n = user_info.find(':');
-                        if (n != std::string::npos)
-                        {
-                            credentials.setUsername(user_info.substr(0, n));
-                            credentials.setPassword(user_info.substr(n + 1));
-                        }
-                    }
-
                     return wrapReadBufferWithCompressionMethod(
                         std::make_unique(
                             request_uri,
@@ -250,13 +290,13 @@ namespace
                         credentials,
                         context->getSettingsRef().max_http_get_redirects,
                         DBMS_DEFAULT_BUFFER_SIZE,
-                        context->getReadSettings(),
+                        read_settings,
                         headers,
                         ReadWriteBufferFromHTTP::Range{},
                         context->getRemoteHostFilter(),
-                        /* delay_initialization */urls.size() == 1,
+                        /* delay_initialization */false,
                         /* use_external_buffer */false,
-                        skip_url_not_found_error),
+                        /* skip_url_not_found_error */skip_url_not_found_error),
                         chooseCompressionMethod(request_uri.getPath(), compression_method));
             }
             catch (...)

From 7e9770dcf033a983c24ea52b7de5e5a171bdea11 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Tue, 8 Feb 2022 15:57:23 +0300
Subject: [PATCH 170/215] minor enhancements

---
 src/QueryPipeline/RemoteInserter.cpp          | 6 +++---
 src/Storages/Distributed/DirectoryMonitor.cpp | 4 +++-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp
index 2275f8eba4d..13d087f0db9 100644
--- a/src/QueryPipeline/RemoteInserter.cpp
+++ b/src/QueryPipeline/RemoteInserter.cpp
@@ -24,7 +24,9 @@ RemoteInserter::RemoteInserter(
     const String & query_,
     const Settings & settings_,
     const ClientInfo & client_info_)
-    : connection(connection_), query(query_)
+    : connection(connection_)
+    , query(query_)
+    , server_revision(connection.getServerRevision(timeouts))
 {
     ClientInfo modified_client_info = client_info_;
     modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY;
@@ -34,8 +36,6 @@ RemoteInserter::RemoteInserter(
             = CurrentThread::get().thread_trace_context;
     }
 
-    server_revision = connection.getServerRevision(timeouts);
-
     /** Send query and receive "header", that describes table structure.
       * Header is needed to know, what structure is required for blocks to be passed to 'write' method.
       */
diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp
index 461ec48f910..d7422b1ddbc 100644
--- a/src/Storages/Distributed/DirectoryMonitor.cpp
+++ b/src/Storages/Distributed/DirectoryMonitor.cpp
@@ -191,7 +191,9 @@ namespace
             NativeReader header_block_in(header_buf, distributed_header.revision);
             distributed_header.block_header = header_block_in.read();
             if (!distributed_header.block_header)
-                throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read header from the {} batch", in.getFileName());
+                throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA,
+                    "Cannot read header from the {} batch. 
Data was written with protocol version {}, current version: {}", + in.getFileName(), distributed_header.revision, DBMS_TCP_PROTOCOL_VERSION); } /// Add handling new data here, for example: From 71c57bed8ea1c91df1bf111992f283b63e63a18b Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 8 Feb 2022 14:52:07 +0100 Subject: [PATCH 171/215] Fix --- .../test_storage_postgresql/test.py | 78 ++++++++++--------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index b6ac121cd0c..87337a6b459 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -13,11 +13,19 @@ node2 = cluster.add_instance('node2', main_configs=['configs/named_collections.x def started_cluster(): try: cluster.start() + node1.query("CREATE DATABASE test") yield cluster finally: cluster.shutdown() +@pytest.fixture(autouse=True) +def setup_teardown(): + print("PostgreSQL is available - running test") + yield # run test + node1.query("DROP DATABASE test") + node1.query("CREATE DATABASE test") + def test_postgres_select_insert(started_cluster): cursor = started_cluster.postgres_conn.cursor() table_name = 'test_many' @@ -143,11 +151,11 @@ def test_non_default_scema(started_cluster): cursor.execute('INSERT INTO test_schema.test_table SELECT i FROM generate_series(0, 99) as t(i)') node1.query(''' - CREATE TABLE test_pg_table_schema (a UInt32) + CREATE TABLE test.test_pg_table_schema (a UInt32) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword', 'test_schema'); ''') - result = node1.query('SELECT * FROM test_pg_table_schema') + result = node1.query('SELECT * FROM test.test_pg_table_schema') expected = node1.query('SELECT number FROM numbers(100)') assert(result == expected) @@ -160,10 +168,10 @@ def test_non_default_scema(started_cluster): cursor.execute('INSERT INTO "test.nice.schema"."test.nice.table" SELECT i FROM generate_series(0, 99) as t(i)') node1.query(''' - CREATE TABLE test_pg_table_schema_with_dots (a UInt32) + CREATE TABLE test.test_pg_table_schema_with_dots (a UInt32) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test.nice.table', 'postgres', 'mysecretpassword', 'test.nice.schema'); ''') - result = node1.query('SELECT * FROM test_pg_table_schema_with_dots') + result = node1.query('SELECT * FROM test.test_pg_table_schema_with_dots') assert(result == expected) cursor.execute('INSERT INTO "test_schema"."test_table" SELECT i FROM generate_series(100, 199) as t(i)') @@ -173,8 +181,8 @@ def test_non_default_scema(started_cluster): cursor.execute('DROP SCHEMA test_schema CASCADE') cursor.execute('DROP SCHEMA "test.nice.schema" CASCADE') - node1.query('DROP TABLE test_pg_table_schema') - node1.query('DROP TABLE test_pg_table_schema_with_dots') + node1.query('DROP TABLE test.test_pg_table_schema') + node1.query('DROP TABLE test.test_pg_table_schema_with_dots') def test_concurrent_queries(started_cluster): @@ -302,19 +310,19 @@ def test_postgres_distributed(started_cluster): def test_datetime_with_timezone(started_cluster): cursor = started_cluster.postgres_conn.cursor() cursor.execute("DROP TABLE IF EXISTS test_timezone") - node1.query("DROP TABLE IF EXISTS test_timezone") + node1.query("DROP TABLE IF EXISTS test.test_timezone") cursor.execute("CREATE TABLE test_timezone (ts timestamp without time zone, ts_z timestamp with time zone)") cursor.execute("insert into test_timezone select '2014-04-04 20:00:00', 
'2014-04-04 20:00:00'::timestamptz at time zone 'America/New_York';") cursor.execute("select * from test_timezone") result = cursor.fetchall()[0] logging.debug(f'{result[0]}, {str(result[1])[:-6]}') - node1.query("create table test_timezone ( ts DateTime, ts_z DateTime('America/New_York')) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_timezone', 'postgres', 'mysecretpassword');") - assert(node1.query("select ts from test_timezone").strip() == str(result[0])) + node1.query("create table test.test_timezone ( ts DateTime, ts_z DateTime('America/New_York')) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_timezone', 'postgres', 'mysecretpassword');") + assert(node1.query("select ts from test.test_timezone").strip() == str(result[0])) # [:-6] because 2014-04-04 16:00:00+00:00 -> 2014-04-04 16:00:00 - assert(node1.query("select ts_z from test_timezone").strip() == str(result[1])[:-6]) - assert(node1.query("select * from test_timezone") == "2014-04-04 20:00:00\t2014-04-04 16:00:00\n") + assert(node1.query("select ts_z from test.test_timezone").strip() == str(result[1])[:-6]) + assert(node1.query("select * from test.test_timezone") == "2014-04-04 20:00:00\t2014-04-04 16:00:00\n") cursor.execute("DROP TABLE test_timezone") - node1.query("DROP TABLE test_timezone") + node1.query("DROP TABLE test.test_timezone") def test_postgres_ndim(started_cluster): @@ -342,20 +350,20 @@ def test_postgres_on_conflict(started_cluster): cursor.execute(f'CREATE TABLE {table} (a integer PRIMARY KEY, b text, c integer)') node1.query(''' - CREATE TABLE test_conflict (a UInt32, b String, c Int32) + CREATE TABLE test.test_conflict (a UInt32, b String, c Int32) ENGINE PostgreSQL('postgres1:5432', 'postgres', 'test_conflict', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING'); ''') - node1.query(f''' INSERT INTO {table} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') - node1.query(f''' INSERT INTO {table} SELECT number, concat('name_', toString(number)), 4 from numbers(100)''') + node1.query(f''' INSERT INTO test.{table} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') + node1.query(f''' INSERT INTO test.{table} SELECT number, concat('name_', toString(number)), 4 from numbers(100)''') - check1 = f"SELECT count() FROM {table}" + check1 = f"SELECT count() FROM test.{table}" assert (node1.query(check1)).rstrip() == '100' table_func = f'''postgresql('{started_cluster.postgres_ip}:{started_cluster.postgres_port}', 'postgres', '{table}', 'postgres', 'mysecretpassword', '', 'ON CONFLICT DO NOTHING')''' node1.query(f'''INSERT INTO TABLE FUNCTION {table_func} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') node1.query(f'''INSERT INTO TABLE FUNCTION {table_func} SELECT number, concat('name_', toString(number)), 3 from numbers(100)''') - check1 = f"SELECT count() FROM {table}" + check1 = f"SELECT count() FROM test.{table}" assert (node1.query(check1)).rstrip() == '100' cursor.execute(f'DROP TABLE {table} ') @@ -367,48 +375,48 @@ def test_predefined_connection_configuration(started_cluster): cursor.execute(f'CREATE TABLE test_table (a integer PRIMARY KEY, b integer)') node1.query(''' - DROP TABLE IF EXISTS test_table; - CREATE TABLE test_table (a UInt32, b Int32) + DROP TABLE IF EXISTS test.test_table; + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1); ''') - node1.query(f''' INSERT INTO test_table SELECT number, number from numbers(100)''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() 
== '100') + node1.query(f''' INSERT INTO test.test_table SELECT number, number from numbers(100)''') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') node1.query(''' - DROP TABLE test_table; - CREATE TABLE test_table (a UInt32, b Int32) + DROP TABLE test.test_table; + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1, on_conflict='ON CONFLICT DO NOTHING'); ''') - node1.query(f''' INSERT INTO test_table SELECT number, number from numbers(100)''') - node1.query(f''' INSERT INTO test_table SELECT number, number from numbers(100)''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + node1.query(f''' INSERT INTO test.test_table SELECT number, number from numbers(100)''') + node1.query(f''' INSERT INTO test.test_table SELECT number, number from numbers(100)''') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') - node1.query('DROP TABLE test_table;') + node1.query('DROP TABLE test.test_table;') node1.query_and_get_error(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1, 'ON CONFLICT DO NOTHING'); ''') node1.query_and_get_error(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres2); ''') node1.query_and_get_error(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(unknown_collection); ''') node1.query(''' - CREATE TABLE test_table (a UInt32, b Int32) + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres1, port=5432, database='postgres', table='test_table'); ''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') node1.query(''' - DROP TABLE test_table; - CREATE TABLE test_table (a UInt32, b Int32) + DROP TABLE test.test_table; + CREATE TABLE test.test_table (a UInt32, b Int32) ENGINE PostgreSQL(postgres3, port=5432); ''') - assert (node1.query(f"SELECT count() FROM test_table").rstrip() == '100') + assert (node1.query(f"SELECT count() FROM test.test_table").rstrip() == '100') assert (node1.query(f"SELECT count() FROM postgresql(postgres1)").rstrip() == '100') node1.query("INSERT INTO TABLE FUNCTION postgresql(postgres1, on_conflict='ON CONFLICT DO NOTHING') SELECT number, number from numbers(100)") From 428d6f1581d707b01fda410fa846ed3d66a7d727 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 8 Feb 2022 17:19:37 +0300 Subject: [PATCH 172/215] Update Settings.h --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0643400f473..48dd637a943 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -479,7 +479,7 @@ class IColumn; M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \ M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ - M(Bool, allow_experimental_projection_optimization, true, "Enable projection optimization when processing SELECT queries", 0) \ + M(Bool, allow_experimental_projection_optimization, false, "Enable projection optimization when processing SELECT queries", 0) \ M(Bool, 
force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ From cae1517693added79a7fd956763fabb18e68357a Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 8 Feb 2022 17:31:02 +0300 Subject: [PATCH 173/215] Fix build --- src/Storages/StorageURL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f90fc164704..81554eef771 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -479,7 +479,7 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( else { auto parsed_uri = Poco::URI(uri); - setCredentials(credentials, parsed_uri); + StorageURLSource::setCredentials(credentials, parsed_uri); read_buffer_creator = [&]() { return wrapReadBufferWithCompressionMethod( From 2d02eab75045b6fc449188b375a0e62110896c1a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 8 Feb 2022 15:34:01 +0100 Subject: [PATCH 174/215] Revert "Merge pull request #34373 from ClickHouse/docker-tz" This reverts commit efd8044ab7cd35feb4e0b1bccc63f46d42e34c82, reversing changes made to 4bb69bcb15374b696080c5fd2fe1090dc0bec2a2. --- docker/test/base/Dockerfile | 2 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/Dockerfile | 2 +- docker/test/integration/base/Dockerfile | 2 +- docker/test/integration/runner/Dockerfile | 2 +- docker/test/performance-comparison/Dockerfile | 4 +-- docker/test/stateless/Dockerfile | 2 +- docker/test/testflows/runner/Dockerfile | 35 +++++++++---------- 8 files changed, 24 insertions(+), 27 deletions(-) diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 7d7c3e28087..6beab2e5bb7 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -73,7 +73,7 @@ ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone CMD sleep 1 diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index a625ab316f0..46b74d89e13 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -87,7 +87,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index 659b53bfd7e..eb4b09c173f 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index b6f2bdace01..91b26735fe5 100644 --- a/docker/test/integration/base/Dockerfile +++ 
b/docker/test/integration/base/Dockerfile @@ -60,5 +60,5 @@ clientPort=2181 \n\ maxClientCnxns=80' > /opt/zookeeper/conf/zoo.cfg RUN mkdir /zookeeper && chmod -R 777 /zookeeper -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index a7a9230748f..1aad2ae6770 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -40,7 +40,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index d50bfce1e8f..eddaf969f33 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,12 +1,12 @@ # docker build -t clickhouse/performance-comparison . -FROM ubuntu:20.04 +FROM ubuntu:18.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 24ca13e4acc..9b7fde7d542 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -42,7 +42,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 69b3affd0e7..d15f237587b 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -35,7 +35,7 @@ RUN apt-get update \ /tmp/* \ && apt-get clean -ENV TZ=UTC +ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 dicttoxml kazoo tzlocal==2.1 pytz python-dateutil numpy @@ -43,27 +43,24 @@ RUN pip3 install urllib3 testflows==1.7.20 docker-compose==1.29.1 docker==5.0.0 ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 20.10.6 -# Architecture of the image when BuildKit/buildx is used -ARG TARGETARCH - -# Install MySQL ODBC driver from RHEL rpm -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - esac \ - && set -eux \ - && if ! wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/${rarch}/docker-${DOCKER_VERSION}.tgz"; then \ - echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${rarch}'" \ - && exit 1; \ - fi \ - && tar --extract \ +RUN set -eux; \ + \ +# this "case" statement is generated via "update.sh" + \ + if ! 
wget -nv -O docker.tgz "https://download.docker.com/linux/static/${DOCKER_CHANNEL}/x86_64/docker-${DOCKER_VERSION}.tgz"; then \ + echo >&2 "error: failed to download 'docker-${DOCKER_VERSION}' from '${DOCKER_CHANNEL}' for '${x86_64}'"; \ + exit 1; \ + fi; \ + \ + tar --extract \ --file docker.tgz \ --strip-components 1 \ --directory /usr/local/bin/ \ - && rm docker.tgz \ - && dockerd --version \ - && docker --version + ; \ + rm docker.tgz; \ + \ + dockerd --version; \ + docker --version COPY modprobe.sh /usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ From 08c009ab50ee307696982bbbc4ed974d925ab750 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 8 Feb 2022 18:23:43 +0300 Subject: [PATCH 175/215] Fix possible error 'Cannot convert column Function to mask' --- src/Interpreters/ExpressionActions.cpp | 4 ++-- .../02209_short_circuit_node_without_parents.reference | 1 + .../0_stateless/02209_short_circuit_node_without_parents.sql | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference create mode 100644 tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index f2aa50f3c23..30c832e4917 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -159,8 +159,8 @@ static void setLazyExecutionInfo( const ActionsDAGReverseInfo::NodeInfo & node_info = reverse_info.nodes_info[reverse_info.reverse_index.at(node)]; - /// If node is used in result, we can't enable lazy execution. - if (node_info.used_in_result) + /// If node is used in result or it doesn't have parents, we can't enable lazy execution. + if (node_info.used_in_result || node_info.parents.empty()) lazy_execution_info.can_be_lazy_executed = false; /// To fill lazy execution info for current node we need to create it for all it's parents. 
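The one-line condition changed in the diff below is easier to follow with the invariant spelled out: a DAG node may be short-circuited only if something downstream can ask for its lazy, mask-filtered execution. A minimal standalone sketch of that rule, using hypothetical names rather than the real ActionsDAG/ExpressionActions API:

#include <vector>

// Schematic model of the condition patched in setLazyExecutionInfo() below.
// A DAG node is eligible for lazy (short-circuit) execution only if it is
// not part of the query result AND it has at least one parent that can
// request its masked execution; a parentless node has no caller to supply
// a filter mask, which previously surfaced as the error
// "Cannot convert column Function to mask".
struct NodeInfo
{
    bool used_in_result = false;
    std::vector<const NodeInfo *> parents;
};

bool canBeLazyExecuted(const NodeInfo & node)
{
    return !node.used_in_result && !node.parents.empty();
}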
diff --git a/tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql new file mode 100644 index 00000000000..c20ca83591f --- /dev/null +++ b/tests/queries/0_stateless/02209_short_circuit_node_without_parents.sql @@ -0,0 +1,2 @@ +SELECT 1 FROM (SELECT arrayJoin(if(empty(range(number)), [1], [2])) from numbers(1)); + From 9f2628b8f2af39b6bbfc61f5490e1911c8cba234 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 8 Feb 2022 16:41:55 +0100 Subject: [PATCH 176/215] Fix --- .../PostgreSQL/StorageMaterializedPostgreSQL.cpp | 10 ++++++++-- src/Storages/ReadFinalForExternalReplicaStorage.cpp | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index fe81b322bdb..c72dec824f0 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -277,10 +277,16 @@ Pipe StorageMaterializedPostgreSQL::read( size_t max_block_size, unsigned num_streams) { - auto materialized_table_lock = lockForShare(String(), context_->getSettingsRef().lock_acquire_timeout); auto nested_table = getNested(); - return readFinalFromNestedStorage(nested_table, column_names, metadata_snapshot, + + auto pipe = readFinalFromNestedStorage(nested_table, column_names, metadata_snapshot, query_info, context_, processed_stage, max_block_size, num_streams); + + auto lock = lockForShare(context_->getCurrentQueryId(), context_->getSettingsRef().lock_acquire_timeout); + pipe.addTableLock(lock); + pipe.addStorageHolder(shared_from_this()); + + return pipe; } diff --git a/src/Storages/ReadFinalForExternalReplicaStorage.cpp b/src/Storages/ReadFinalForExternalReplicaStorage.cpp index 36a40beca36..58b98aaa4c6 100644 --- a/src/Storages/ReadFinalForExternalReplicaStorage.cpp +++ b/src/Storages/ReadFinalForExternalReplicaStorage.cpp @@ -57,6 +57,7 @@ Pipe readFinalFromNestedStorage( Pipe pipe = nested_storage->read(require_columns_name, nested_metadata, query_info, context, processed_stage, max_block_size, num_streams); pipe.addTableLock(lock); + pipe.addStorageHolder(nested_storage); if (!expressions->children.empty() && !pipe.empty()) { From d2d47b9595d68d2b4b9b13880612de48496d9768 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 8 Feb 2022 16:27:33 +0000 Subject: [PATCH 177/215] Fixing build. 
---
 src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
index 8a56c2ed5c7..f63d6fa9c46 100644
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@@ -20,7 +20,7 @@ namespace ErrorCodes


 TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_)
-    : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns())
+    : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(getPort().getHeader().columns())
 {
     const auto & sample_block = getPort().getHeader();
     size_t num_columns = sample_block.columns();

From f6e7cea1f8039acc0b97b3b3d21fd1610f39ed00 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 8 Feb 2022 19:38:04 +0300
Subject: [PATCH 178/215] Add settings for S3 multipart upload parts size

---
 src/Core/Settings.h                           |  4 ++-
 src/Disks/S3/DiskS3.cpp                       | 10 +++++++
 src/Disks/S3/DiskS3.h                         |  4 +++
 src/Disks/S3/registerDiskS3.cpp               |  2 ++
 src/IO/WriteBufferFromS3.cpp                  | 14 ++++++++--
 src/IO/WriteBufferFromS3.h                    | 11 ++++++--
 src/Storages/StorageS3.cpp                    | 28 ++++++++++++++++++-
 src/Storages/StorageS3.h                      |  4 +++
 src/TableFunctions/TableFunctionS3.cpp        |  4 +++
 src/TableFunctions/TableFunctionS3Cluster.cpp |  4 +++
 10 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c4b4ab77867..a04bc35f492 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -70,7 +70,9 @@ class IColumn;
     M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \
     M(UInt64, distributed_connections_pool_size, 1024, "Maximum number of connections with one remote server in the pool.", 0) \
     M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
-    M(UInt64, s3_min_upload_part_size, 32*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
+    M(UInt64, s3_min_upload_part_size, 16*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
+    M(UInt64, s3_upload_part_size_multiply_factor, 2, "Multiply s3_min_upload_part_size by this factor each time s3_upload_part_size_multiply_parts_count_threshold parts have been uploaded from a single write to S3.", 0) \
+    M(UInt64, s3_upload_part_size_multiply_parts_count_threshold, 1000, "Each time this number of parts has been uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor.", 0) \
     M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
     M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
     M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 2638365c7ad..5cfb4532b65 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -283,6 +283,8 @@ std::unique_ptr DiskS3::writeFile(const String & path,
         bucket,
         metadata.remote_fs_root_path + s3_path,
         settings->s3_min_upload_part_size,
+        settings->s3_upload_part_size_multiply_factor,
+        settings->s3_upload_part_size_multiply_parts_count_threshold, 
settings->s3_max_single_part_upload_size, std::move(object_metadata), buf_size, @@ -338,6 +340,8 @@ void DiskS3::createFileOperationObject(const String & operation_name, UInt64 rev bucket, remote_fs_root_path + key, settings->s3_min_upload_part_size, + settings->s3_upload_part_size_multiply_factor, + settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, metadata); @@ -417,6 +421,8 @@ void DiskS3::saveSchemaVersion(const int & version) bucket, remote_fs_root_path + SCHEMA_VERSION_OBJECT, settings->s3_min_upload_part_size, + settings->s3_upload_part_size_multiply_factor, + settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size); writeIntText(version, buffer); @@ -1076,6 +1082,8 @@ DiskS3Settings::DiskS3Settings( const std::shared_ptr & client_, size_t s3_max_single_read_retries_, size_t s3_min_upload_part_size_, + size_t s3_upload_part_size_multiply_factor_, + size_t s3_upload_part_size_multiply_parts_count_threshold_, size_t s3_max_single_part_upload_size_, size_t min_bytes_for_seek_, bool send_metadata_, @@ -1085,6 +1093,8 @@ DiskS3Settings::DiskS3Settings( : client(client_) , s3_max_single_read_retries(s3_max_single_read_retries_) , s3_min_upload_part_size(s3_min_upload_part_size_) + , s3_upload_part_size_multiply_factor(s3_upload_part_size_multiply_factor_) + , s3_upload_part_size_multiply_parts_count_threshold(s3_upload_part_size_multiply_parts_count_threshold_) , s3_max_single_part_upload_size(s3_max_single_part_upload_size_) , min_bytes_for_seek(min_bytes_for_seek_) , send_metadata(send_metadata_) diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index c5d0722c6c2..698fa6173c2 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -29,6 +29,8 @@ struct DiskS3Settings const std::shared_ptr & client_, size_t s3_max_single_read_retries_, size_t s3_min_upload_part_size_, + size_t s3_upload_part_size_multiply_factor_, + size_t s3_upload_part_size_multiply_parts_count_threshold_, size_t s3_max_single_part_upload_size_, size_t min_bytes_for_seek_, bool send_metadata_, @@ -39,6 +41,8 @@ struct DiskS3Settings std::shared_ptr client; size_t s3_max_single_read_retries; size_t s3_min_upload_part_size; + size_t s3_upload_part_size_multiply_factor; + size_t s3_upload_part_size_multiply_parts_count_threshold; size_t s3_max_single_part_upload_size; size_t min_bytes_for_seek; bool send_metadata; diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index f6824a1b3af..e16626a009a 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -155,6 +155,8 @@ std::unique_ptr getSettings(const Poco::Util::AbstractConfigurat getClient(config, config_prefix, context), config.getUInt64(config_prefix + ".s3_max_single_read_retries", context->getSettingsRef().s3_max_single_read_retries), config.getUInt64(config_prefix + ".s3_min_upload_part_size", context->getSettingsRef().s3_min_upload_part_size), + config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", context->getSettingsRef().s3_upload_part_size_multiply_factor), + config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold), config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", context->getSettingsRef().s3_max_single_part_upload_size), config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getBool(config_prefix + ".send_metadata", 
false), diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 1b93667cb92..17e449b2fb8 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -54,6 +54,8 @@ WriteBufferFromS3::WriteBufferFromS3( const String & bucket_, const String & key_, size_t minimum_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_threshold_, size_t max_single_part_upload_size_, std::optional> object_metadata_, size_t buffer_size_, @@ -63,7 +65,9 @@ WriteBufferFromS3::WriteBufferFromS3( , key(key_) , object_metadata(std::move(object_metadata_)) , client_ptr(std::move(client_ptr_)) - , minimum_upload_part_size(minimum_upload_part_size_) + , upload_part_size(minimum_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_threshold(upload_part_size_multiply_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , schedule(std::move(schedule_)) { @@ -85,9 +89,10 @@ void WriteBufferFromS3::nextImpl() if (multipart_upload_id.empty() && last_part_size > max_single_part_upload_size) createMultipartUpload(); - if (!multipart_upload_id.empty() && last_part_size > minimum_upload_part_size) + if (!multipart_upload_id.empty() && last_part_size > upload_part_size) { writePart(); + allocateBuffer(); } @@ -96,6 +101,9 @@ void WriteBufferFromS3::nextImpl() void WriteBufferFromS3::allocateBuffer() { + if (total_parts_uploaded != 0 && total_parts_uploaded % upload_part_size_multiply_threshold == 0) + upload_part_size *= upload_part_size_multiply_factor; + temporary_buffer = Aws::MakeShared("temporary buffer"); temporary_buffer->exceptions(std::ios::badbit); last_part_size = 0; @@ -239,6 +247,8 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) } else throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + + total_parts_uploaded++; } void WriteBufferFromS3::completeMultipartUpload() diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 1eb8a771944..8b89626ee18 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -47,6 +47,8 @@ public: const String & bucket_, const String & key_, size_t minimum_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_threshold_, size_t max_single_part_upload_size_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, @@ -85,11 +87,14 @@ private: String key; std::optional> object_metadata; std::shared_ptr client_ptr; - size_t minimum_upload_part_size; - size_t max_single_part_upload_size; + size_t upload_part_size; + const size_t upload_part_size_multiply_factor; + const size_t upload_part_size_multiply_threshold; + const size_t max_single_part_upload_size; /// Buffer to accumulate data. std::shared_ptr temporary_buffer; - size_t last_part_size; + size_t last_part_size = 0; + std::atomic total_parts_uploaded = 0; /// Upload in S3 is made in parts. /// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts. 
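The buffer changes above amount to a geometric growth schedule: the part size starts at s3_min_upload_part_size and is multiplied by s3_upload_part_size_multiply_factor after every s3_upload_part_size_multiply_parts_count_threshold uploaded parts, which helps a single large write stay under the 10000-part cap of an S3 multipart upload without forcing small uploads to use huge parts. A standalone sketch of that schedule with the defaults from this patch (hypothetical demo code, no AWS dependency):

#include <cstdio>

// Demonstrates the adaptive part-size schedule from allocateBuffer():
// the size is multiplied after each block of `threshold` uploaded parts.
int main()
{
    unsigned long long part_size = 16ULL * 1024 * 1024; // s3_min_upload_part_size
    const unsigned long long factor = 2;                // s3_upload_part_size_multiply_factor
    const unsigned long long threshold = 1000;          // s3_upload_part_size_multiply_parts_count_threshold
    unsigned long long total_parts_uploaded = 0;

    for (int block = 0; block < 3; ++block)
    {
        total_parts_uploaded += threshold;
        part_size *= factor;
        printf("after %llu parts, next part size: %llu MiB\n",
               total_parts_uploaded, part_size / (1024 * 1024));
    }
    // Prints 32, 64 and 128 MiB: parts 0-999 use 16 MiB,
    // parts 1000-1999 use 32 MiB, and so on.
    return 0;
}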
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 77d4952291c..2b9ba7585e1 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -385,13 +385,18 @@ public: const String & bucket, const String & key, size_t min_upload_part_size, + size_t upload_part_size_multiply_factor, + size_t upload_part_size_multiply_parts_count_threshold, size_t max_single_part_upload_size) : SinkToStorage(sample_block_) , sample_block(sample_block_) , format_settings(format_settings_) { write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3); + std::make_unique( + client, bucket, key, min_upload_part_size, + upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold, + max_single_part_upload_size), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings); } @@ -440,6 +445,8 @@ public: const String & bucket_, const String & key_, size_t min_upload_part_size_, + size_t upload_part_size_multiply_factor_, + size_t upload_part_size_multiply_parts_count_threshold_, size_t max_single_part_upload_size_) : PartitionedSink(partition_by, context_, sample_block_) , format(format_) @@ -450,6 +457,8 @@ public: , bucket(bucket_) , key(key_) , min_upload_part_size(min_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , format_settings(format_settings_) { @@ -473,6 +482,8 @@ public: partition_bucket, partition_key, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size ); } @@ -487,6 +498,8 @@ private: const String bucket; const String key; size_t min_upload_part_size; + size_t upload_part_size_multiply_factor; + size_t upload_part_size_multiply_parts_count_threshold; size_t max_single_part_upload_size; std::optional format_settings; @@ -527,6 +540,8 @@ StorageS3::StorageS3( const String & format_name_, UInt64 max_single_read_retries_, UInt64 min_upload_part_size_, + UInt64 upload_part_size_multiply_factor_, + UInt64 upload_part_size_multiply_parts_count_threshold_, UInt64 max_single_part_upload_size_, UInt64 max_connections_, const ColumnsDescription & columns_, @@ -543,6 +558,8 @@ StorageS3::StorageS3( , format_name(format_name_) , max_single_read_retries(max_single_read_retries_) , min_upload_part_size(min_upload_part_size_) + , upload_part_size_multiply_factor(upload_part_size_multiply_factor_) + , upload_part_size_multiply_parts_count_threshold(upload_part_size_multiply_parts_count_threshold_) , max_single_part_upload_size(max_single_part_upload_size_) , compression_method(compression_method_) , name(uri_.storage_name) @@ -669,6 +686,8 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr client_auth.uri.bucket, keys.back(), min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size); } else @@ -712,6 +731,8 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr client_auth.uri.bucket, keys.back(), min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size); } } @@ 
-923,7 +944,10 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) S3::URI s3_uri(Poco::URI(configuration.url)); auto max_single_read_retries = args.getLocalContext()->getSettingsRef().s3_max_single_read_retries; auto min_upload_part_size = args.getLocalContext()->getSettingsRef().s3_min_upload_part_size; + auto upload_part_size_multiply_factor = args.getLocalContext()->getSettingsRef().s3_upload_part_size_multiply_factor; + auto upload_part_size_multiply_parts_count_threshold = args.getLocalContext()->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold; auto max_single_part_upload_size = args.getLocalContext()->getSettingsRef().s3_max_single_part_upload_size; + auto max_connections = args.getLocalContext()->getSettingsRef().s3_max_connections; ASTPtr partition_by; @@ -938,6 +962,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) configuration.format, max_single_read_retries, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, args.columns, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 427b3af285b..03b54706b4a 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -126,6 +126,8 @@ public: const String & format_name_, UInt64 max_single_read_retries_, UInt64 min_upload_part_size_, + UInt64 upload_part_size_multiply_factor_, + UInt64 upload_part_size_multiply_parts_count_threshold_, UInt64 max_single_part_upload_size_, UInt64 max_connections_, const ColumnsDescription & columns_, @@ -193,6 +195,8 @@ private: String format_name; UInt64 max_single_read_retries; size_t min_upload_part_size; + size_t upload_part_size_multiply_factor; + size_t upload_part_size_multiply_parts_count_threshold; size_t max_single_part_upload_size; String compression_method; String name; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index e1e31b5efc3..f91ce36c3c4 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -148,6 +148,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context S3::URI s3_uri (uri); UInt64 max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries; UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size; + UInt64 upload_part_size_multiply_factor = context->getSettingsRef().s3_upload_part_size_multiply_factor; + UInt64 upload_part_size_multiply_parts_count_threshold = context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold; UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size; UInt64 max_connections = context->getSettingsRef().s3_max_connections; @@ -163,6 +165,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context s3_configuration->format, max_single_read_retries, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, getActualTableStructure(context), diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index aa3ae20b61d..bc215b578b9 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -109,6 +109,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( /// Actually this parameters are not used UInt64 max_single_read_retries 
= context->getSettingsRef().s3_max_single_read_retries; UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size; + UInt64 upload_part_size_multiply_factor = context->getSettingsRef().s3_upload_part_size_multiply_factor; + UInt64 upload_part_size_multiply_parts_count_threshold = context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold; UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size; UInt64 max_connections = context->getSettingsRef().s3_max_connections; storage = StorageS3::create( @@ -119,6 +121,8 @@ StoragePtr TableFunctionS3Cluster::executeImpl( format, max_single_read_retries, min_upload_part_size, + upload_part_size_multiply_factor, + upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, getActualTableStructure(context), From ddd541f32489878d3783dbc5b686ca4cd48def0a Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 8 Feb 2022 19:39:51 +0300 Subject: [PATCH 179/215] Redundant change --- .github/workflows/tags_stable.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tags_stable.yml b/.github/workflows/tags_stable.yml index c607c5cfa04..30b6bfb027e 100644 --- a/.github/workflows/tags_stable.yml +++ b/.github/workflows/tags_stable.yml @@ -6,8 +6,8 @@ name: TagsStableWorkflow on: # yamllint disable-line rule:truthy push: tags: - - 'v*-stable' - - 'v*-lts' + - 'v*-stable' + - 'v*-lts' jobs: From e8a8f46864553243ab2c06f90bf9edb1757ed69f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 8 Feb 2022 21:58:03 +0100 Subject: [PATCH 180/215] Fix style check --- src/IO/ReadWriteBufferFromHTTP.h | 1 - src/Storages/StorageURL.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index f0604176ce1..4522f863db6 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -21,7 +21,6 @@ #include #include #include -#include namespace ProfileEvents diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 81554eef771..fc58f8226aa 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -36,6 +36,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int NETWORK_ERROR; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } From b8e350054c17f861166be0bbdf9841ee891c621e Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 8 Feb 2022 21:21:32 +0000 Subject: [PATCH 181/215] clang-tidy move fix build --- src/Processors/ForkProcessor.cpp | 4 ++-- src/Processors/Formats/Impl/TSKVRowInputFormat.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/ForkProcessor.cpp b/src/Processors/ForkProcessor.cpp index 9b17f8ad5ca..f4e5a5be5f2 100644 --- a/src/Processors/ForkProcessor.cpp +++ b/src/Processors/ForkProcessor.cpp @@ -63,9 +63,9 @@ ForkProcessor::Status ForkProcessor::prepare() { ++num_processed_outputs; if (num_processed_outputs == num_active_outputs) - output.push(std::move(data)); // NOLINT Can push because no full or unneeded outputs. + output.push(std::move(data)); /// NOLINT Can push because no full or unneeded outputs.
else - output.push(data.clone()); + output.push(data.clone()); /// NOLINT } } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 8a56c2ed5c7..1cd18087f56 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -20,7 +20,7 @@ namespace ErrorCodes TSKVRowInputFormat::TSKVRowInputFormat(ReadBuffer & in_, Block header_, Params params_, const FormatSettings & format_settings_) - : IRowInputFormat(std::move(header_), in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) + : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()) { const auto & sample_block = getPort().getHeader(); size_t num_columns = sample_block.columns(); From 00330461d1eefc541cff813fa5b8981cd4585b27 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 9 Feb 2022 03:56:57 +0300 Subject: [PATCH 182/215] Update int-uint.md --- docs/en/sql-reference/data-types/int-uint.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index 588b5a2d7d6..4cc590d9fa5 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -1,9 +1,9 @@ --- toc_priority: 40 -toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 +toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 --- -# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256} +# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 Fixed-length integers, with or without a sign. From f178726f0702d096df9c37aa8041f161445cb8bd Mon Sep 17 00:00:00 2001 From: cnmade Date: Wed, 9 Feb 2022 10:35:34 +0800 Subject: [PATCH 183/215] sync to zh: sql-reference/data-types/int-uint: sync translate to zh --- docs/zh/sql-reference/data-types/int-uint.md | 42 +++++++++++++++----- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/docs/zh/sql-reference/data-types/int-uint.md b/docs/zh/sql-reference/data-types/int-uint.md index 3fb482639e7..e7fa27dcf70 100644 --- a/docs/zh/sql-reference/data-types/int-uint.md +++ b/docs/zh/sql-reference/data-types/int-uint.md @@ -1,17 +1,41 @@ -# UInt8,UInt16,UInt32,UInt64,Int8,Int16,Int32,Int64 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64} +--- +toc_priority: 40 +toc_title: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 +--- + +# UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 固定长度的整型,包括有符号整型或无符号整型。 +创建表时,可以为整数设置类型参数 (例如. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), 但 ClickHouse 会忽略它们. 
+ + ## 整型范围 {#int-ranges} -- Int8-\[-128:127\] -- Int16-\[-32768:32767\] -- Int32-\[-2147483648:2147483647\] -- Int64-\[-9223372036854775808:9223372036854775807\] + +- `Int8` — \[-128 : 127\] +- `Int16` — \[-32768 : 32767\] +- `Int32` — \[-2147483648 : 2147483647\] +- `Int64` — \[-9223372036854775808 : 9223372036854775807\] +- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\] +- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\] + +别名: + +- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. +- `Int16` — `SMALLINT`, `INT2`. +- `Int32` — `INT`, `INT4`, `INTEGER`. +- `Int64` — `BIGINT`. ## 无符号整型范围 {#uint-ranges} -- UInt8-\[0:255\] -- UInt16-\[0:65535\] -- UInt32-\[0:4294967295\] -- UInt64-\[0:18446744073709551615\] + +- `UInt8` — \[0 : 255\] +- `UInt16` — \[0 : 65535\] +- `UInt32` — \[0 : 4294967295\] +- `UInt64` — \[0 : 18446744073709551615\] +- `UInt128` — \[0 : 340282366920938463463374607431768211455\] +- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] + + +[源文档](https://clickhouse.com/docs/en/data_types/int_uint/) From 673b874cc494fd428192c7eacafcca20cb744bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=B3=E9=98=B3?= Date: Wed, 9 Feb 2022 10:50:53 +0800 Subject: [PATCH 184/215] the `name` seems to be missing the `name` seems to be missing --- docs/en/sql-reference/functions/functions-for-nulls.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 29de9ee4b70..42307093dda 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -120,7 +120,7 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32 Get the first available contact method for the customer from the contact list: ``` sql -SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook; +SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook; ``` ``` text From 4fa2ae76bc3a15c4b856d7a22630548ef8c91939 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 1 Feb 2022 18:40:48 +0300 Subject: [PATCH 185/215] Fix memory leak in AggregatingInOrderTransform Reproducer: # NOTE: we need clickhouse from 33957 since right now LSan is broken due to getauxval(). 
$ url=https://s3.amazonaws.com/clickhouse-builds/33957/e04b862673644d313712607a0078f5d1c48b5377/package_asan/clickhouse $ wget $url -O clickhouse-asan $ chmod +x clickhouse-asan $ ./clickhouse-asan server & $ ./clickhouse-asan client :) create table data (key Int, value String) engine=MergeTree() order by key :) insert into data select number%5, toString(number) from numbers(10e6) # usually one query is enough; benchmark is used only for stability of the results # note: if the exception did not come from AggregatingInOrderTransform, add --continue_on_errors and wait $ ./clickhouse-asan benchmark --query 'select key, uniqCombined64(value), groupArray(value) from data group by key' --optimize_aggregation_in_order=1 --memory_tracker_fault_probability=0.01 --max_untracked_memory='2Mi' LSan report: ==24595==ERROR: LeakSanitizer: detected memory leaks Direct leak of 3932160 byte(s) in 6 object(s) allocated from: 0 0xcadba93 in realloc () 1 0xcc108d9 in Allocator::realloc() obj-x86_64-linux-gnu/../src/Common/Allocator.h:134:30 2 0xde19eae in void DB::PODArrayBase<>::realloc(unsigned long, DB::Arena*&) obj-x86_64-linux-gnu/../src/Common/PODArray.h:161:25 3 0xde5f039 in void DB::PODArrayBase<>::reserveForNextSize(DB::Arena*&) obj-x86_64-linux-gnu/../src/Common/PODArray.h 4 0xde5f039 in void DB::PODArray<>::push_back<>(DB::GroupArrayNodeString*&, DB::Arena*&) obj-x86_64-linux-gnu/../src/Common/PODArray.h:432:19 5 0xde5f039 in DB::GroupArrayGeneralImpl<>::add() const obj-x86_64-linux-gnu/../src/AggregateFunctions/AggregateFunctionGroupArray.h:465:31 6 0xde5f039 in DB::IAggregateFunctionHelper<>::addBatchSinglePlaceFromInterval() const obj-x86_64-linux-gnu/../src/AggregateFunctions/IAggregateFunction.h:481:53 7 0x299df134 in DB::Aggregator::executeOnIntervalWithoutKeyImpl() obj-x86_64-linux-gnu/../src/Interpreters/Aggregator.cpp:869:31 8 0x2ca75f7d in DB::AggregatingInOrderTransform::consume() obj-x86_64-linux-gnu/../src/Processors/Transforms/AggregatingInOrderTransform.cpp:124:13 ... SUMMARY: AddressSanitizer: 4523184 byte(s) leaked in 12 allocation(s).
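The fix hinges on a simple ownership rule: aggregate function states must be owned by an object whose destructor destroys them, so an exception thrown in the middle of aggregation (here provoked by memory_tracker_fault_probability) cannot leak them. Below is a minimal standalone sketch of that pattern, using simplified stand-in types rather than the actual ClickHouse classes (in the real patch, AggregatedDataVariants plays the role of the owner):

#include <functional>
#include <iostream>
#include <stdexcept>
#include <vector>

// Owns aggregate states; the destructor runs even during stack unwinding,
// which is what prevents the leak when aggregation throws half-way through.
struct StateOwner
{
    std::vector<std::function<void()>> destructors; // one per created state

    ~StateOwner()
    {
        for (auto & destroy : destructors)
            destroy();
    }
};

void aggregateInterval(StateOwner & owner, bool inject_fault)
{
    auto * state = new int(42); // stands in for an aggregate function state
    owner.destructors.push_back([state] { delete state; });

    if (inject_fault) // models memory_tracker_fault_probability firing mid-batch
        throw std::runtime_error("fault injected during addBatch");
}

int main()
{
    try
    {
        StateOwner owner; // registered states are destroyed with the owner
        aggregateInterval(owner, /*inject_fault=*/ true);
    }
    catch (const std::exception & e)
    {
        // ~StateOwner already freed the state during unwinding: no leak.
        std::cout << "caught: " << e.what() << "\n";
    }
    return 0;
}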
Signed-off-by: Azat Khuzhin --- src/Interpreters/Aggregator.cpp | 37 +++++++++++++++---- src/Interpreters/Aggregator.h | 8 ++-- .../AggregatingInOrderTransform.cpp | 2 +- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index c3fd8b8024a..a2f24a79e40 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -855,12 +855,18 @@ void NO_INLINE Aggregator::executeWithoutKeyImpl( void NO_INLINE Aggregator::executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, + AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena) + Arena * arena) const { + /// `data_variants` will destroy the states of aggregate functions in the destructor + data_variants.aggregator = this; + data_variants.init(AggregatedDataVariants::Type::without_key); + + AggregatedDataWithoutKey & res = data_variants.without_key; + /// Adding values for (AggregateFunctionInstruction * inst = aggregate_instructions; inst->that; ++inst) { @@ -1623,15 +1629,32 @@ Block Aggregator::prepareBlockAndFill( } void Aggregator::addSingleKeyToAggregateColumns( - const AggregatedDataVariants & data_variants, + AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const { - const auto & data = data_variants.without_key; - for (size_t i = 0; i < params.aggregates_size; ++i) + auto & data = data_variants.without_key; + + size_t i = 0; + try { - auto & column_aggregate_func = assert_cast(*aggregate_columns[i]); - column_aggregate_func.getData().push_back(data + offsets_of_aggregate_states[i]); + for (i = 0; i < params.aggregates_size; ++i) + { + auto & column_aggregate_func = assert_cast(*aggregate_columns[i]); + column_aggregate_func.getData().push_back(data + offsets_of_aggregate_states[i]); + } } + catch (...) + { + /// Rollback + for (size_t rollback_i = 0; rollback_i < i; ++rollback_i) + { + auto & column_aggregate_func = assert_cast(*aggregate_columns[rollback_i]); + column_aggregate_func.getData().pop_back(); + } + throw; + } + + data = nullptr; } void Aggregator::addArenasToAggregateColumns( diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index c79c2c5ef64..05c9133cb35 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1138,12 +1138,12 @@ private: AggregateFunctionInstruction * aggregate_instructions, Arena * arena) const; - static void executeOnIntervalWithoutKeyImpl( - AggregatedDataWithoutKey & res, + void executeOnIntervalWithoutKeyImpl( + AggregatedDataVariants & data_variants, size_t row_begin, size_t row_end, AggregateFunctionInstruction * aggregate_instructions, - Arena * arena); + Arena * arena) const; template void writeToTemporaryFileImpl( @@ -1307,7 +1307,7 @@ private: NestedColumnsHolder & nested_columns_holder) const; void addSingleKeyToAggregateColumns( - const AggregatedDataVariants & data_variants, + AggregatedDataVariants & data_variants, MutableColumns & aggregate_columns) const; void addArenasToAggregateColumns( diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 857f362c4be..63497ea1af4 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -121,7 +121,7 @@ void AggregatingInOrderTransform::consume(Chunk chunk) /// Add data to aggr. state if interval is not empty. 
Empty when haven't found current key in new block. if (key_begin != key_end) - params->aggregator.executeOnIntervalWithoutKeyImpl(variants.without_key, key_begin, key_end, aggregate_function_instructions.data(), variants.aggregates_pool); + params->aggregator.executeOnIntervalWithoutKeyImpl(variants, key_begin, key_end, aggregate_function_instructions.data(), variants.aggregates_pool); current_memory_usage = getCurrentMemoryUsage() - initial_memory_usage; From cad23926aa83e0a1480836ac5b236041ffb7b11b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=B3=E9=98=B3?= Date: Wed, 9 Feb 2022 16:48:31 +0800 Subject: [PATCH 186/215] remove (nearly) duplicate description remove (nearly) duplicate description --- docs/en/sql-reference/functions/tuple-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 8502fcdcf66..96bceb8958c 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -22,7 +22,7 @@ tuple(x, y, …) ## tupleElement {#tupleelement} A function that allows getting a column from a tuple. -‘N’ is the column index, starting from 1. N must be a constant. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple. +‘N’ is the column index, starting from 1. ‘N’ must be a constant. ‘N’ must be a strict positive integer no greater than the size of the tuple. There is no cost to execute the function. The function implements the operator `x.N`. From 7baf23f35ff5c0ffed8881090cc73da328a26bbe Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 9 Feb 2022 13:53:54 +0300 Subject: [PATCH 187/215] Fix bug in URL engine --- src/Storages/StorageURL.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index fc58f8226aa..b323deafcc0 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -479,10 +479,11 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( } else { - auto parsed_uri = Poco::URI(uri); - StorageURLSource::setCredentials(credentials, parsed_uri); read_buffer_creator = [&]() {
189/215] Fix style --- src/Storages/StorageURL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index b323deafcc0..36b9853ac0e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -483,7 +483,7 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( { auto parsed_uri = Poco::URI(uri); StorageURLSource::setCredentials(credentials, parsed_uri); - + return wrapReadBufferWithCompressionMethod( std::make_unique( parsed_uri, From 0658fb32567a0ae91b4aea9d88ed2b3a8caaec66 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 9 Feb 2022 12:06:17 +0000 Subject: [PATCH 190/215] Added test 33734 --- .../02154_dictionary_get_http_json.reference | 24 ++++++++++++ .../02154_dictionary_get_http_json.sh | 39 +++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 tests/queries/0_stateless/02154_dictionary_get_http_json.reference create mode 100755 tests/queries/0_stateless/02154_dictionary_get_http_json.sh diff --git a/tests/queries/0_stateless/02154_dictionary_get_http_json.reference b/tests/queries/0_stateless/02154_dictionary_get_http_json.reference new file mode 100644 index 00000000000..7106f551cd7 --- /dev/null +++ b/tests/queries/0_stateless/02154_dictionary_get_http_json.reference @@ -0,0 +1,24 @@ +0 Value +{ + "meta": + [ + { + "name": "dictGet(02154_test_dictionary, 'value', toUInt64(0))", + "type": "String" + }, + { + "name": "dictGet(02154_test_dictionary, 'value', toUInt64(1))", + "type": "String" + } + ], + + "data": + [ + { + "dictGet(02154_test_dictionary, 'value', toUInt64(0))": "Value", + "dictGet(02154_test_dictionary, 'value', toUInt64(1))": "" + } + ], + + "rows": 1 +} diff --git a/tests/queries/0_stateless/02154_dictionary_get_http_json.sh b/tests/queries/0_stateless/02154_dictionary_get_http_json.sh new file mode 100755 index 00000000000..a2bce866c76 --- /dev/null +++ b/tests/queries/0_stateless/02154_dictionary_get_http_json.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 02154_test_source_table" + +$CLICKHOUSE_CLIENT -q """ + CREATE TABLE 02154_test_source_table + ( + id UInt64, + value String + ) ENGINE=TinyLog; +""" + +$CLICKHOUSE_CLIENT -q "INSERT INTO 02154_test_source_table VALUES (0, 'Value')" +$CLICKHOUSE_CLIENT -q "SELECT * FROM 02154_test_source_table" + +$CLICKHOUSE_CLIENT -q "DROP DICTIONARY IF EXISTS 02154_test_dictionary" +$CLICKHOUSE_CLIENT -q """ + CREATE DICTIONARY 02154_test_dictionary + ( + id UInt64, + value String + ) + PRIMARY KEY id + LAYOUT(HASHED()) + LIFETIME(0) + SOURCE(CLICKHOUSE(TABLE '02154_test_source_table')) +""" + +echo """ + SELECT dictGet(02154_test_dictionary, 'value', toUInt64(0)), dictGet(02154_test_dictionary, 'value', toUInt64(1)) + FORMAT JSON +""" | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&wait_end_of_query=1&output_format_write_statistics=0" -d @- + +$CLICKHOUSE_CLIENT -q "DROP DICTIONARY 02154_test_dictionary" +$CLICKHOUSE_CLIENT -q "DROP TABLE 02154_test_source_table" From ca0d78295a7ce236f0eb92c4f5383481e26a3083 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 9 Feb 2022 14:19:50 -0400 Subject: [PATCH 191/215] test for #13907 toColumnTypeName_toLowCardinality_const --- .../02210_toColumnTypeName_toLowCardinality_const.reference | 1 + .../02210_toColumnTypeName_toLowCardinality_const.sql | 1 + 2 files changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference create mode 100644 tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql diff --git a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference new file mode 100644 index 00000000000..2ac2f690f1b --- /dev/null +++ b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference @@ -0,0 +1 @@ +Const(UInt8) diff --git a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql new file mode 100644 index 00000000000..a71c3f30604 --- /dev/null +++ b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.sql @@ -0,0 +1 @@ +SELECT toColumnTypeName(toLowCardinality(1)); From d19ee89d9d99981b6a3c4098f167ce11b928c709 Mon Sep 17 00:00:00 2001 From: Geoff Genz Date: Wed, 9 Feb 2022 12:17:23 -0700 Subject: [PATCH 192/215] Remove invalid IOS setting for RocksDB CMAKE to fix Apple M1 build --- contrib/rocksdb-cmake/CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index c35009ba10a..529d7f0c4e3 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -127,11 +127,6 @@ endif() if(CMAKE_SYSTEM_NAME MATCHES "Darwin") add_definitions(-DOS_MACOSX) - if(CMAKE_SYSTEM_PROCESSOR MATCHES arm) - add_definitions(-DIOS_CROSS_COMPILE -DROCKSDB_LITE) - # no debug info for IOS, that will make our library big - add_definitions(-DNDEBUG) - endif() elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") add_definitions(-DOS_LINUX) elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS") From 45d903b3b56215fe68f76a2da1a95cdfd27e7acf Mon Sep 17 00:00:00 2001 From: Rajkumar Date: Wed, 9 Feb 2022 11:28:08 -0800 Subject: [PATCH 193/215] clang-tidy reported divide by zero exception --- src/AggregateFunctions/ReservoirSampler.h | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index 1d7529ee8e1..5f7ac13d908 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -239,6 +239,7 @@ private: UInt64 genRandom(size_t lim) { + assert(lim > 0); /// With a large number of values, we will generate random numbers several times slower. if (lim <= static_cast(rng.max())) return static_cast(rng()) % static_cast(lim); From b6a1a13327b644a353d7cffc24e5c21a6ba3fcee Mon Sep 17 00:00:00 2001 From: Rajkumar Date: Wed, 9 Feb 2022 11:36:31 -0800 Subject: [PATCH 194/215] clang-tidy reported nested_column already moved to data --- src/Columns/ColumnArray.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index c18887b7a13..c4d75fed129 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -50,12 +50,12 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && if (!offsets_concrete) throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR); - if (!offsets_concrete->empty() && nested_column) + if (!offsets_concrete->empty() && data) { Offset last_offset = offsets_concrete->getData().back(); /// This will also prevent possible overflow in offset. - if (nested_column->size() != last_offset) + if (data->size() != last_offset) throw Exception("offsets_column has data inconsistent with nested_column", ErrorCodes::LOGICAL_ERROR); } From 1f6b65d39d0f8d47ae4c9e677aab04e04c86f654 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 6 Feb 2022 23:10:41 +0700 Subject: [PATCH 195/215] gRPC: Split compression_type field into input_compression_type and output_compression_type. Make the previous field obsolete. --- src/Server/GRPCServer.cpp | 33 +++++++++++++------- src/Server/grpc_protos/clickhouse_grpc.proto | 19 +++++++---- tests/integration/test_grpc_protocol/test.py | 12 +++---- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 8aa729b8883..9194e087e00 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -628,10 +628,11 @@ namespace ASTInsertQuery * insert_query = nullptr; String input_format; String input_data_delimiter; + CompressionMethod input_compression_method = CompressionMethod::None; PODArray output; String output_format; - CompressionMethod compression_method = CompressionMethod::None; - int compression_level = 0; + CompressionMethod output_compression_method = CompressionMethod::None; + int output_compression_level = 0; uint64_t interactive_delay = 100000; bool send_exception_with_stacktrace = true; @@ -852,8 +853,16 @@ namespace output_format = query_context->getDefaultFormat(); /// Choose compression. 
- compression_method = chooseCompressionMethod("", query_info.compression_type()); - compression_level = query_info.compression_level(); + String input_compression_method_str = query_info.input_compression_type(); + if (input_compression_method_str.empty()) + input_compression_method_str = query_info.obsolete_compression_type(); + input_compression_method = chooseCompressionMethod("", input_compression_method_str); + + String output_compression_method_str = query_info.output_compression_type(); + if (output_compression_method_str.empty()) + output_compression_method_str = query_info.obsolete_compression_type(); + output_compression_method = chooseCompressionMethod("", output_compression_method_str); + output_compression_level = query_info.output_compression_level(); /// Set callback to create and fill external tables query_context->setExternalTablesInitializer([this] (ContextPtr context) @@ -984,7 +993,7 @@ namespace return {nullptr, 0}; /// no more input data }); - read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), compression_method); + read_buffer = wrapReadBufferWithCompressionMethod(std::move(read_buffer), input_compression_method); assert(!pipeline); auto source = query_context->getInputFormat( @@ -1112,13 +1121,13 @@ namespace if (io.pipeline.pulling()) header = io.pipeline.getHeader(); - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) output.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. write_buffer = std::make_unique>>(output); nested_write_buffer = static_cast> *>(write_buffer.get()); - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) { - write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), compression_method, compression_level); + write_buffer = wrapWriteBufferWithCompressionMethod(std::move(write_buffer), output_compression_method, output_compression_level); compressing_write_buffer = write_buffer.get(); } @@ -1414,10 +1423,10 @@ namespace return; PODArray memory; - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. std::unique_ptr buf = std::make_unique>>(memory); - buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), output_compression_method, output_compression_level); auto format = query_context->getOutputFormat(output_format, *buf, totals); format->write(materializeBlock(totals)); format->finalize(); @@ -1432,10 +1441,10 @@ namespace return; PODArray memory; - if (compression_method != CompressionMethod::None) + if (output_compression_method != CompressionMethod::None) memory.resize(DBMS_DEFAULT_BUFFER_SIZE); /// Must have enough space for compressed data. 
std::unique_ptr buf = std::make_unique>>(memory); - buf = wrapWriteBufferWithCompressionMethod(std::move(buf), compression_method, compression_level); + buf = wrapWriteBufferWithCompressionMethod(std::move(buf), output_compression_method, output_compression_level); auto format = query_context->getOutputFormat(output_format, *buf, extremes); format->write(materializeBlock(extremes)); format->finalize(); diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index c86c74535c5..b231ad91a32 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -106,12 +106,16 @@ message QueryInfo { /// If not set the compression settings from the configuration file will be used. Compression result_compression = 17; - // Compression type for `input_data`, `output_data`, `totals` and `extremes`. + // Compression type for `input_data`. // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. - // When used for `input_data` the client is responsible to compress data before putting it into `input_data`. - // When used for `output_data` or `totals` or `extremes` the client receives compressed data and should decompress it by itself. - // In the latter case consider to specify also `compression_level`. - string compression_type = 18; + // The client is responsible to compress data before putting it into `input_data`. + string input_compression_type = 20; + + // Compression type for `output_data`, `totals` and `extremes`. + // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. + // The client receives compressed data and should decompress it by itself. + // Consider also setting `output_compression_level`. + string output_compression_type = 21; // Compression level. // WARNING: If it's not specified the compression level is set to zero by default which might be not the best choice for some compression types (see below). @@ -123,7 +127,10 @@ message QueryInfo { // zstd: 1..22; 3 is recommended by default (compression level 0 also means 3) // lz4: 0..16; values < 0 mean fast acceleration // bz2: 1..9 - int32 compression_level = 19; + int32 output_compression_level = 19; + + /// Obsolete fields, should not be used in new code. 
+ string obsolete_compression_type = 18; } enum LogsLevel { diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index b6968575883..58ffd04111f 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -382,13 +382,13 @@ def test_result_compression(): assert result.output == (b'0\n')*1000000 def test_compressed_output(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", output_compression_type="lz4") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQuery(query_info) assert lz4.frame.decompress(result.output) == (b'0\n')*1000 def test_compressed_output_streaming(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(100000)", compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(100000)", output_compression_type="lz4") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) d_context = lz4.frame.create_decompression_context() data = b'' @@ -398,7 +398,7 @@ def test_compressed_output_streaming(): assert data == (b'0\n')*100000 def test_compressed_output_gzip(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", compression_type="gzip", compression_level=6) + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", output_compression_type="gzip", output_compression_level=6) stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) result = stub.ExecuteQuery(query_info) assert gzip.decompress(result.output) == (b'0\n')*1000 @@ -407,10 +407,10 @@ def test_compressed_totals_and_extremes(): query("CREATE TABLE t (x UInt8, y UInt8) ENGINE = Memory") query("INSERT INTO t VALUES (1, 2), (2, 4), (3, 2), (3, 3), (3, 4)") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT sum(x), y FROM t GROUP BY y WITH TOTALS", compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT sum(x), y FROM t GROUP BY y WITH TOTALS", output_compression_type="lz4") result = stub.ExecuteQuery(query_info) assert lz4.frame.decompress(result.totals) == b'12\t0\n' - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT x, y FROM t", settings={"extremes": "1"}, compression_type="lz4") + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT x, y FROM t", settings={"extremes": "1"}, output_compression_type="lz4") result = stub.ExecuteQuery(query_info) assert lz4.frame.decompress(result.extremes) == b'1\t2\n3\t4\n' @@ -423,7 +423,7 @@ def test_compressed_insert_query_streaming(): d2 = data[sz1:sz1+sz2] d3 = data[sz1+sz2:] def send_query_info(): - yield clickhouse_grpc_pb2.QueryInfo(query="INSERT INTO t VALUES", input_data=d1, compression_type="lz4", next_query_info=True) + yield clickhouse_grpc_pb2.QueryInfo(query="INSERT INTO t VALUES", input_data=d1, input_compression_type="lz4", next_query_info=True) yield clickhouse_grpc_pb2.QueryInfo(input_data=d2, next_query_info=True) yield clickhouse_grpc_pb2.QueryInfo(input_data=d3) stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) From 1341b4b4de5b8443760ba4b6dd1f2b4a9aa97445 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 7 Feb 2022 01:33:31 +0700 Subject: [PATCH 196/215] Rename QueryInfo's field result_compression -> transport_compression_type and change its type for better 
consistency. Make the previous field obsolete. --- programs/server/config.xml | 9 +- src/Server/GRPCServer.cpp | 175 +++++++++++-------- src/Server/grpc_protos/clickhouse_grpc.proto | 45 +++-- tests/integration/test_grpc_protocol/test.py | 14 +- 4 files changed, 142 insertions(+), 101 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index ce0c54f6730..def64607caf 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -217,13 +217,12 @@ /path/to/ssl_ca_cert_file - - deflate + none - - medium + + 0 -1 diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 9194e087e00..10bbce24913 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -51,6 +51,7 @@ using GRPCQueryInfo = clickhouse::grpc::QueryInfo; using GRPCResult = clickhouse::grpc::Result; using GRPCException = clickhouse::grpc::Exception; using GRPCProgress = clickhouse::grpc::Progress; +using GRPCObsoleteTransportCompression = clickhouse::grpc::ObsoleteTransportCompression; namespace DB { @@ -101,62 +102,6 @@ namespace }); } - grpc_compression_algorithm parseCompressionAlgorithm(const String & str) - { - if (str == "none") - return GRPC_COMPRESS_NONE; - else if (str == "deflate") - return GRPC_COMPRESS_DEFLATE; - else if (str == "gzip") - return GRPC_COMPRESS_GZIP; - else if (str == "stream_gzip") - return GRPC_COMPRESS_STREAM_GZIP; - else - throw Exception("Unknown compression algorithm: '" + str + "'", ErrorCodes::INVALID_CONFIG_PARAMETER); - } - - grpc_compression_level parseCompressionLevel(const String & str) - { - if (str == "none") - return GRPC_COMPRESS_LEVEL_NONE; - else if (str == "low") - return GRPC_COMPRESS_LEVEL_LOW; - else if (str == "medium") - return GRPC_COMPRESS_LEVEL_MED; - else if (str == "high") - return GRPC_COMPRESS_LEVEL_HIGH; - else - throw Exception("Unknown compression level: '" + str + "'", ErrorCodes::INVALID_CONFIG_PARAMETER); - } - - grpc_compression_algorithm convertCompressionAlgorithm(const ::clickhouse::grpc::CompressionAlgorithm & algorithm) - { - if (algorithm == ::clickhouse::grpc::NO_COMPRESSION) - return GRPC_COMPRESS_NONE; - else if (algorithm == ::clickhouse::grpc::DEFLATE) - return GRPC_COMPRESS_DEFLATE; - else if (algorithm == ::clickhouse::grpc::GZIP) - return GRPC_COMPRESS_GZIP; - else if (algorithm == ::clickhouse::grpc::STREAM_GZIP) - return GRPC_COMPRESS_STREAM_GZIP; - else - throw Exception("Unknown compression algorithm: '" + ::clickhouse::grpc::CompressionAlgorithm_Name(algorithm) + "'", ErrorCodes::INVALID_GRPC_QUERY_INFO); - } - - grpc_compression_level convertCompressionLevel(const ::clickhouse::grpc::CompressionLevel & level) - { - if (level == ::clickhouse::grpc::COMPRESSION_NONE) - return GRPC_COMPRESS_LEVEL_NONE; - else if (level == ::clickhouse::grpc::COMPRESSION_LOW) - return GRPC_COMPRESS_LEVEL_LOW; - else if (level == ::clickhouse::grpc::COMPRESSION_MEDIUM) - return GRPC_COMPRESS_LEVEL_MED; - else if (level == ::clickhouse::grpc::COMPRESSION_HIGH) - return GRPC_COMPRESS_LEVEL_HIGH; - else - throw Exception("Unknown compression level: '" + ::clickhouse::grpc::CompressionLevel_Name(level) + "'", ErrorCodes::INVALID_GRPC_QUERY_INFO); - } - /// Gets file's contents as a string, throws an exception if failed. String readFile(const String & filepath) { @@ -193,6 +138,102 @@ namespace return grpc::InsecureServerCredentials(); } + /// Transport compression makes gRPC library to compress packed Result messages before sending them through network. 
+ struct TransportCompression + { + grpc_compression_algorithm algorithm; + grpc_compression_level level; + + /// Extracts the settings of transport compression from a query info if possible. + static std::optional fromQueryInfo(const GRPCQueryInfo & query_info) + { + TransportCompression res; + if (!query_info.transport_compression_type().empty()) + { + res.setAlgorithm(query_info.transport_compression_type(), ErrorCodes::INVALID_GRPC_QUERY_INFO); + res.setLevel(query_info.transport_compression_level(), ErrorCodes::INVALID_GRPC_QUERY_INFO); + return res; + } + + if (query_info.has_obsolete_result_compression()) + { + switch (query_info.obsolete_result_compression().algorithm()) + { + case GRPCObsoleteTransportCompression::NO_COMPRESSION: res.algorithm = GRPC_COMPRESS_NONE; break; + case GRPCObsoleteTransportCompression::DEFLATE: res.algorithm = GRPC_COMPRESS_DEFLATE; break; + case GRPCObsoleteTransportCompression::GZIP: res.algorithm = GRPC_COMPRESS_GZIP; break; + case GRPCObsoleteTransportCompression::STREAM_GZIP: res.algorithm = GRPC_COMPRESS_STREAM_GZIP; break; + default: throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Unknown compression algorithm: {}", GRPCObsoleteTransportCompression::CompressionAlgorithm_Name(query_info.obsolete_result_compression().algorithm())); + } + + switch (query_info.obsolete_result_compression().level()) + { + case GRPCObsoleteTransportCompression::COMPRESSION_NONE: res.level = GRPC_COMPRESS_LEVEL_NONE; break; + case GRPCObsoleteTransportCompression::COMPRESSION_LOW: res.level = GRPC_COMPRESS_LEVEL_LOW; break; + case GRPCObsoleteTransportCompression::COMPRESSION_MEDIUM: res.level = GRPC_COMPRESS_LEVEL_MED; break; + case GRPCObsoleteTransportCompression::COMPRESSION_HIGH: res.level = GRPC_COMPRESS_LEVEL_HIGH; break; + default: throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Unknown compression level: {}", GRPCObsoleteTransportCompression::CompressionLevel_Name(query_info.obsolete_result_compression().level())); + } + return res; + } + + return std::nullopt; + } + + /// Extracts the settings of transport compression from the server configuration. 
+ static TransportCompression fromConfiguration(const Poco::Util::AbstractConfiguration & config) + { + TransportCompression res; + if (config.has("grpc.transport_compression_type")) + { + res.setAlgorithm(config.getString("grpc.transport_compression_type"), ErrorCodes::INVALID_CONFIG_PARAMETER); + res.setLevel(config.getInt("grpc.transport_compression_level", 0), ErrorCodes::INVALID_CONFIG_PARAMETER); + } + else + { + res.setAlgorithm(config.getString("grpc.compression", "none"), ErrorCodes::INVALID_CONFIG_PARAMETER); + res.setLevel(config.getString("grpc.compression_level", "none"), ErrorCodes::INVALID_CONFIG_PARAMETER); + } + return res; + } + + private: + void setAlgorithm(const String & str, int error_code) + { + if (str == "none") + algorithm = GRPC_COMPRESS_NONE; + else if (str == "deflate") + algorithm = GRPC_COMPRESS_DEFLATE; + else if (str == "gzip") + algorithm = GRPC_COMPRESS_GZIP; + else if (str == "stream_gzip") + algorithm = GRPC_COMPRESS_STREAM_GZIP; + else + throw Exception(error_code, "Unknown compression algorithm: '{}'", str); + } + + void setLevel(const String & str, int error_code) + { + if (str == "none") + level = GRPC_COMPRESS_LEVEL_NONE; + else if (str == "low") + level = GRPC_COMPRESS_LEVEL_LOW; + else if (str == "medium") + level = GRPC_COMPRESS_LEVEL_MED; + else if (str == "high") + level = GRPC_COMPRESS_LEVEL_HIGH; + else + throw Exception(error_code, "Unknown compression level: '{}'", str); + } + + void setLevel(int level_, int error_code) + { + if (0 <= level_ && level_ < GRPC_COMPRESS_LEVEL_COUNT) + level = static_cast(level_); + else + throw Exception(error_code, "Compression level {} is out of range 0..{}", level_, GRPC_COMPRESS_LEVEL_COUNT - 1); + } + }; /// Gets session's timeout from query info or from the server config. std::chrono::steady_clock::duration getSessionTimeout(const GRPCQueryInfo & query_info, const Poco::Util::AbstractConfiguration & config) @@ -293,15 +334,10 @@ namespace return std::nullopt; } - void setResultCompression(grpc_compression_algorithm algorithm, grpc_compression_level level) + void setTransportCompression(const TransportCompression & transport_compression) { - grpc_context.set_compression_algorithm(algorithm); - grpc_context.set_compression_level(level); - } - - void setResultCompression(const ::clickhouse::grpc::Compression & compression) - { - setResultCompression(convertCompressionAlgorithm(compression.algorithm()), convertCompressionLevel(compression.level())); + grpc_context.set_compression_algorithm(transport_compression.algorithm); + grpc_context.set_compression_level(transport_compression.level); } protected: @@ -816,9 +852,9 @@ namespace if (!query_info.database().empty()) query_context->setCurrentDatabase(query_info.database()); - /// Apply compression settings for this call. - if (query_info.has_result_compression()) - responder->setResultCompression(query_info.result_compression()); + /// Apply transport compression for this call. + if (auto transport_compression = TransportCompression::fromQueryInfo(query_info)) + responder->setTransportCompression(*transport_compression); /// The interactive delay will be used to show progress. 
interactive_delay = settings.interactive_delay; @@ -1781,8 +1817,9 @@ void GRPCServer::start() builder.RegisterService(&grpc_service); builder.SetMaxSendMessageSize(iserver.config().getInt("grpc.max_send_message_size", -1)); builder.SetMaxReceiveMessageSize(iserver.config().getInt("grpc.max_receive_message_size", -1)); - builder.SetDefaultCompressionAlgorithm(parseCompressionAlgorithm(iserver.config().getString("grpc.compression", "none"))); - builder.SetDefaultCompressionLevel(parseCompressionLevel(iserver.config().getString("grpc.compression_level", "none"))); + auto default_transport_compression = TransportCompression::fromConfiguration(iserver.config()); + builder.SetDefaultCompressionAlgorithm(default_transport_compression.algorithm); + builder.SetDefaultCompressionLevel(default_transport_compression.level); queue = builder.AddCompletionQueue(); grpc_server = builder.BuildAndStart(); diff --git a/src/Server/grpc_protos/clickhouse_grpc.proto b/src/Server/grpc_protos/clickhouse_grpc.proto index b231ad91a32..f596c3b7d6d 100644 --- a/src/Server/grpc_protos/clickhouse_grpc.proto +++ b/src/Server/grpc_protos/clickhouse_grpc.proto @@ -45,21 +45,19 @@ message ExternalTable { map settings = 5; } -enum CompressionAlgorithm { - NO_COMPRESSION = 0; - DEFLATE = 1; - GZIP = 2; - STREAM_GZIP = 3; -} - -enum CompressionLevel { - COMPRESSION_NONE = 0; - COMPRESSION_LOW = 1; - COMPRESSION_MEDIUM = 2; - COMPRESSION_HIGH = 3; -} - -message Compression { +message ObsoleteTransportCompression { + enum CompressionAlgorithm { + NO_COMPRESSION = 0; + DEFLATE = 1; + GZIP = 2; + STREAM_GZIP = 3; + } + enum CompressionLevel { + COMPRESSION_NONE = 0; + COMPRESSION_LOW = 1; + COMPRESSION_MEDIUM = 2; + COMPRESSION_HIGH = 3; + } CompressionAlgorithm algorithm = 1; CompressionLevel level = 2; } @@ -102,10 +100,6 @@ message QueryInfo { // `next_query_info` is allowed to be set only if a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used. bool next_query_info = 16; - /// Controls how a ClickHouse server will compress query execution results before sending back to the client. - /// If not set the compression settings from the configuration file will be used. - Compression result_compression = 17; - // Compression type for `input_data`. // Supported compression types: none, gzip(gz), deflate, brotli(br), lzma(xz), zstd(zst), lz4, bz2. // The client is responsible to compress data before putting it into `input_data`. @@ -129,7 +123,20 @@ message QueryInfo { // bz2: 1..9 int32 output_compression_level = 19; + // Transport compression is an alternative way to make the server compress its response. + // This kind of compression implies that instead of compressing just `output`, the server will compress whole packed messages of the `Result` type, + // and then the gRPC implementation on the client side will decompress those messages so client code won't be bothered with decompression. + // There is a big difference between the transport compression and the compression enabled by setting `output_compression_type` because + // in case of the transport compression the client code receives already decompressed data in `output`. + // If the transport compression is not set here it can still be enabled by the server configuration. + // Supported compression types: none, deflate, gzip, stream_gzip + // Supported compression levels: 0..3 + // WARNING: Don't set `transport_compression` and `output_compression` at the same time because it will make the server compress its output twice!
+ string transport_compression_type = 22; + int32 transport_compression_level = 23; + /// Obsolete fields, should not be used in new code. + ObsoleteTransportCompression obsolete_result_compression = 17; string obsolete_compression_type = 18; } diff --git a/tests/integration/test_grpc_protocol/test.py b/tests/integration/test_grpc_protocol/test.py index 58ffd04111f..65ee3cb4261 100644 --- a/tests/integration/test_grpc_protocol/test.py +++ b/tests/integration/test_grpc_protocol/test.py @@ -373,14 +373,6 @@ def test_cancel_while_generating_output(): output += result.output assert output == b'0\t0\n1\t0\n2\t0\n3\t0\n' -def test_result_compression(): - query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000000)", - result_compression=clickhouse_grpc_pb2.Compression(algorithm=clickhouse_grpc_pb2.CompressionAlgorithm.GZIP, - level=clickhouse_grpc_pb2.CompressionLevel.COMPRESSION_HIGH)) - stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) - result = stub.ExecuteQuery(query_info) - assert result.output == (b'0\n')*1000000 - def test_compressed_output(): query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000)", output_compression_type="lz4") stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) @@ -445,6 +437,12 @@ def test_compressed_external_table(): b"4\tDaniel\n"\ b"5\tEthan\n" +def test_transport_compression(): + query_info = clickhouse_grpc_pb2.QueryInfo(query="SELECT 0 FROM numbers(1000000)", transport_compression_type='gzip', transport_compression_level=3) + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(main_channel) + result = stub.ExecuteQuery(query_info) + assert result.output == (b'0\n')*1000000 + def test_opentelemetry_context_propagation(): trace_id = "80c190b5-9dc1-4eae-82b9-6c261438c817" parent_span_id = 123 From 77d9cddfec0cf481b787a18ccdba5f2a0489762c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 8 Feb 2022 20:15:56 +0700 Subject: [PATCH 197/215] Replace clickhouse_grpc.proto in a test with symlink. --- .../protos/clickhouse_grpc.proto | 175 +----------------- 1 file changed, 1 insertion(+), 174 deletions(-) mode change 100644 => 120000 tests/integration/test_server_reload/protos/clickhouse_grpc.proto diff --git a/tests/integration/test_server_reload/protos/clickhouse_grpc.proto b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto deleted file mode 100644 index c6cafaf6e40..00000000000 --- a/tests/integration/test_server_reload/protos/clickhouse_grpc.proto +++ /dev/null @@ -1,174 +0,0 @@ -/* This file describes gRPC protocol supported in ClickHouse. - * - * To use this protocol a client should send one or more messages of the QueryInfo type - * and then receive one or more messages of the Result type. - * According to that the service provides four methods for that: - * ExecuteQuery(QueryInfo) returns (Result) - * ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) - * ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) - * ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) - * It's up to the client to choose which method to use. - * For example, ExecuteQueryWithStreamInput() allows the client to add data multiple times - * while executing a query, which is suitable for inserting many rows. - */ - -syntax = "proto3"; - -package clickhouse.grpc; - -message NameAndType { - string name = 1; - string type = 2; -} - -// Describes an external table - a table which will exists only while a query is executing. -message ExternalTable { - // Name of the table. 
If omitted, "_data" is used. - string name = 1; - - // Columns of the table. Types are required, names can be omitted. If the names are omitted, "_1", "_2", ... is used. - repeated NameAndType columns = 2; - - // Data to insert to the external table. - // If a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used, - // then data for insertion to the same external table can be split between multiple QueryInfos. - bytes data = 3; - - // Format of the data to insert to the external table. - string format = 4; - - // Settings for executing that insertion, applied after QueryInfo.settings. - map settings = 5; -} - -enum CompressionAlgorithm { - NO_COMPRESSION = 0; - DEFLATE = 1; - GZIP = 2; - STREAM_GZIP = 3; -} - -enum CompressionLevel { - COMPRESSION_NONE = 0; - COMPRESSION_LOW = 1; - COMPRESSION_MEDIUM = 2; - COMPRESSION_HIGH = 3; -} - -message Compression { - CompressionAlgorithm algorithm = 1; - CompressionLevel level = 2; -} - -// Information about a query which a client sends to a ClickHouse server. -// The first QueryInfo can set any of the following fields. Extra QueryInfos only add extra data. -// In extra QueryInfos only `input_data`, `external_tables`, `next_query_info` and `cancel` fields can be set. -message QueryInfo { - string query = 1; - string query_id = 2; - map settings = 3; - - // Default database. - string database = 4; - - // Input data, used both as data for INSERT query and as data for the input() function. - bytes input_data = 5; - - // Delimiter for input_data, inserted between input_data from adjacent QueryInfos. - bytes input_data_delimiter = 6; - - // Default output format. If not specified, 'TabSeparated' is used. - string output_format = 7; - - repeated ExternalTable external_tables = 8; - - string user_name = 9; - string password = 10; - string quota = 11; - - // Works exactly like sessions in the HTTP protocol. - string session_id = 12; - bool session_check = 13; - uint32 session_timeout = 14; - - // Set `cancel` to true to stop executing the query. - bool cancel = 15; - - // If true there will be at least one more QueryInfo in the input stream. - // `next_query_info` is allowed to be set only if a method with streaming input (i.e. ExecuteQueryWithStreamInput() or ExecuteQueryWithStreamIO()) is used. - bool next_query_info = 16; - - /// Controls how a ClickHouse server will compress query execution results before sending back to the client. - /// If not set the compression settings from the configuration file will be used. - Compression result_compression = 17; -} - -enum LogsLevel { - LOG_NONE = 0; - LOG_FATAL = 1; - LOG_CRITICAL = 2; - LOG_ERROR = 3; - LOG_WARNING = 4; - LOG_NOTICE = 5; - LOG_INFORMATION = 6; - LOG_DEBUG = 7; - LOG_TRACE = 8; -} - -message LogEntry { - uint32 time = 1; - uint32 time_microseconds = 2; - uint64 thread_id = 3; - string query_id = 4; - LogsLevel level = 5; - string source = 6; - string text = 7; -} - -message Progress { - uint64 read_rows = 1; - uint64 read_bytes = 2; - uint64 total_rows_to_read = 3; - uint64 written_rows = 4; - uint64 written_bytes = 5; -} - -message Stats { - uint64 rows = 1; - uint64 blocks = 2; - uint64 allocated_bytes = 3; - bool applied_limit = 4; - uint64 rows_before_limit = 5; -} - -message Exception { - int32 code = 1; - string name = 2; - string display_text = 3; - string stack_trace = 4; -} - -// Result of execution of a query which is sent back by the ClickHouse server to the client. 
-message Result { - // Output of the query, represented in the `output_format` or in a format specified in `query`. - bytes output = 1; - bytes totals = 2; - bytes extremes = 3; - - repeated LogEntry logs = 4; - Progress progress = 5; - Stats stats = 6; - - // Set by the ClickHouse server if there was an exception thrown while executing. - Exception exception = 7; - - // Set by the ClickHouse server if executing was cancelled by the `cancel` field in QueryInfo. - bool cancelled = 8; -} - -service ClickHouse { - rpc ExecuteQuery(QueryInfo) returns (Result) {} - rpc ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) {} - rpc ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) {} - rpc ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) {} -} diff --git a/tests/integration/test_server_reload/protos/clickhouse_grpc.proto b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_server_reload/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file From bfa96463ca17cec6f4c03bc791a553f6baa03bc3 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 10 Feb 2022 09:23:27 +0300 Subject: [PATCH 198/215] Fix possible error 'file_size: Operation not supported' --- src/Storages/StorageFile.cpp | 3 ++- tests/performance/select_format.xml | 4 ++-- tests/performance/writing_valid_utf8.xml | 2 +- tests/queries/0_stateless/02210_append_to_dev_dull.reference | 0 tests/queries/0_stateless/02210_append_to_dev_dull.sql | 4 ++++ 5 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02210_append_to_dev_dull.reference create mode 100644 tests/queries/0_stateless/02210_append_to_dev_dull.sql diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index edd5e0447d5..ddc7717567a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -879,9 +879,10 @@ SinkToStoragePtr StorageFile::write( path = paths.back(); fs::create_directories(fs::path(path).parent_path()); + std::error_code error_code; if (!context->getSettingsRef().engine_file_truncate_on_insert && !is_path_with_globs && !FormatFactory::instance().checkIfFormatSupportAppend(format_name, context, format_settings) && fs::exists(paths.back()) - && fs::file_size(paths.back()) != 0) + && fs::file_size(paths.back(), error_code) != 0 && !error_code) { if (context->getSettingsRef().engine_file_allow_create_multiple_files) { diff --git a/tests/performance/select_format.xml b/tests/performance/select_format.xml index f0114d7a517..982039102d0 100644 --- a/tests/performance/select_format.xml +++ b/tests/performance/select_format.xml @@ -49,8 +49,8 @@ CREATE TABLE IF NOT EXISTS table_{format_slow} ENGINE = File({format_slow}, '/dev/null') AS test.hits CREATE TABLE IF NOT EXISTS table_{format_fast} ENGINE = File({format_fast}, '/dev/null') AS test.hits - INSERT INTO table_{format_slow} SELECT * FROM test.hits LIMIT 10000 SETTINGS engine_file_truncate_on_insert = 1 - INSERT INTO table_{format_fast} SELECT * FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert = 1 + INSERT INTO table_{format_slow} SELECT * FROM test.hits LIMIT 10000 + INSERT INTO table_{format_fast} SELECT * FROM test.hits LIMIT 100000 DROP TABLE IF EXISTS table_{format_slow} DROP TABLE IF EXISTS table_{format_fast} diff --git a/tests/performance/writing_valid_utf8.xml 
b/tests/performance/writing_valid_utf8.xml index 4f6637f7566..d5343144db0 100644 --- a/tests/performance/writing_valid_utf8.xml +++ b/tests/performance/writing_valid_utf8.xml @@ -20,7 +20,7 @@ CREATE TABLE IF NOT EXISTS table_{format} ENGINE = File({format}, '/dev/null') AS SELECT SearchPhrase, ClientIP6, URL, Referer, URLDomain FROM test.hits limit 0 - INSERT INTO table_{format} SELECT SearchPhrase, ClientIP6, URL, Referer, URLDomain FROM test.hits LIMIT 100000 SETTINGS engine_file_truncate_on_insert = 1 + INSERT INTO table_{format} SELECT SearchPhrase, ClientIP6, URL, Referer, URLDomain FROM test.hits LIMIT 100000 DROP TABLE IF EXISTS table_{format} diff --git a/tests/queries/0_stateless/02210_append_to_dev_dull.reference b/tests/queries/0_stateless/02210_append_to_dev_dull.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02210_append_to_dev_dull.sql b/tests/queries/0_stateless/02210_append_to_dev_dull.sql new file mode 100644 index 00000000000..4ae77132972 --- /dev/null +++ b/tests/queries/0_stateless/02210_append_to_dev_dull.sql @@ -0,0 +1,4 @@ +insert into table function file('/dev/null', 'Parquet', 'number UInt64') select * from numbers(10); +insert into table function file('/dev/null', 'ORC', 'number UInt64') select * from numbers(10); +insert into table function file('/dev/null', 'JSON', 'number UInt64') select * from numbers(10); + From 6a9f570efd91921ebb3b2aa65c4c9f645d07e5e6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 10 Feb 2022 09:26:37 +0300 Subject: [PATCH 199/215] Update roadmap.md --- docs/en/whats-new/roadmap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/whats-new/roadmap.md b/docs/en/whats-new/roadmap.md index 8872c42818f..54f8f9d68a3 100644 --- a/docs/en/whats-new/roadmap.md +++ b/docs/en/whats-new/roadmap.md @@ -5,6 +5,6 @@ toc_title: Roadmap # Roadmap {#roadmap} -The roadmap for the year 2021 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/17623). +The roadmap for the year 2022 is published for open discussion [here](https://github.com/ClickHouse/ClickHouse/issues/32513). {## [Original article](https://clickhouse.com/docs/en/roadmap/) ##} From c9d27fcea3ab58ec74c2c8d02d7ef8c8e7f0d1ce Mon Sep 17 00:00:00 2001 From: cnmade Date: Thu, 10 Feb 2022 14:45:34 +0800 Subject: [PATCH 200/215] sync translate to zh: Update roadmap.md --- docs/zh/whats-new/roadmap.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/whats-new/roadmap.md b/docs/zh/whats-new/roadmap.md index 3cb9dd6fa2f..8e8873c8ee4 100644 --- a/docs/zh/whats-new/roadmap.md +++ b/docs/zh/whats-new/roadmap.md @@ -5,6 +5,6 @@ toc_title: Roadmap # Roadmap {#roadmap} -`2021年Roadmap`已公布供公开讨论查看[这里](https://github.com/ClickHouse/ClickHouse/issues/17623). +`2022年Roadmap`已公布供公开讨论查看 [这里](https://github.com/ClickHouse/ClickHouse/issues/32513). {## [源文章](https://clickhouse.com/docs/en/roadmap/) ##} From 72ffcbbb0563c0819362bbb5b9b39f8be1c7ab4c Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Thu, 10 Feb 2022 15:43:39 +0800 Subject: [PATCH 201/215] keeper SnapshotableHashTable clean opt. 
--- src/Coordination/SnapshotableHashTable.h | 28 +++++++++++-------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h
index b1d72578530..a837e51a9be 100644
--- a/src/Coordination/SnapshotableHashTable.h
+++ b/src/Coordination/SnapshotableHashTable.h
@@ -36,6 +36,7 @@ private:
 /// Allows to avoid additional copies in updateValue function
 size_t snapshot_up_to_size = 0;
 ArenaWithFreeLists arena;
+ std::vector<typename List::iterator> snapshot_invalid_iters{100000};

 uint64_t approximate_data_size{0};
@@ -175,6 +176,7 @@ public:
 list_itr->active_in_map = false;
 auto new_list_itr = list.insert(list.end(), elem);
 it->getMapped() = new_list_itr;
+ snapshot_invalid_iters.push_back(list_itr);
 }
 else
 {
@@ -197,6 +199,7 @@ public:
 list_itr->active_in_map = false;
 list_itr->free_key = true;
 map.erase(it->getKey());
+ snapshot_invalid_iters.push_back(list_itr);
 }
 else
 {
@@ -239,6 +242,7 @@ public:
 auto itr = list.insert(list.end(), elem_copy);
 it->getMapped() = itr;
 ret = itr;
+ snapshot_invalid_iters.push_back(list_itr);
 }
 else
 {
@@ -274,23 +278,15 @@ public:
 void clearOutdatedNodes()
 {
- auto start = list.begin();
- auto end = list.end();
- for (auto itr = start; itr != end;)
+ for (auto & itr: snapshot_invalid_iters)
 {
- if (!itr->active_in_map)
- {
- updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
- if (itr->free_key)
- arena.free(const_cast<char *>(itr->key.data), itr->key.size);
- itr = list.erase(itr);
- }
- else
- {
- assert(!itr->free_key);
- itr++;
- }
+ assert(!itr->active_in_map);
+ updateDataSize(CLEAR_OUTDATED_NODES, itr->key.size, itr->value.sizeInBytes(), 0);
+ if (itr->free_key)
+ arena.free(const_cast<char *>(itr->key.data), itr->key.size);
+ list.erase(itr);
 }
+ snapshot_invalid_iters.clear();
 }

 void clear()
@@ -300,12 +296,14 @@ public:
 arena.free(const_cast<char *>(itr->key.data), itr->key.size);
 list.clear();
 updateDataSize(CLEAR, 0, 0, 0);
+ snapshot_invalid_iters.clear();
 }

 void enableSnapshotMode(size_t up_to_size)
 {
 snapshot_mode = true;
 snapshot_up_to_size = up_to_size;
+ snapshot_invalid_iters.clear();
 }

 void disableSnapshotMode()

From 298838f891ce099cef498f22bf91ba44dc56a173 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 10 Feb 2022 00:13:06 +0300
Subject: [PATCH 202/215] avoid unnecessary copying of Settings

---
 src/Client/ClientBase.cpp | 2 +-
 src/Databases/MySQL/MaterializedMySQLSyncThread.cpp | 4 ++--
 src/Dictionaries/ExecutablePoolDictionarySource.cpp | 2 +-
 src/Interpreters/ClusterDiscovery.cpp | 2 +-
 src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ++--
 src/Processors/Transforms/getSourceFromASTInsertQuery.cpp | 2 +-
 src/Server/HTTP/HTTPServerConnection.cpp | 2 +-
 src/Storages/HDFS/StorageHDFSCluster.cpp | 2 +-
 src/Storages/StorageS3Cluster.cpp | 2 +-
 src/Storages/System/StorageSystemPartsBase.cpp | 2 +-
 src/TableFunctions/TableFunctionRemote.cpp | 4 ++--
 11 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp
index 89ff019ba6e..824a96fc1ae 100644
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@@ -1317,7 +1317,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
 if (insert && insert->select)
 insert->tryFindInputFunction(input_function);

- bool is_async_insert = global_context->getSettings().async_insert && insert && insert->hasInlinedData();
+ bool is_async_insert = global_context->getSettingsRef().async_insert && insert && insert->hasInlinedData();

 /// INSERT
query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately.
 if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert)

diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
index 9dbe611537b..8033d65c549 100644
--- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
+++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
@@ -316,7 +316,7 @@ getTableOutput(const String & database_name, const String & table_name, ContextM
 return std::move(res.pipeline);
 }

-static inline String reWriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings)
+static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection, const String & database_name, const String & table_name, const Settings & global_settings)
 {
 Block tables_columns_sample_block
 {
@@ -376,7 +376,7 @@ static inline void dumpDataForTables(
 auto pipeline = getTableOutput(database_name, table_name, query_context);
 StreamSettings mysql_input_stream_settings(context->getSettingsRef());
- String mysql_select_all_query = "SELECT " + reWriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettings()) + " FROM "
+ String mysql_select_all_query = "SELECT " + rewriteMysqlQueryColumn(connection, mysql_database_name, table_name, context->getSettingsRef()) + " FROM "
 + backQuoteIfNeed(mysql_database_name) + "." + backQuoteIfNeed(table_name);
 LOG_INFO(&Poco::Logger::get("MaterializedMySQLSyncThread(" + database_name + ")"), "mysql_select_all_query is {}", mysql_select_all_query);
 auto input = std::make_unique<MySQLSource>(connection, mysql_select_all_query, pipeline.getHeader(), mysql_input_stream_settings);

diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.cpp b/src/Dictionaries/ExecutablePoolDictionarySource.cpp
index 48ddeed7fa6..62598c966e5 100644
--- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp
+++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp
@@ -197,7 +197,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)

 size_t max_command_execution_time = config.getUInt64(settings_config_prefix + ".max_command_execution_time", 10);

- size_t max_execution_time_seconds = static_cast<size_t>(context->getSettings().max_execution_time.totalSeconds());
+ size_t max_execution_time_seconds = static_cast<size_t>(context->getSettingsRef().max_execution_time.totalSeconds());
 if (max_execution_time_seconds != 0 && max_command_execution_time > max_execution_time_seconds)
 max_command_execution_time = max_execution_time_seconds;

diff --git a/src/Interpreters/ClusterDiscovery.cpp b/src/Interpreters/ClusterDiscovery.cpp
index 8b68ba02504..df6e8ea98f5 100644
--- a/src/Interpreters/ClusterDiscovery.cpp
+++ b/src/Interpreters/ClusterDiscovery.cpp
@@ -229,7 +229,7 @@ ClusterPtr ClusterDiscovery::makeCluster(const ClusterInfo & cluster_info)
 bool secure = cluster_info.current_node.secure;

 auto cluster = std::make_shared<Cluster>(
- context->getSettings(),
+ context->getSettingsRef(),
 shards,
 /* username= */ context->getUserName(),
 /* password= */ "",

diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index b7e6f29188d..9a9a71f9688 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -921,8 +921,8 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(

 auto
reader_settings = getMergeTreeReaderSettings(context);

- bool use_skip_indexes = context->getSettings().use_skip_indexes;
- if (select.final() && !context->getSettings().use_skip_indexes_if_final)
+ bool use_skip_indexes = settings.use_skip_indexes;
+ if (select.final() && !settings.use_skip_indexes_if_final)
 use_skip_indexes = false;

 result.parts_with_ranges = MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipIndexes(

diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp
index 64c8a01bb9c..4ee3f2d4b82 100644
--- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp
+++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp
@@ -59,7 +59,7 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
 : std::make_unique<EmptyReadBuffer>();

 /// Create a source from input buffer using format from query
- auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettings().max_insert_block_size);
+ auto source = context->getInputFormat(ast_insert_query->format, *input_buffer, header, context->getSettingsRef().max_insert_block_size);
 source->addBuffer(std::move(input_buffer));
 return source;
 }

diff --git a/src/Server/HTTP/HTTPServerConnection.cpp b/src/Server/HTTP/HTTPServerConnection.cpp
index 7020b8e9a23..e365c9f31d0 100644
--- a/src/Server/HTTP/HTTPServerConnection.cpp
+++ b/src/Server/HTTP/HTTPServerConnection.cpp
@@ -36,7 +36,7 @@ void HTTPServerConnection::run()

 if (request.isSecure())
 {
- size_t hsts_max_age = context->getSettings().hsts_max_age.value;
+ size_t hsts_max_age = context->getSettingsRef().hsts_max_age.value;

 if (hsts_max_age > 0)
 response.add("Strict-Transport-Security", "max-age=" + std::to_string(hsts_max_age));

diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp
index dfe1ea6ffd3..7b370b7e63f 100644
--- a/src/Storages/HDFS/StorageHDFSCluster.cpp
+++ b/src/Storages/HDFS/StorageHDFSCluster.cpp
@@ -69,7 +69,7 @@ Pipe StorageHDFSCluster::read(
 size_t /*max_block_size*/,
 unsigned /*num_streams*/)
 {
- auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings());
+ auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef());

 auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(context, uri);
 auto callback = std::make_shared<HDFSSource::IteratorWrapper>([iterator]() mutable -> String

diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp
index 762eb079c1c..57220c68347 100644
--- a/src/Storages/StorageS3Cluster.cpp
+++ b/src/Storages/StorageS3Cluster.cpp
@@ -82,7 +82,7 @@ Pipe StorageS3Cluster::read(
 {
 StorageS3::updateClientAndAuthSettings(context, client_auth);

- auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings());
+ auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef());

 StorageS3::updateClientAndAuthSettings(context, client_auth);
 auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);

diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp
index 6c8159ca720..f4dd9cbd45d 100644
--- a/src/Storages/System/StorageSystemPartsBase.cpp
+++ b/src/Storages/System/StorageSystemPartsBase.cpp
@@ -66,7 +66,7 @@ StoragesInfo::getParts(MergeTreeData::DataPartStateVector & state, bool has_stat
 }

 StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, ContextPtr context)
- :
query_id(context->getCurrentQueryId()), settings(context->getSettings())
+ : query_id(context->getCurrentQueryId()), settings(context->getSettingsRef())
 {
 /// Will apply WHERE to subset of columns and then add more columns.
 /// This is kind of complicated, but we use WHERE to do less work.

diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp
index 85857011616..90fbb079bb6 100644
--- a/src/TableFunctions/TableFunctionRemote.cpp
+++ b/src/TableFunctions/TableFunctionRemote.cpp
@@ -205,7 +205,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
 if (name != "clusterAllReplicas")
 cluster = context->getCluster(cluster_name_expanded);
 else
- cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettings());
+ cluster = context->getCluster(cluster_name_expanded)->getClusterWithReplicasAsShards(context->getSettingsRef());
 }
 else
 {
@@ -241,7 +241,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr
 bool treat_local_as_remote = false;
 bool treat_local_port_as_remote = context->getApplicationType() == Context::ApplicationType::LOCAL;
 cluster = std::make_shared<Cluster>(
- context->getSettings(),
+ context->getSettingsRef(),
 names,
 configuration.username,
 configuration.password,

From 8385dc27c2f43414a283a4faf07a08711a90274a Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Thu, 10 Feb 2022 12:19:01 +0300
Subject: [PATCH 203/215] Fix test

---
 tests/queries/0_stateless/02210_append_to_dev_dull.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/02210_append_to_dev_dull.sql b/tests/queries/0_stateless/02210_append_to_dev_dull.sql
index 4ae77132972..a8aaa2f05ab 100644
--- a/tests/queries/0_stateless/02210_append_to_dev_dull.sql
+++ b/tests/queries/0_stateless/02210_append_to_dev_dull.sql
@@ -1,3 +1,5 @@
+-- Tags: no-fasttest
+
 insert into table function file('/dev/null', 'Parquet', 'number UInt64') select * from numbers(10);
 insert into table function file('/dev/null', 'ORC', 'number UInt64') select * from numbers(10);
 insert into table function file('/dev/null', 'JSON', 'number UInt64') select * from numbers(10);

From 87350f3552538bf797e42898fbf262cd1326f926 Mon Sep 17 00:00:00 2001
From: zhanglistar
Date: Thu, 10 Feb 2022 17:33:12 +0800
Subject: [PATCH 204/215] Keeper SnapshotableHashTable fix clean bug.
--- src/Coordination/SnapshotableHashTable.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h
index a837e51a9be..304183e02bb 100644
--- a/src/Coordination/SnapshotableHashTable.h
+++ b/src/Coordination/SnapshotableHashTable.h
@@ -296,19 +296,16 @@ public:
 arena.free(const_cast<char *>(itr->key.data), itr->key.size);
 list.clear();
 updateDataSize(CLEAR, 0, 0, 0);
- snapshot_invalid_iters.clear();
 }

 void enableSnapshotMode(size_t up_to_size)
 {
 snapshot_mode = true;
 snapshot_up_to_size = up_to_size;
- snapshot_invalid_iters.clear();
 }

 void disableSnapshotMode()
 {
-
 snapshot_mode = false;
 snapshot_up_to_size = 0;
 }

From 4d73fb7fb7a05d2677516380c43e6f146b4de05f Mon Sep 17 00:00:00 2001
From: zhanglistar
Date: Thu, 10 Feb 2022 17:56:41 +0800
Subject: [PATCH 205/215] Fix ut in SnapshotableHashTable

---
 src/Coordination/SnapshotableHashTable.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/Coordination/SnapshotableHashTable.h b/src/Coordination/SnapshotableHashTable.h
index 304183e02bb..002f1e4e365 100644
--- a/src/Coordination/SnapshotableHashTable.h
+++ b/src/Coordination/SnapshotableHashTable.h
@@ -36,7 +36,8 @@ private:
 /// Allows to avoid additional copies in updateValue function
 size_t snapshot_up_to_size = 0;
 ArenaWithFreeLists arena;
- std::vector<typename List::iterator> snapshot_invalid_iters{100000};
+ /// Collect invalid iterators to avoid traversing the whole list
+ std::vector<typename List::iterator> snapshot_invalid_iters;

 uint64_t approximate_data_size{0};
@@ -197,9 +198,9 @@ public:
 if (snapshot_mode)
 {
 list_itr->active_in_map = false;
+ snapshot_invalid_iters.push_back(list_itr);
 list_itr->free_key = true;
 map.erase(it->getKey());
- snapshot_invalid_iters.push_back(list_itr);
 }
 else
 {
@@ -238,11 +239,11 @@ public:
 {
 auto elem_copy = *(list_itr);
 list_itr->active_in_map = false;
+ snapshot_invalid_iters.push_back(list_itr);
 updater(elem_copy.value);
 auto itr = list.insert(list.end(), elem_copy);
 it->getMapped() = itr;
 ret = itr;
- snapshot_invalid_iters.push_back(list_itr);
 }
 else
 {

From 3e21ebeb02ee8bb1e2815ef01fd47f819a2358b6 Mon Sep 17 00:00:00 2001
From: Maksim Kita
Date: Tue, 8 Feb 2022 09:49:15 +0000
Subject: [PATCH 206/215] For SQLUserDefinedFunctions change access type from DATABASE to GLOBAL

---
 src/Access/Common/AccessType.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index 55b8359d385..16d3de40ec3 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -86,7 +86,7 @@ enum class AccessType
 M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
 M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables; implicitly enabled by the grant CREATE_TABLE on any table */ \
- M(CREATE_FUNCTION, "", DATABASE, CREATE) /* allows to execute CREATE FUNCTION */ \
+ M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
 M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
 \
 M(DROP_DATABASE, "", DATABASE, DROP) /* allows to execute {DROP|DETACH} DATABASE */\
 M(DROP_TABLE, "", TABLE, DROP) /* allows to execute {DROP|DETACH} TABLE */\
 M(DROP_VIEW, "", VIEW, DROP) /* allows to execute {DROP|DETACH} TABLE for views; implicitly enabled by the grant DROP_TABLE */\
 M(DROP_DICTIONARY, "", DICTIONARY, DROP) /* allows to execute {DROP|DETACH} DICTIONARY */\
- M(DROP_FUNCTION, "", DATABASE, DROP) /* allows to execute DROP FUNCTION */\
+
M(DROP_FUNCTION, "", GLOBAL, DROP) /* allows to execute DROP FUNCTION */\ M(DROP, "", GROUP, ALL) /* allows to execute {DROP|DETACH} */\ \ M(TRUNCATE, "TRUNCATE TABLE", TABLE, ALL) \ From f7b70877c8a07a53774019ec1cb0f033b071ec75 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 9 Feb 2022 11:57:11 +0000 Subject: [PATCH 207/215] Fixed tests --- tests/queries/0_stateless/01271_show_privileges.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index c18992583cd..b2b02b24cb0 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -49,13 +49,13 @@ CREATE TABLE [] TABLE CREATE CREATE VIEW [] VIEW CREATE CREATE DICTIONARY [] DICTIONARY CREATE CREATE TEMPORARY TABLE [] GLOBAL CREATE -CREATE FUNCTION [] DATABASE CREATE +CREATE FUNCTION [] GLOBAL CREATE CREATE [] \N ALL DROP DATABASE [] DATABASE DROP DROP TABLE [] TABLE DROP DROP VIEW [] VIEW DROP DROP DICTIONARY [] DICTIONARY DROP -DROP FUNCTION [] DATABASE DROP +DROP FUNCTION [] GLOBAL DROP DROP [] \N ALL TRUNCATE ['TRUNCATE TABLE'] TABLE ALL OPTIMIZE ['OPTIMIZE TABLE'] TABLE ALL From d40552359868d09aec65393528dc30ed5f5df139 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 10 Feb 2022 13:27:27 +0000 Subject: [PATCH 208/215] Fix ProfileEvents sending in INSERT SELECT --- src/Server/TCPHandler.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 668017f8ef8..99523ff09e3 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -359,6 +359,7 @@ void TCPHandler::runImpl() return true; sendProgress(); + sendProfileEvents(); sendLogs(); return false; From 80e110081303d9917e738f2c69eeb0b1bb451f63 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 10 Feb 2022 10:45:27 -0400 Subject: [PATCH 209/215] Virtual Columns MergeTree engine --- .../engines/table-engines/mergetree-family/mergetree.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 6769f48a466..92865c94475 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -886,3 +886,12 @@ S3 disk can be configured as `main` or `cold` storage: ``` In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. + +## Virtual Columns {#virtual-columns} + +- `_part` — Name of a part. +- `_part_index` — Sequential index of the part in the query result. +- `_partition_id` — Name of a partition. +- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`). +- `_partition_value` — Values (a tuple) of a `partition by` expression. +- `_sample_factor` — Sample factor (from the query). 
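As a usage sketch for the virtual columns documented in the hunk above (the table name `hits_local` is hypothetical, and `_sample_factor` is only meaningful when the table declares a sampling expression):

```sql
-- Inspect which data parts and partitions the selected rows come from.
SELECT
    _part,
    _partition_id,
    count() AS rows
FROM hits_local
GROUP BY _part, _partition_id
ORDER BY _part;
```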
From 47412c9619aca0833fadf15a85451a30f05e36b0 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 10 Feb 2022 19:31:02 +0000 Subject: [PATCH 210/215] Fixed unit tests --- src/Access/ContextAccess.cpp | 3 ++- src/Access/tests/gtest_access_rights_ops.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 400ee55a35d..744c3571175 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -425,6 +425,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args | AccessType::TRUNCATE; const AccessFlags dictionary_ddl = AccessType::CREATE_DICTIONARY | AccessType::DROP_DICTIONARY; + const AccessFlags function_ddl = AccessType::CREATE_FUNCTION | AccessType::DROP_FUNCTION; const AccessFlags table_and_dictionary_ddl = table_ddl | dictionary_ddl; const AccessFlags write_table_access = AccessType::INSERT | AccessType::OPTIMIZE; const AccessFlags write_dcl_access = AccessType::ACCESS_MANAGEMENT - AccessType::SHOW_ACCESS; @@ -432,7 +433,7 @@ bool ContextAccess::checkAccessImplHelper(const AccessFlags & flags, const Args const AccessFlags not_readonly_flags = write_table_access | table_and_dictionary_ddl | write_dcl_access | AccessType::SYSTEM | AccessType::KILL_QUERY; const AccessFlags not_readonly_1_flags = AccessType::CREATE_TEMPORARY_TABLE; - const AccessFlags ddl_flags = table_ddl | dictionary_ddl; + const AccessFlags ddl_flags = table_ddl | dictionary_ddl | function_ddl; const AccessFlags introspection_flags = AccessType::INTROSPECTION; }; static const PrecalculatedFlags precalc; diff --git a/src/Access/tests/gtest_access_rights_ops.cpp b/src/Access/tests/gtest_access_rights_ops.cpp index 2881825dd17..2b4fbf411fe 100644 --- a/src/Access/tests/gtest_access_rights_ops.cpp +++ b/src/Access/tests/gtest_access_rights_ops.cpp @@ -45,7 +45,7 @@ TEST(AccessRights, Union) lhs.grant(AccessType::INSERT); rhs.grant(AccessType::ALL, "db1"); lhs.makeUnion(rhs); - ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, CREATE FUNCTION, DROP, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); + ASSERT_EQ(lhs.toString(), "GRANT INSERT ON *.*, GRANT SHOW, SELECT, ALTER, CREATE DATABASE, CREATE TABLE, CREATE VIEW, CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, TRUNCATE, OPTIMIZE, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, SYSTEM MOVES, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, SYSTEM RESTORE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*"); } From 691cb3352b607b5c028be2481d7f730d8d85fb2f Mon Sep 17 00:00:00 2001 From: Tom Risse Date: Thu, 10 Feb 2022 14:17:31 -0800 Subject: [PATCH 211/215] Add support agreement page and snippets. 
(#34512) --- website/support/agreement/index.html | 27 ++++ .../templates/support/agreement-content.html | 120 ++++++++++++++++++ website/templates/support/agreement-hero.html | 10 ++ 3 files changed, 157 insertions(+) create mode 100644 website/support/agreement/index.html create mode 100644 website/templates/support/agreement-content.html create mode 100644 website/templates/support/agreement-hero.html diff --git a/website/support/agreement/index.html b/website/support/agreement/index.html new file mode 100644 index 00000000000..59e5ca9ab75 --- /dev/null +++ b/website/support/agreement/index.html @@ -0,0 +1,27 @@ +{% set prefetch_items = [ + ('/docs/en/', 'document') +] %} + +{% extends "templates/base.html" %} + +{% block extra_meta %} + +{% include "templates/common_fonts.html" %} +{% endblock %} + +{% block nav %} + +{% include "templates/global/nav.html" %} + +{% endblock %} + +{% block content %} + +{% include "templates/support/agreement-hero.html" %} + +{% include "templates/support/agreement-content.html" %} + +{% include "templates/global/newsletter.html" %} +{% include "templates/global/github_stars.html" %} + +{% endblock %} diff --git a/website/templates/support/agreement-content.html b/website/templates/support/agreement-content.html new file mode 100644 index 00000000000..4ca64e69599 --- /dev/null +++ b/website/templates/support/agreement-content.html @@ -0,0 +1,120 @@ +


This ClickHouse Subscription Agreement, including all referenced URLs, which are incorporated herein by reference (collectively, this “Agreement”), is entered into as of the date on which an applicable Order Form is fully executed (“Effective Date”), by and between the ClickHouse entity ("ClickHouse") set forth on such Order Form, and the entity identified thereon as the “Customer” (“Customer”).


1. DEFINITIONS
Capitalized terms used herein have the meaning ascribed below, or where such terms are first used, as applicable.


1.1 "Affiliate" means, with respect to a party, any entity that controls, is controlled by, or which is under common control with, such party, where "control" means ownership of at least fifty percent (50%) of the outstanding voting shares of the entity, or the contractual right to establish policy for, and manage the operations of, the entity.


1.2 "Order Form" means an ordering document provided by ClickHouse pursuant to which Customer purchases Subscriptions under this Agreement.


1.3 "Qualifying PO" means a purchase order issued by customer for the purpose of purchasing a Subscription, which (i) references the number of an applicable Order Form provided to Customer by ClickHouse and (ii) clearly states the purchase order is subject to the terms and conditions of this Agreement.


1.4 "Software" means the ClickHouse software of the same name that is licensed for use under the Apache 2.0 license.


1.5 "Subscription" means Customer's right, for a fixed period of time, to receive Support Services, as set forth in the applicable Order Form.


1.6 "Subscription Term" means the period of time for which a Subscription is valid, as further described in Section 7.1 of this Agreement.


1.7 "Support Services" means maintenance and support services for the Software, as more fully described in the Support Services Policy.


1.8 "Support Services Policy" means ClickHouse's support services policy as further described at https://clickhouse.com/support/policy/. ClickHouse reserves the right to reasonably modify the Support Services Policy during a Subscription Term, provided however, ClickHouse shall not materially diminish the level of Support Services during a Subscription Term. The effective date of each version of the Support Services Policy will be stated thereon, and ClickHouse agrees to archive copies of each version, and make the same available to Customer upon written request (e-mail sufficient). The parties agree that the Support Services Policy is hereby incorporated into these terms and conditions by this reference.


2. AGREEMENT SCOPE AND PERFORMANCE OF SUPPORT SERVICES


2.1 Agreement Scope. This Agreement includes terms and conditions applicable to Subscriptions for Support Services purchased under each Order Form entered into by the parties under Section 2.2 below, which Support Services may be used by Customer solely for Internal use and in connection with the use case(s) set forth on the applicable Order Form.


2.2 Order for Support Services Subscriptions. Orders for Subscriptions may be placed by Customer through (1) the execution of Order Forms with ClickHouse or (2) issuance by Customer of a Qualifying PO, which will be deemed to constitute, for the purposes of this Agreement, the execution by Customer of the referenced Order Form.


2.3 Affiliates. The parties agree that their respective Affiliates may also conduct business under this Agreement by entering into Order Forms, which in some cases may be subject to such additional and/or alternative terms and conditions to those contained in this Agreement as may be mutually agreed in the Order Form or an attachment thereto, as applicable. Accordingly, where Affiliates of the parties conduct business hereunder, references to Customer herein shall include any applicable Customer Affiliate, and references to ClickHouse herein shall include any applicable ClickHouse Affiliate. The parties agree that where either of them or one of their Affiliates enters into an Order Form with an Affiliate of the other party, that such Affiliate shall be solely responsible for performing all of its obligations under this Agreement in connection with such Order Form.


2.4 Performance of Support Services. Subject to Customer’s payment of all fees (as set forth in an applicable Order Form), ClickHouse will provide Customer with Support Services for the Software during an applicable Subscription Term in accordance with this Agreement and the Support Services Policy. Customer will reasonably cooperate with ClickHouse in connection with the Support Services, including, without limitation, by providing ClickHouse reasonable remote access to its installations, server cloud (or hosting provider), Software and equipment in connection therewith. Further, Customer will designate appropriately skilled personnel to serve as ClickHouse’s central contacts in connection with the use, operation and support of the Software. Customer understands that ClickHouse’s performance of Support Services is dependent in part on Customer’s cooperation, actions, and performance. ClickHouse shall not be responsible for any delays or interruptions in its performance of Support Services, or any claims arising therefrom, due to Customer’s lack of cooperation or acts or omissions. ClickHouse may use its Affiliates or subcontractors to provide Support Services to Customer, provided that ClickHouse remains responsible to Customer for performance.


3. PAYMENT AND TAXES


3.1 Payment. ClickHouse will invoice Customer for the fees due under each Order Form or otherwise under this Agreement, and Customer will pay such fees within thirty (30) days after receipt of an applicable invoice. All invoices will be paid in the currency set forth on the applicable Order Form. Payments will be made without right of set-off or chargeback. Except as otherwise expressly provided in this Agreement, any and all payments made by Customer pursuant to this Agreement or any Order Form are non-refundable, and all commitments to make any payments hereunder or under any Order Form are non-cancellable.


3.2 Taxes. All fees stated on an Order Form are exclusive of any applicable sales, use, value added and excise taxes levied upon the delivery or use of the taxable components, if any, of any Subscription purchased by Customer under this Agreement (collectively, “Taxes”). Taxes do not include any taxes on the net income of ClickHouse or any of its Affiliates. Unless Customer provides ClickHouse a valid state sales/use/excise tax exemption certificate or Direct Pay Permit, and provided that ClickHouse separately states any such taxes in the applicable invoice, Customer will pay and be solely responsible for all Taxes. If Customer is required by any foreign governmental authority to deduct or withhold any portion of the amount invoiced for the delivery or use of Support Services under this Agreement, Customer shall increase the sum paid to ClickHouse by an amount necessary for the total payment to ClickHouse to equal the amount originally invoiced.


4. CONFIDENTIAL INFORMATION


4.1 Confidential Information. Both parties acknowledge that, in the course of performing this Agreement, they may obtain information relating to products (such as goods, services, and software) of the other party, or relating to the parties themselves, which is of a confidential and proprietary nature ("Confidential Information"). Confidential Information includes materials and all communications concerning ClickHouse's or Customer's business and marketing strategies, including but not limited to employee and customer lists, customer profiles, project plans, design documents, product strategies and pricing data, research, advertising plans, leads and sources of supply, development activities, design and coding, interfaces with the Products, anything provided by either party to the other in connection with the Products and/or Support Services provided under this Agreement, including, without limitation, computer programs, technical drawings, algorithms, know-how, formulas, processes, ideas, inventions (whether patentable or not), schematics and other technical plans and other information of the parties which by its nature can be reasonably expected to be proprietary and confidential, whether it is presented in oral, printed, written, graphic or photographic or other tangible form (including information received, stored or transmitted electronically) even though specific designation as Confidential Information has not been made. Confidential Information also includes any notes, summaries, analyses of the foregoing that are prepared by the receiving party.


4.2 Non-use and Non-disclosure. The parties shall at all times, both during the Term and thereafter keep in trust and confidence all Confidential Information of the other party using commercially reasonable care (but in no event less than the same degree of care that the receiving party uses to protect its own Confidential Information) and shall not use such Confidential Information other than as necessary to carry out its duties under this Agreement, nor shall either party disclose any such Confidential Information to third parties other than to Affiliates or as necessary to carry out its duties under this Agreement without the other party's prior written consent, provided that each party shall be allowed to disclose Confidential Information of the other party to the extent that such disclosure is approved in writing by such other party, or necessary to enforce its rights under this Agreement.


4.3 Non-Applicability. The obligations of confidentiality shall not apply to information which (i) has entered the public domain or is otherwise publicly available, except where such entry or availability is the result of a party's breach of this Agreement; (ii) prior to disclosure hereunder was already in the receiving party's possession without restriction as evidenced by appropriate documentation; (iii) subsequent to disclosure hereunder is obtained by the receiving party on a non-confidential basis from a third party who has the right to disclose such information; or (iv) was developed by the receiving party without any use of any of the Confidential Information as evidenced by appropriate documentation.


4.4 Terms of this Agreement. Except as required by law or governmental regulation, neither party shall disclose, advertise, or publish the terms and conditions of this Agreement without the prior written consent of the other party, except that either party may disclose the terms of this Agreement to potential acquirers, referral partners involved in an applicable transaction, accountants, attorneys and Affiliates pursuant to the terms of a non-disclosure or confidentiality agreement. If Customer is using a third party provider to host a Product, then such provider may also receive, subject to a confidentiality obligation, information related to the terms of this Agreement or Customer’s usage of the applicable Product.


4.5 Disclosure Required by Law. Notwithstanding anything to the contrary herein, each party may disclose the other party's Confidential Information in order to comply with applicable law and/or an order from a court or other governmental body of competent jurisdiction, and, in connection with compliance with such an order only, if such party: (i) unless prohibited by law, gives the other party prior written notice to such disclosure if the time between that order and such disclosure reasonably permits or, if time does not permit, gives the other party written notice of such disclosure promptly after complying with that order and (ii) fully cooperates with the other party, at the other party's cost and expense, in seeking a protective order, or confidential treatment, or taking other measures to oppose or limit such disclosure. Each party must not release any more of the other party's Confidential Information than is, in the opinion of its counsel, reasonably necessary to comply with an applicable order.


5. WARRANTIES AND DISCLAIMER OF WARRANTIES


5.1 Limited Support Services Performance Warranty. ClickHouse warrants that it will perform the Support Services in a professional, workmanlike manner, consistent with generally accepted industry practice, and in accordance with the Support Services Policy. In the event of a breach of the foregoing warranty, ClickHouse’s sole obligation, and Customer’s exclusive remedy, shall be for ClickHouse to re-perform the applicable Support Services.


5.2 Warranty Disclaimer. EXCEPT AS SET FORTH IN SECTION 5.1 ABOVE, THE SUPPORT SERVICES ARE PROVIDED “AS IS” WITHOUT WARRANTY OF ANY KIND AND CLICKHOUSE MAKES NO ADDITIONAL WARRANTIES, WHETHER EXPRESSED, IMPLIED OR STATUTORY, REGARDING OR RELATING TO THE SUPPORT SERVICES OR ANY MATERIALS FURNISHED OR PROVIDED TO CUSTOMER UNDER THIS AGREEMENT. TO THE MAXIMUM EXTENT PERMITTED UNDER APPLICABLE LAW, CLICKHOUSE SPECIFICALLY DISCLAIMS ALL IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT WITH RESPECT TO THE SUPPORT SERVICES AND ANY MATERIALS FURNISHED OR PROVIDED TO CUSTOMER UNDER THIS AGREEMENT. CUSTOMER UNDERSTANDS AND AGREES THAT THE SUPPORT SERVICES AND ANY MATERIALS FURNISHED OR PROVIDED TO CUSTOMER UNDER THIS AGREEMENT ARE NOT DESIGNED OR INTENDED FOR USE IN THE OPERATION OF NUCLEAR FACILITIES, AIRCRAFT, WEAPONS SYSTEMS, OR LIFE SUPPORT SYSTEMS.


6. LIMITATION OF LIABILITY


6.1 Excluded Damages. IN NO EVENT SHALL CUSTOMER OR CLICKHOUSE, OR THEIR RESPECTIVE AFFILIATES, BE LIABLE FOR ANY LOSS OF PROFITS, LOSS OF USE, BUSINESS INTERRUPTION, LOSS OF DATA, COST OF SUBSTITUTE GOODS OR SERVICES, OR FOR ANY PUNITIVE, INDIRECT, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND IN CONNECTION WITH OR ARISING OUT OF THE PERFORMANCE OF OR FAILURE TO PERFORM THIS AGREEMENT, WHETHER ALLEGED AS A BREACH OF CONTRACT OR TORTIOUS CONDUCT, INCLUDING NEGLIGENCE, EVEN IF A PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.


6.2 Damages Cap. EXCEPT WITH RESPECT TO (I) A PARTY’S BREACH OF ITS OBLIGATIONS UNDER SECTION 4, (II) AMOUNTS PAYABLE BY CUSTOMER UNDER SECTION 3 OF THIS AGREEMENT AND EACH ORDER FORM, AND (III) CUSTOMER'S VIOLATIONS OF THE USE RESTRICTIONS SET FORTH IN THIS AGREEMENT, IN NO EVENT SHALL CLICKHOUSE'S OR CUSTOMER’S TOTAL, CUMULATIVE LIABILITY UNDER ANY ORDER FORM EXCEED THE AMOUNT PAID OR PAYABLE BY CUSTOMER TO CLICKHOUSE UNDER THIS AGREEMENT FOR THE AFFECTED SUPPORT SERVICES DELIVERED AND/OR MADE AVAILABLE TO CUSTOMER UNDER SUCH ORDER FORM FOR THE TWELVE (12) MONTH PERIOD IMMEDIATELY PRIOR TO THE FIRST EVENT GIVING RISE TO LIABILITY.


6.3 Basis of the Bargain. THE ALLOCATIONS OF LIABILITY IN THIS SECTION 6 REPRESENT THE AGREED AND BARGAINED FOR UNDERSTANDING OF THE PARTIES, AND THE COMPENSATION OF CLICKHOUSE FOR THE SUPPORT SERVICES PROVIDED HEREUNDER REFLECTS SUCH ALLOCATIONS. THE FOREGOING LIMITATIONS, EXCLUSIONS AND DISCLAIMERS WILL APPLY TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, EVEN IF ANY REMEDY FAILS IN ITS ESSENTIAL PURPOSE.


7. TERM AND TERMINATION


7.1 Subscription Term. The initial Subscription Term for each Subscription will commence and expire in accordance with the start date and end date set forth on the applicable Order Form, unless earlier terminated in in accordance with Section 7.3 below. Thereafter, each Subscription may be renewed for additional one (1) year periods upon the mutual written agreement of the parties. The initial Subscription Term, plus any subsequent renewal Subscription Term shall be the "Subscription Term".


7.2 Agreement Term. This Agreement will commence on the Effective Date and, unless earlier terminated in accordance with Section 7.3(b) below, continue in force and effect for a period of two (2) years. Thereafter, the term of this Agreement shall automatically renew for additional one (1) year periods unless either party gives written notice to the other of its intention not to renew the Agreement at least thirty (30) days prior to the expiration of the then-current term. The initial term of this Agreement, plus any subsequent renewal term, shall be the "Term" of this Agreement. Notwithstanding any expiration of this Agreement, its terms will continue to apply to any Subscription that has not been terminated or for which the Subscription Term has not expired.


7.3 Termination.

  a. Subscriptions. Each party may terminate a Subscription upon giving notice in writing to the other party if the non-terminating party commits a material breach of this Agreement with respect to such Subscription, and has failed to cure such breach within thirty (30) days following a request in writing from the notifying party to do so. Upon the termination or expiration of a Subscription, the rights and obligations of the parties with respect thereto will, subject to Section 7.4 below, cease, provided that termination of a Subscription under this subsection (a) will not result in termination of any other Subscriptions.
  b. Agreement. Either party may terminate this Agreement upon giving notice in writing to the other party if the non-terminating party commits a material breach of this Agreement with respect to any active Subscriptions hereunder, and has failed to cure such breach within thirty (30) days following a request in writing from the notifying party to do so. For the avoidance of doubt, termination of this Agreement under this subsection (b) will result in the termination of all Subscriptions and Order Forms.

7.4 Survival. Upon the expiration or termination of an Order Form or this Agreement, (i) Customer shall have no further rights under any affected Subscription(s); and (ii) any payment obligations accrued under Section 3, as well as the provisions of Sections 1, 4, 5, 6, 7, 7.4 and 9 of this Agreement will survive such expiration or termination.


8. GENERAL


8.1 Anti-Corruption. Each party acknowledges that it is aware of, understands and has complied and will comply with, all applicable U.S. and foreign anti-corruption laws, including without limitation, the U.S. Foreign Corrupt Practices Act of 1977 and the U.K. Bribery Act of 2010, and similarly applicable anti-corruption and anti-bribery laws ("Anti-Corruption Laws"). Each party agrees that no one acting on its behalf will give, offer, agree or promise to give, or authorize the giving directly or indirectly, of any money or other thing of value, including travel, entertainment, or gifts, to anyone as an unlawful inducement or reward for favorable action or forbearance from action or the exercise of unlawful influence (a) to any governmental official or employee (including employees of government-owned and government-controlled corporations or agencies or public international organizations), (b) to any political party, official of a political party, or candidate, (c) to an intermediary for payment to any of the foregoing, or (d) to any other person or entity in a corrupt or improper effort to obtain or retain business or any commercial advantage, such as receiving a permit or license, or directing business to any person. Improper payments, provisions, bribes, kickbacks, influence payments, or other unlawful provisions to any person are prohibited under this Agreement.


8.2 Assignment. Neither party may assign this Agreement, in whole or in part, without the prior written consent of the other party, provided that no such consent will be required to assign this Agreement in its entirety to (i) an Affiliate that is able to satisfy the obligations of the assigning party under this Agreement or (ii) a successor in interest in connection with a merger, acquisition or sale of all or substantially all of the assigning party's assets. Any assignment in violation of this Section shall be void, ab initio, and of no effect. Subject to the foregoing, this Agreement is binding upon, inures to the benefit of and is enforceable by, the parties and their respective permitted successors and assigns.


8.3 Attorneys' Fees. If any action or proceeding, whether regulatory, administrative, at law or in equity is commenced or instituted to enforce or interpret any of the terms or provisions of this Agreement, the prevailing party in any such action or proceeding shall be entitled to recover its reasonable attorneys' fees, expert witness fees, costs of suit and expenses, in addition to any other relief to which such prevailing party may be entitled. As used herein, "prevailing party" includes without limitation, a party who dismisses an action for recovery hereunder in exchange for payment of the sums allegedly due, performance of covenants allegedly breached, or consideration substantially equal to the relief sought in the action.


8.4 California Consumer Privacy Act (CCPA). ClickHouse is a “Service Provider” as such term is defined under §1798.140(v) of the CCPA. As such ClickHouse shall not retain, use or disclose any personal information (as defined in the CCPA) received from Customer during the Term of this Agreement for any purpose other than the specific purpose of providing the Support Services specified in this Agreement or for such other business purpose as is specified in this Agreement.


8.5 Customer Identification. ClickHouse may identify Customer as a user of the Support Services, on its website, through a press release issued by ClickHouse and in other promotional materials.


8.6 Feedback. Customer, Customer’s Affiliates, and their respective agents, may volunteer feedback to ClickHouse, and/or its Affiliates, about the Support Services (“Feedback”). ClickHouse and its Affiliates shall be irrevocably entitled to use that Feedback, for any purpose and without any duty to account, provided that, in doing so, they may not breach their obligations of confidentiality under Section 4 of this Agreement.


8.7 Force Majeure. Except with respect to payment obligations, neither party will be liable for, or be considered to be in breach of, or in default under, this Agreement, as a result of any cause or condition beyond such party's reasonable control.


8.8 Governing Law, Jurisdiction and Venue.

  a. Customers in California. If Customer is located in California (as determined by the Customer address on the applicable Order Form), this Agreement will be governed by the laws of the State of California, without regard to its conflict of laws principles, and all suits hereunder will be brought solely in Federal Court for the Northern District of California, or if that court lacks subject matter jurisdiction, in any California State Court located in Santa Clara County.
  b. Customers Outside of California. If Customer is located anywhere other than California (as determined by the Customer address on the applicable Order Form), this Agreement will be governed by the laws of the State of Delaware, without regard to its conflict of laws principles, and all suits hereunder will be brought solely in Federal Court for the District of Delaware, or if that court lacks subject matter jurisdiction, in any Delaware State Court located in Wilmington, Delaware.
  c. All Customers. This Agreement shall not be governed by the 1980 UN Convention on Contracts for the International Sale of Goods. The parties hereby irrevocably waive any and all claims and defenses either might otherwise have in any action or proceeding in any of the applicable courts set forth in (a) or (b) above, based upon any alleged lack of personal jurisdiction, improper venue, forum non conveniens, or any similar claim or defense.
  d. Equitable Relief. A breach or threatened breach by either party of Section 4 may cause irreparable harm for which damages at law may not provide adequate relief, and therefore the non-breaching party shall be entitled to seek injunctive relief without being required to post a bond.
+ +

8.9 Non-waiver. Any failure of either party to insist upon or enforce performance by the other party of any of the provisions of this Agreement or to exercise any rights or remedies under this Agreement will not be interpreted or construed as a waiver or relinquishment of such party's right to assert or rely upon such provision, right or remedy in that or any other instance.


8.10 Notices. Any notice or other communication under this Agreement given by either party to the other will be deemed to be properly given if given in writing and delivered in person or by e-mail, if acknowledged received by return e-mail or followed within one day by a delivered or mailed copy of such notice, or if mailed, properly addressed and stamped with the required postage, to the intended recipient at its address specified on an Order Form. Notices to ClickHouse may also be sent to legal@ClickHouse.com. Either party may from time to time change its address for notices under this Section by giving the other party notice of the change in accordance with this Section.


8.11 Relationship of the Parties. The relationship of the parties hereunder shall be that of independent contractors, and nothing herein shall be deemed or construed to create any employment, agency or fiduciary relationship between the parties. Each party shall be solely responsible for the supervision, direction, control and payment of its personnel, including, without limitation, for taxes, deductions and withholdings, compensation and benefits, and nothing herein will be deemed to result in either party having an employer-employee relationship with the personnel of the other party.


8.12 Severability. If any provision of this Agreement is held to be invalid or unenforceable, the remaining portions will remain in full force and effect and such provision will be enforced to the maximum extent possible so as to give effect to the intent of the parties and will be reformed to the extent necessary to make such provision valid and enforceable.

8.13 Entire Agreement; Amendment. This Agreement, together with any Order Forms executed by the parties, and the Support Services Policy, each of which is hereby incorporated herein by this reference, constitutes the entire agreement between the parties concerning the subject matter hereof, and it supersedes, and its terms govern, all prior proposals, agreements, or other communications between the parties, oral or written, regarding such subject matter. This Agreement may be executed in any number of counterparts, each of which when so executed and delivered shall be deemed an original, and all of which together shall constitute one and the same agreement. Execution of a scanned copy will have the same force and effect as execution of an original, and a scanned signature will be deemed an original and valid signature. In the event of any conflict between the terms and conditions of any of the foregoing documents, the conflict shall be resolved based on the following order of precedence: (i) an applicable Order Form (but only for the transaction thereunder), (ii) an applicable Addendum (including any exhibits, attachments and addenda thereto), (iii) this Agreement, and (iv) the Support Services Policy. For the avoidance of doubt, the parties hereby expressly acknowledge and agree that if Customer issues any purchase orders or similar documents in connection with its purchase of a Subscription, it shall do so only for the purpose of Section 2.2(2) or for its own internal, administrative purposes and not with the intent to provide any contractual terms. By entering into this Agreement, whether prior to or following receipt of Customer's purchase order or any similar document, the parties are hereby expressly indicating their intention not to be contractually bound by the contents of any such purchase order or similar document, which are hereby deemed rejected and extraneous to this Agreement, and ClickHouse's performance of this Agreement shall not amount to: (i) an acceptance by conduct of any terms set out or referred to in the purchase order or similar document; (ii) an amendment of this Agreement; or (iii) an agreement to amend this Agreement. This Agreement shall not be modified except by a subsequently dated, written amendment that expressly amends this Agreement and which is signed on behalf of ClickHouse and Customer by their duly authorized representatives. The parties agree that the terms and conditions of this Agreement are a result of mutual negotiations. Therefore, the rule of construction that any ambiguity shall be construed against the drafter will not apply to this Agreement. Any ambiguity shall be reasonably construed as to its fair meaning and not strictly for or against one party regardless of who authored the ambiguous language.

\ No newline at end of file
diff --git a/website/templates/support/agreement-hero.html b/website/templates/support/agreement-hero.html
new file mode 100644
index 00000000000..ea97fb7729a
--- /dev/null
+++ b/website/templates/support/agreement-hero.html
@@ -0,0 +1,10 @@
+    {{ _('Clickhouse, Inc.
+    Subscription Agreement') }}
\ No newline at end of file

From 7cb0433fae3f79d3033947725d97ea2517cb4a52 Mon Sep 17 00:00:00 2001
From: W
Date: Fri, 11 Feb 2022 14:34:20 +0800
Subject: [PATCH 212/215] Update buildPushingToViewsChain.h typo

---
 src/Processors/Transforms/buildPushingToViewsChain.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Processors/Transforms/buildPushingToViewsChain.h b/src/Processors/Transforms/buildPushingToViewsChain.h
index 260fdfb3a19..98e7f19a37a 100644
--- a/src/Processors/Transforms/buildPushingToViewsChain.h
+++ b/src/Processors/Transforms/buildPushingToViewsChain.h
@@ -18,7 +18,7 @@ namespace DB

 struct ViewRuntimeData
 {
-    /// A query we should run over inserted block befire pushing into inner storage.
+    /// A query we should run over inserted block before pushing into inner storage.
     const ASTPtr query;
     /// This structure is expected by inner storage. Will convert query result to it.
     Block sample_block;

From 3a4c2938d36465e6c8f6d45931d7aaaa616a9995 Mon Sep 17 00:00:00 2001
From: cnmade
Date: Fri, 11 Feb 2022 14:43:28 +0800
Subject: [PATCH 213/215] Translate zh/engines/database-engines/postgresql: sync translate from en doc

---
 docs/zh/engines/database-engines/postgresql.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/zh/engines/database-engines/postgresql.md b/docs/zh/engines/database-engines/postgresql.md
index 12b8133f404..936216e8f5c 100644
--- a/docs/zh/engines/database-engines/postgresql.md
+++ b/docs/zh/engines/database-engines/postgresql.md
@@ -24,6 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
 - `database` — 远程数据库名次
 - `user` — PostgreSQL用户名称
 - `password` — PostgreSQL用户密码
+ `schema` - PostgreSQL 模式
 - `use_table_cache` — 定义数据库表结构是否已缓存或不进行。可选的。默认值: `0`.

 ## 支持的数据类型 {#data_types-support}
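For context between the two postgresql.md patches: the `schema` argument documented above sits between `password` and `use_table_cache` in the engine's DDL. A minimal sketch of its use, assuming a reachable PostgreSQL server; the host, credentials, database, and schema names below are illustrative and not taken from the patches:

```sql
-- Map the remote PostgreSQL schema 'sales' of database 'shop' into a
-- ClickHouse database, with the table-structure cache enabled (last arg = 1).
CREATE DATABASE pg_sales
ENGINE = PostgreSQL('postgres-host:5432', 'shop', 'pg_user', 'pg_password', 'sales', 1);

-- Tables of the remote schema then appear directly in the new database.
SHOW TABLES FROM pg_sales;
```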
From 6a57b30983421c1dbc5cc801f3490ce83c63445a Mon Sep 17 00:00:00 2001
From: cnmade
Date: Fri, 11 Feb 2022 14:44:42 +0800
Subject: [PATCH 214/215] Translate zh/engines/database-engines/postgresql: fix symbol

---
 docs/zh/engines/database-engines/postgresql.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/zh/engines/database-engines/postgresql.md b/docs/zh/engines/database-engines/postgresql.md
index 936216e8f5c..4d2af9182f9 100644
--- a/docs/zh/engines/database-engines/postgresql.md
+++ b/docs/zh/engines/database-engines/postgresql.md
@@ -24,7 +24,7 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `use_table_cac
 - `database` — 远程数据库名次
 - `user` — PostgreSQL用户名称
 - `password` — PostgreSQL用户密码
- `schema` - PostgreSQL 模式
+- `schema` - PostgreSQL 模式
 - `use_table_cache` — 定义数据库表结构是否已缓存或不进行。可选的。默认值: `0`.

 ## 支持的数据类型 {#data_types-support}

From a7c2cb53d8705b24866a1598323665efbb2ed055 Mon Sep 17 00:00:00 2001
From: cnmade
Date: Fri, 11 Feb 2022 16:58:13 +0800
Subject: [PATCH 215/215] Translate zh/engines/database-engines/replicated: sync translate

---
 docs/zh/engines/database-engines/replicated.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/zh/engines/database-engines/replicated.md b/docs/zh/engines/database-engines/replicated.md
index 9ffebe04571..bd5841491dd 100644
--- a/docs/zh/engines/database-engines/replicated.md
+++ b/docs/zh/engines/database-engines/replicated.md
@@ -31,6 +31,7 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na
 当创建数据库的新副本时，该副本会自己创建表。如果副本已经不可用很长一段时间，并且已经滞后于复制日志-它用ZooKeeper中的当前元数据检查它的本地元数据，将带有数据的额外表移动到一个单独的非复制数据库(以免意外地删除任何多余的东西)，创建缺失的表，如果表名已经被重命名，则更新表名。数据在`ReplicatedMergeTree`级别被复制，也就是说，如果表没有被复制，数据将不会被复制(数据库只负责元数据)。

+允许[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md)查询，但不允许复制。数据库引擎将只向当前副本添加/获取/删除分区/部件。但是，如果表本身使用了Replicated表引擎，那么数据将在使用`ATTACH`后被复制。

 ## 使用示例 {#usage-example}

 创建三台主机的集群:
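The replicated.md hunk above ends just as the usage example begins ("创建三台主机的集群:", that is, "create a cluster of three hosts:"). As a hedged sketch of what such an example involves, not the documented example itself; the ZooKeeper path, shard, and replica names are hypothetical and each host would substitute its own shard/replica pair:

```sql
-- Run on each host; all replicas of one database share the same ZooKeeper path.
CREATE DATABASE testdb
ENGINE = Replicated('/clickhouse/databases/testdb', 'shard1', 'replica1');

-- DDL issued inside a Replicated database is propagated to every replica;
-- with a Replicated* table engine, the data is replicated as well.
CREATE TABLE testdb.events (ts DateTime, message String)
ENGINE = ReplicatedMergeTree ORDER BY ts;
```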