From ea936e6f94736e9e4a47e1494a105c2256324873 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Tue, 10 Aug 2021 14:52:34 +0300 Subject: [PATCH 001/262] Adding interface for local fuzzing --- programs/local/CMakeLists.txt | 13 + programs/local/LocalServer.cpp | 289 +++++++++++------- programs/local/LocalServer.h | 3 +- src/Core/Settings.h | 2 +- src/Functions/CMakeLists.txt | 4 + src/Functions/getFuzzerData.cpp | 51 ++++ .../registerFunctionsMiscellaneous.cpp | 8 + 7 files changed, 263 insertions(+), 107 deletions(-) create mode 100644 src/Functions/getFuzzerData.cpp diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 530128c2041..62811b9c1b9 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -17,3 +17,16 @@ clickhouse_program_add(local) if(NOT CLICKHOUSE_ONE_SHARED) target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib) endif() + +if (ENABLE_FUZZING) + add_compile_definitions(FUZZING_MODE=1) + add_executable(fuzz-clickhouse-local LocalServer.cpp ${SRCS}) + target_link_libraries(fuzz-clickhouse-local PRIVATE + dbms + ${LIB_FUZZING_ENGINE} + loggers + clickhouse_functions + clickhouse_aggregate_functions + clickhouse_storages_system + clickhouse_table_functions) +endif () diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index e256338a538..7f3a3be3815 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -211,6 +211,11 @@ try throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); } + // Non-fuzzer mode: main function runs only one time, + // so first_time clauses does not affect anything + static bool first_time = true; + if (first_time) + { shared_context = Context::createShared(); global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); @@ -299,15 +304,119 @@ try { attachSystemTables(global_context); } + status.reset(); + } - processQueries(); + /// processing queries + static String initial_create_query = getInitialCreateTableQuery(); + static String queries_str = initial_create_query; + + if (first_time) + { + if (config().has("query")) + queries_str += config().getRawString("query"); + else + { + String queries_from_file; + ReadBufferFromFile in(config().getString("queries-file")); + readStringUntilEOF(queries_from_file, in); + queries_str += queries_from_file; + } + } + + static const auto & settings = global_context->getSettingsRef(); + + static std::vector queries; + static auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); + + if (!parse_res.second) + throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); + + /// we can't mutate global global_context (can lead to races, as it was already passed to some background threads) + /// so we can't reuse it safely as a query context and need a copy here + auto context = Context::createCopy(global_context); + + context->makeSessionContext(); + context->makeQueryContext(); + + context->setUser("default", "", Poco::Net::SocketAddress{}); + context->setCurrentQueryId(""); + applyCmdSettings(context); + + /// Use the same query_id (and thread group) for all queries + CurrentThread::QueryScope query_scope_holder(context); + + /// Set progress show + need_render_progress = config().getBool("progress", false); + + std::function finalize_progress; + if (need_render_progress) + { + /// Set progress 
callback, which can be run from multiple threads. + context->setProgressCallback([&](const Progress & value) + { + /// Write progress only if progress was updated + if (progress_indication.updateProgress(value)) + progress_indication.writeProgress(); + }); + + /// Set finalizing callback for progress, which is called right before finalizing query output. + finalize_progress = [&]() + { + progress_indication.clearProgressOutput(); + }; + + /// Set callback for file processing progress. + progress_indication.setFileProgressCallback(context); + } + + bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); + + std::exception_ptr exception; + first_time = false; + + for (const auto & query : queries) + { + written_first_block = false; + progress_indication.resetProgress(); + + ReadBufferFromString read_buf(query); + WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); + if (echo_queries) + { + writeString(query, write_buf); + writeChar('\n', write_buf); + write_buf.next(); + } + + try + { + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, {}, finalize_progress); + } + catch (...) + { + if (!config().hasOption("ignore-error")) + { + throw; + } + + if (!exception) + exception = std::current_exception(); + + std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; + } + } + + if (exception) + std::rethrow_exception(exception); + +#ifndef FUZZING_MODE global_context->shutdown(); global_context.reset(); - status.reset(); cleanup(); - +#endif return Application::EXIT_OK; } catch (const Exception & e) @@ -350,107 +459,6 @@ std::string LocalServer::getInitialCreateTableQuery() "; "; } - -void LocalServer::processQueries() -{ - String initial_create_query = getInitialCreateTableQuery(); - String queries_str = initial_create_query; - - if (config().has("query")) - queries_str += config().getRawString("query"); - else - { - String queries_from_file; - ReadBufferFromFile in(config().getString("queries-file")); - readStringUntilEOF(queries_from_file, in); - queries_str += queries_from_file; - } - - const auto & settings = global_context->getSettingsRef(); - - std::vector queries; - auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); - - if (!parse_res.second) - throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); - - /// we can't mutate global global_context (can lead to races, as it was already passed to some background threads) - /// so we can't reuse it safely as a query context and need a copy here - auto context = Context::createCopy(global_context); - - context->makeSessionContext(); - context->makeQueryContext(); - - context->setUser("default", "", Poco::Net::SocketAddress{}); - context->setCurrentQueryId(""); - applyCmdSettings(context); - - /// Use the same query_id (and thread group) for all queries - CurrentThread::QueryScope query_scope_holder(context); - - /// Set progress show - need_render_progress = config().getBool("progress", false); - - std::function finalize_progress; - if (need_render_progress) - { - /// Set progress callback, which can be run from multiple threads. - context->setProgressCallback([&](const Progress & value) - { - /// Write progress only if progress was updated - if (progress_indication.updateProgress(value)) - progress_indication.writeProgress(); - }); - - /// Set finalizing callback for progress, which is called right before finalizing query output. 
- finalize_progress = [&]() - { - progress_indication.clearProgressOutput(); - }; - - /// Set callback for file processing progress. - progress_indication.setFileProgressCallback(context); - } - - bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); - - std::exception_ptr exception; - - for (const auto & query : queries) - { - written_first_block = false; - progress_indication.resetProgress(); - - ReadBufferFromString read_buf(query); - WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); - - if (echo_queries) - { - writeString(query, write_buf); - writeChar('\n', write_buf); - write_buf.next(); - } - - try - { - executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, {}, finalize_progress); - } - catch (...) - { - if (!config().hasOption("ignore-error")) - throw; - - if (!exception) - exception = std::current_exception(); - - std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; - } - } - - if (exception) - std::rethrow_exception(exception); -} - static const char * minimal_default_user_xml = "" " " @@ -661,9 +669,80 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wmissing-declarations" -int mainEntryClickHouseLocal(int argc, char ** argv) +#ifdef FUZZING_MODE +#include + +class FuzzApp { DB::LocalServer app; + +public: + inline void init(int argc, char ** argv) + { + app.init(argc, argv); + } + + inline int run() + { + return app.run(); + } +} fuzz_app; + +extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) +{ + int & argc = *pargc; + char ** argv = *pargv; + + // position of delimiter "--" that separates arguments + // of clickhouse-local and fuzzer + int pos_delim = argc; + for (int i = 0; i < argc; ++i) + { + if (strcmp(argv[i], "--") == 0) + { + pos_delim = i; + break; + } + } + + fuzz_app.init(pos_delim, argv); + for (int i = pos_delim + 1; i < argc; ++i) + std::swap(argv[i], argv[i - pos_delim]); + argc -= pos_delim; + if (argc == 0) // no delimiter provided + ++argc; + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +{ + try + { + // inappropriate symbol for fuzzing at the end + if (size) + --size; + auto cur_str = String(reinterpret_cast(data), size); + // to clearly see the beginning and the end + std::cerr << '>' << cur_str << '<' << std::endl; + DB::FunctionGetFuzzerData::update(cur_str); + fuzz_app.run(); + } + catch (...) 
+ { + std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; + return 1; + } + return 0; +} +#endif + +int mainEntryClickHouseLocal(int argc, char ** argv) +{ +#ifdef FUZZING_MODE + FuzzApp & app = fuzz_app; +#else + DB::LocalServer app; +#endif try { app.init(argc, argv); diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index e82caad7542..ed79e482bc9 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -38,7 +38,6 @@ private: void tryInitPath(); void applyCmdOptions(ContextMutablePtr context); void applyCmdSettings(ContextMutablePtr context); - void processQueries(); void setupUsers(); void cleanup(); @@ -60,3 +59,5 @@ protected: }; } + +int mainEntryClickHouseLocal(int argc, char ** argv); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e1bd1d29153..23f4e7911f3 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -575,7 +575,7 @@ class IColumn; \ M(String, format_regexp, "", "Regular expression (for Regexp format)", 0) \ M(String, format_regexp_escaping_rule, "Raw", "Field escaping rule (for Regexp format)", 0) \ - M(Bool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format", 0) \ + M(Bool, format_regexp_skip_unmatched, false, "Skip lines unmatched by regular expression (for Regexp format)", 0) \ \ M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \ M(Bool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \ diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index 04e5f80468b..f35f8a411b1 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -127,3 +127,7 @@ set_source_files_properties("pointInPolygon.cpp" PROPERTIES COMPILE_FLAGS -fno-s # target_link_libraries(clickhouse_functions PRIVATE ${S2_LIBRARY}) target_include_directories(clickhouse_functions SYSTEM PUBLIC ${S2_GEOMETRY_INCLUDE_DIR}) + +if (ENABLE_FUZZING) + add_compile_definitions(FUZZING_MODE=1) +endif () diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp new file mode 100644 index 00000000000..ef944593b2c --- /dev/null +++ b/src/Functions/getFuzzerData.cpp @@ -0,0 +1,51 @@ +#include +#include +#include +#include + +namespace DB +{ +class FunctionGetFuzzerData : public IFunction +{ + inline static String fuzz_data; + +public: + static constexpr auto name = "getFuzzerData"; + + inline static FunctionPtr create(ContextPtr) { return create(); } + + static FunctionPtr create() + { + return std::make_shared(); + } + + inline String getName() const override { return name; } + + inline size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + inline bool isDeterministic() const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, + const DataTypePtr &, + size_t input_rows_count) const override + { + return DataTypeString().createColumnConst(input_rows_count, fuzz_data); + } + + static void update(const String & fuzz_data_) + { + fuzz_data = fuzz_data_; + } +}; + +void registerFunctionGetFuzzerData(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerAlias("get_fuzzer_data", FunctionGetFuzzerData::name, FunctionFactory::CaseInsensitive); +} +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp 
b/src/Functions/registerFunctionsMiscellaneous.cpp index 12c54aeeefd..3bd644a14f5 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -83,6 +83,10 @@ void registerFunctionInitialQueryID(FunctionFactory & factory); void registerFunctionConvertCharset(FunctionFactory &); #endif +#ifdef FUZZING_MODE +void registerFunctionGetFuzzerData(FunctionFactory & factory); +#endif + void registerFunctionsMiscellaneous(FunctionFactory & factory) { registerFunctionCurrentDatabase(factory); @@ -160,6 +164,10 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) #if USE_ICU registerFunctionConvertCharset(factory); #endif + +#ifdef FUZZING_MODE + registerFunctionGetFuzzerData(factory); +#endif } } From 5826c7592a54a9776aaff82cc11008573e436820 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Thu, 12 Aug 2021 15:31:19 +0300 Subject: [PATCH 002/262] Update (see description) Improved parsing exception handling and changed the flag for file querying --- programs/local/LocalServer.cpp | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 7f3a3be3815..5d71baef33e 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -328,11 +328,33 @@ try static const auto & settings = global_context->getSettingsRef(); static std::vector queries; - static auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); + if (first_time) + { + std::pair parse_res; +#ifdef FUZZING_MODE + try + { +#endif + parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); +#ifdef FUZZING_MODE + } + catch (const Exception &) + { + // will be caught at the end of the main + throw; + } + catch (...) + { + std::cerr << "Undefined error while parsing" << std::endl; + exit(1); + } +#endif if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); + } + first_time = false; /// we can't mutate global global_context (can lead to races, as it was already passed to some background threads) /// so we can't reuse it safely as a query context and need a copy here auto context = Context::createCopy(global_context); @@ -349,7 +371,6 @@ try /// Set progress show need_render_progress = config().getBool("progress", false); - std::function finalize_progress; if (need_render_progress) { @@ -374,7 +395,6 @@ try bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); std::exception_ptr exception; - first_time = false; for (const auto & query : queries) { @@ -565,7 +585,7 @@ void LocalServer::init(int argc, char ** argv) ("help", "produce help message") ("config-file,c", po::value(), "config-file path") ("query,q", po::value(), "query") - ("queries-file, qf", po::value(), "file path with queries to execute") + ("queries-file,Q", po::value(), "file path with queries to execute") ("database,d", po::value(), "database") ("table,N", po::value(), "name of the initial table") @@ -730,7 +750,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) catch (...) { std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; + auto code = DB::getCurrentExceptionCode(); + return code ? 
code : 1; } return 0; } From a1e8d5d638b9a65c60bed452724cfbdba3d4ce88 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Thu, 12 Aug 2021 21:21:32 +0300 Subject: [PATCH 003/262] Fix creation global app clickhouse-server cannot run if FuzzApp is constructed. --- programs/local/LocalServer.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 5d71baef33e..98fb31c0b8f 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -706,7 +706,9 @@ public: { return app.run(); } -} fuzz_app; +}; + +std::optional fuzz_app; extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) { @@ -725,7 +727,8 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) } } - fuzz_app.init(pos_delim, argv); + fuzz_app.emplace(); + fuzz_app->init(pos_delim, argv); for (int i = pos_delim + 1; i < argc; ++i) std::swap(argv[i], argv[i - pos_delim]); argc -= pos_delim; @@ -745,7 +748,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) // to clearly see the beginning and the end std::cerr << '>' << cur_str << '<' << std::endl; DB::FunctionGetFuzzerData::update(cur_str); - fuzz_app.run(); + fuzz_app->run(); } catch (...) { @@ -759,11 +762,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) int mainEntryClickHouseLocal(int argc, char ** argv) { -#ifdef FUZZING_MODE - FuzzApp & app = fuzz_app; -#else DB::LocalServer app; -#endif try { app.init(argc, argv); From 8f6b98c7621b47a5211188a8bebf49593b4ebdd6 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Fri, 13 Aug 2021 14:32:50 +0300 Subject: [PATCH 004/262] Removed redundant class --- programs/local/LocalServer.cpp | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 98fb31c0b8f..9beb2ba7cb3 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -692,23 +692,7 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) #ifdef FUZZING_MODE #include -class FuzzApp -{ - DB::LocalServer app; - -public: - inline void init(int argc, char ** argv) - { - app.init(argc, argv); - } - - inline int run() - { - return app.run(); - } -}; - -std::optional fuzz_app; +std::optional fuzz_app; extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) { From f5b37883085679156b52b0f9a90c327a3fe1dc84 Mon Sep 17 00:00:00 2001 From: Alexey Boykov <33257111+mathalex@users.noreply.github.com> Date: Mon, 16 Aug 2021 20:22:39 +0300 Subject: [PATCH 005/262] Update LocalServer.h --- programs/local/LocalServer.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ed79e482bc9..4da8142a6ff 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -59,5 +59,3 @@ protected: }; } - -int mainEntryClickHouseLocal(int argc, char ** argv); From f1976bdf96c91666f3ab4e8805bc2f93c594472d Mon Sep 17 00:00:00 2001 From: Alexey Boykov <33257111+mathalex@users.noreply.github.com> Date: Mon, 23 Aug 2021 19:34:14 +0300 Subject: [PATCH 006/262] Update getFuzzerData.cpp --- src/Functions/getFuzzerData.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp index ef944593b2c..2570347598c 100644 --- a/src/Functions/getFuzzerData.cpp +++ b/src/Functions/getFuzzerData.cpp @@ -29,6 +29,8 @@ public: } inline bool isDeterministic() const override { 
return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, From fbe453ff9d411939254eff32645b8356f9db63f5 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Tue, 31 Aug 2021 15:15:04 +0300 Subject: [PATCH 007/262] style --- programs/local/LocalServer.cpp | 6 ++---- src/Functions/getFuzzerData.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 1885ca68e83..faa9d2a60a5 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -394,9 +394,9 @@ try /// Set finalizing callback for progress, which is called right before finalizing query output. finalize_progress = [&]() - { + { progress_indication.clearProgressOutput(); - }; + }; /// Set callback for file processing progress. progress_indication.setFileProgressCallback(context); @@ -427,9 +427,7 @@ try catch (...) { if (!config().hasOption("ignore-error")) - { throw; - } if (!exception) exception = std::current_exception(); diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp index 2570347598c..c01f575f0be 100644 --- a/src/Functions/getFuzzerData.cpp +++ b/src/Functions/getFuzzerData.cpp @@ -29,7 +29,7 @@ public: } inline bool isDeterministic() const override { return false; } - + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } ColumnPtr executeImpl(const ColumnsWithTypeAndName &, From eb3d251bb0dfe62bfb8c3944fd79dfa6445b165b Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Tue, 31 Aug 2021 17:21:23 +0300 Subject: [PATCH 008/262] try fix --- programs/local/LocalServer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index faa9d2a60a5..6e65193857a 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -213,6 +213,8 @@ try throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); } + std::optional status; + // Non-fuzzer mode: main function runs only one time, // so first_time clauses does not affect anything static bool first_time = true; @@ -224,8 +226,6 @@ try global_context->setApplicationType(Context::ApplicationType::LOCAL); tryInitPath(); - std::optional status; - /// Skip temp path installation /// We will terminate process on error @@ -315,7 +315,6 @@ try { attachSystemTables(global_context); } - status.reset(); } /// processing queries @@ -443,6 +442,7 @@ try global_context->shutdown(); global_context.reset(); + status.reset(); cleanup(); #endif return Application::EXIT_OK; From 24f47e9e3a6db4aa25cc00da01fcb1cd11e8f0ca Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Wed, 1 Sep 2021 16:24:31 +0300 Subject: [PATCH 009/262] delete static Attempt to fix recursive_mutex lock failed. 
--- programs/local/CMakeLists.txt | 1 + programs/local/LocalServer.cpp | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 62811b9c1b9..d965ecf05be 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -20,6 +20,7 @@ endif() if (ENABLE_FUZZING) add_compile_definitions(FUZZING_MODE=1) + set (WITH_COVERAGE ON) add_executable(fuzz-clickhouse-local LocalServer.cpp ${SRCS}) target_link_libraries(fuzz-clickhouse-local PRIVATE dbms diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6e65193857a..0a422576545 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -319,8 +319,8 @@ try /// processing queries - static String initial_create_query = getInitialCreateTableQuery(); - static String queries_str = initial_create_query; + String initial_create_query = getInitialCreateTableQuery(); + String queries_str = initial_create_query; if (first_time) { @@ -335,7 +335,7 @@ try } } - static const auto & settings = global_context->getSettingsRef(); + const auto & settings = global_context->getSettingsRef(); static std::vector queries; if (first_time) From dbb697491eb90301b824475ebb7398ad598fa342 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Thu, 2 Sep 2021 16:28:17 +0300 Subject: [PATCH 010/262] Try make full compatibility --- programs/local/LocalServer.cpp | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 0a422576545..86de654b939 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -215,11 +215,11 @@ try std::optional status; - // Non-fuzzer mode: main function runs only one time, - // so first_time clauses does not affect anything +#ifdef FUZZING_MODE static bool first_time = true; if (first_time) { +#endif shared_context = Context::createShared(); global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); @@ -315,15 +315,19 @@ try { attachSystemTables(global_context); } +#ifdef FUZZING_MODE } +#endif /// processing queries String initial_create_query = getInitialCreateTableQuery(); String queries_str = initial_create_query; +#ifdef FUZZING_MODE if (first_time) { +#endif if (config().has("query")) queries_str += config().getRawString("query"); else @@ -333,13 +337,19 @@ try readStringUntilEOF(queries_from_file, in); queries_str += queries_from_file; } +#ifdef FUZZING_MODE } +#endif const auto & settings = global_context->getSettingsRef(); +#ifdef FUZZING_MODE static std::vector queries; if (first_time) { +#else + std::vector queries; +#endif std::pair parse_res; #ifdef FUZZING_MODE try @@ -362,9 +372,11 @@ try if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); +#ifdef FUZZING_MODE } - first_time = false; +#endif + /// Authenticate and create a context to execute queries. 
Session session{global_context, ClientInfo::Interface::LOCAL}; session.authenticate("default", "", {}); @@ -445,6 +457,7 @@ try status.reset(); cleanup(); #endif + return Application::EXIT_OK; } catch (const Exception & e) From 3354bca55396bf9b03595332c2b3f9579d714ac3 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Thu, 30 Sep 2021 16:51:15 +0300 Subject: [PATCH 011/262] Make code exactly similar in the non-fuzzer mode --- programs/local/LocalServer.cpp | 426 +++++++++++++++++++++++++-------- programs/local/LocalServer.h | 1 + 2 files changed, 328 insertions(+), 99 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b0b1ac45630..7d7f0d26eaa 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -192,6 +192,7 @@ static DatabasePtr createMemoryDatabaseIfNotExists(ContextPtr context, const Str return system_database; } +#ifndef FUZZING_MODE int LocalServer::main(const std::vector & /*args*/) try { @@ -212,19 +213,14 @@ try throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); } - std::optional status; - -#ifdef FUZZING_MODE - static bool first_time = true; - if (first_time) - { -#endif shared_context = Context::createShared(); global_context = Context::createGlobal(shared_context.get()); global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::LOCAL); tryInitPath(); + std::optional status; + /// Skip temp path installation /// We will terminate process on error @@ -321,19 +317,62 @@ try attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); } -#ifdef FUZZING_MODE + + processQueries(); + + global_context->shutdown(); + global_context.reset(); + + status.reset(); + cleanup(); + + return Application::EXIT_OK; +} +catch (const Exception & e) +{ + try + { + cleanup(); } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; + + /// If exception code isn't zero, we should return non-zero return code anyway. + return e.code() ? 
e.code() : -1; +} #endif - /// processing queries +std::string LocalServer::getInitialCreateTableQuery() +{ + if (!config().has("table-structure")) + return {}; + auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); + auto table_structure = config().getString("table-structure"); + auto data_format = backQuoteIfNeed(config().getString("table-data-format", "TSV")); + String table_file; + if (!config().has("table-file") || config().getString("table-file") == "-") /// Use Unix tools stdin naming convention + table_file = "stdin"; + else /// Use regular file + table_file = quoteString(config().getString("table-file")); + + return + "CREATE TABLE " + table_name + + " (" + table_structure + ") " + + "ENGINE = " + "File(" + data_format + ", " + table_file + ")" + "; "; +} + +void LocalServer::processQueries() +{ String initial_create_query = getInitialCreateTableQuery(); String queries_str = initial_create_query; -#ifdef FUZZING_MODE - if (first_time) - { -#endif if (config().has("query")) queries_str += config().getRawString("query"); else @@ -343,45 +382,14 @@ try readStringUntilEOF(queries_from_file, in); queries_str += queries_from_file; } -#ifdef FUZZING_MODE - } -#endif const auto & settings = global_context->getSettingsRef(); -#ifdef FUZZING_MODE - static std::vector queries; - if (first_time) - { -#else std::vector queries; -#endif - std::pair parse_res; -#ifdef FUZZING_MODE - try - { -#endif - parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); -#ifdef FUZZING_MODE - } - catch (const Exception &) - { - // will be caught at the end of the main - throw; - } - catch (...) - { - std::cerr << "Undefined error while parsing" << std::endl; - exit(1); - } -#endif + auto parse_res = splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); if (!parse_res.second) throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); -#ifdef FUZZING_MODE - } - first_time = false; -#endif /// Authenticate and create a context to execute queries. Session session{global_context, ClientInfo::Interface::LOCAL}; @@ -399,16 +407,17 @@ try /// Set progress show need_render_progress = config().getBool("progress", false); + std::function finalize_progress; if (need_render_progress) { /// Set progress callback, which can be run from multiple threads. context->setProgressCallback([&](const Progress & value) - { - /// Write progress only if progress was updated - if (progress_indication.updateProgress(value)) - progress_indication.writeProgress(); - }); + { + /// Write progress only if progress was updated + if (progress_indication.updateProgress(value)) + progress_indication.writeProgress(); + }); /// Set finalizing callback for progress, which is called right before finalizing query output. finalize_progress = [&]() @@ -431,6 +440,7 @@ try ReadBufferFromString read_buf(query); WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); + if (echo_queries) { writeString(query, write_buf); @@ -456,55 +466,6 @@ try if (exception) std::rethrow_exception(exception); - -#ifndef FUZZING_MODE - global_context->shutdown(); - global_context.reset(); - - status.reset(); - cleanup(); -#endif - - return Application::EXIT_OK; -} -catch (const Exception & e) -{ - try - { - cleanup(); - } - catch (...) 
- { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - - std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; - - /// If exception code isn't zero, we should return non-zero return code anyway. - return e.code() ? e.code() : -1; -} - - -std::string LocalServer::getInitialCreateTableQuery() -{ - if (!config().has("table-structure")) - return {}; - - auto table_name = backQuoteIfNeed(config().getString("table-name", "table")); - auto table_structure = config().getString("table-structure"); - auto data_format = backQuoteIfNeed(config().getString("table-data-format", "TSV")); - String table_file; - if (!config().has("table-file") || config().getString("table-file") == "-") /// Use Unix tools stdin naming convention - table_file = "stdin"; - else /// Use regular file - table_file = quoteString(config().getString("table-file")); - - return - "CREATE TABLE " + table_name + - " (" + table_structure + ") " + - "ENGINE = " - "File(" + data_format + ", " + table_file + ")" - "; "; } static const char * minimal_default_user_xml = @@ -718,6 +679,273 @@ void LocalServer::applyCmdOptions(ContextMutablePtr context) #pragma GCC diagnostic ignored "-Wmissing-declarations" #ifdef FUZZING_MODE +/// This main will not lead to a crash after reuse +int DB::LocalServer::main(const std::vector & /*args*/) +try +{ + Poco::Logger * log = &logger(); + ThreadStatus thread_status; + UseSSL use_ssl; + + if (!config().has("query") && !config().has("table-structure") && !config().has("queries-file")) /// Nothing to process + { + if (config().hasOption("verbose")) + std::cerr << "There are no queries to process." << '\n'; + + return Application::EXIT_OK; + } + + if (config().has("query") && config().has("queries-file")) + { + throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); + } + + std::optional status; + + static bool first_time = true; + if (first_time) + { + shared_context = Context::createShared(); + global_context = Context::createGlobal(shared_context.get()); + global_context->makeGlobalContext(); + global_context->setApplicationType(Context::ApplicationType::LOCAL); + tryInitPath(); + + /// Skip temp path installation + + /// We will terminate process on error + static KillingErrorHandler error_handler; + Poco::ErrorHandler::set(&error_handler); + + /// Don't initialize DateLUT + + registerFunctions(); + registerAggregateFunctions(); + registerTableFunctions(); + registerStorages(); + registerDictionaries(); + registerDisks(); + registerFormats(); + + /// Maybe useless + if (config().has("macros")) + global_context->setMacros(std::make_unique(config(), "macros", log)); + + /// Skip networking + + /// Sets external authenticators config (LDAP, Kerberos). + global_context->setExternalAuthenticatorsConfig(config()); + + global_context->initializeBackgroundExecutors(); + + setupUsers(); + + /// Limit on total number of concurrently executing queries. + /// There is no need for concurrent queries, override max_concurrent_queries. + global_context->getProcessList().setMaxSize(0); + + /// Size of cache for uncompressed blocks. Zero means disabled. + size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); + if (uncompressed_cache_size) + global_context->setUncompressedCache(uncompressed_cache_size); + + /// Size of cache for marks (index of MergeTree family of tables). It is necessary. + /// Specify default value for mark_cache_size explicitly! 
+ size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); + if (mark_cache_size) + global_context->setMarkCache(mark_cache_size); + + /// A cache for mmapped files. + size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary. + if (mmap_cache_size) + global_context->setMMappedFileCache(mmap_cache_size); + + /// Load global settings from default_profile and system_profile. + global_context->setDefaultProfiles(config()); + + /// We load temporary database first, because projections need it. + DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase(); + + /** Init dummy default DB + * NOTE: We force using isolated default database to avoid conflicts with default database from server environment + * Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory; + * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons. + */ + std::string default_database = config().getString("default_database", "_local"); + DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, global_context)); + global_context->setCurrentDatabase(default_database); + applyCmdOptions(global_context); + + if (config().has("path")) + { + String path = global_context->getPath(); + + /// Lock path directory before read + status.emplace(path + "status", StatusFile::write_full_info); + + fs::create_directories(fs::path(path) / "user_defined/"); + LOG_DEBUG(log, "Loading user defined objects from {}", path); + Poco::File(path + "user_defined/").createDirectories(); + UserDefinedSQLObjectsLoader::instance().loadObjects(global_context); + LOG_DEBUG(log, "Loaded user defined objects."); + + LOG_DEBUG(log, "Loading metadata from {}", path); + fs::create_directories(fs::path(path) / "data/"); + fs::create_directories(fs::path(path) / "metadata/"); + loadMetadataSystem(global_context); + attachSystemTablesLocal(*createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); + attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); + attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); + loadMetadata(global_context); + startupSystemTables(); + DatabaseCatalog::instance().loadDatabases(); + LOG_DEBUG(log, "Loaded metadata."); + } + else if (!config().has("no-system-tables")) + { + attachSystemTablesLocal(*createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); + attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); + attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); + } + } + + /// processing queries + + String initial_create_query = getInitialCreateTableQuery(); + String queries_str = initial_create_query; + + if (first_time) + { + if (config().has("query")) + queries_str += config().getRawString("query"); + else + { + String queries_from_file; + ReadBufferFromFile in(config().getString("queries-file")); + readStringUntilEOF(queries_from_file, in); + queries_str += queries_from_file; + } + } + + const auto & settings = global_context->getSettingsRef(); + + static std::vector queries; + if (first_time) + { + std::pair parse_res; + try + { + parse_res = 
splitMultipartQuery(queries_str, queries, settings.max_query_size, settings.max_parser_depth); + } + catch (const Exception &) + { + // will be caught at the end of the main + throw; + } + catch (...) + { + std::cerr << "Undefined error while parsing" << std::endl; + exit(1); + } + + if (!parse_res.second) + throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); + } + first_time = false; + + /// Authenticate and create a context to execute queries. + Session session{global_context, ClientInfo::Interface::LOCAL}; + session.authenticate("default", "", {}); + + /// Use the same context for all queries. + auto context = session.makeQueryContext(); + context->makeSessionContext(); /// initial_create_query requires a session context to be set. + context->setCurrentQueryId(""); + + applyCmdSettings(context); + + /// Use the same query_id (and thread group) for all queries + CurrentThread::QueryScope query_scope_holder(context); + + /// Set progress show + need_render_progress = config().getBool("progress", false); + std::function finalize_progress; + if (need_render_progress) + { + /// Set progress callback, which can be run from multiple threads. + context->setProgressCallback([&](const Progress & value) + { + /// Write progress only if progress was updated + if (progress_indication.updateProgress(value)) + progress_indication.writeProgress(); + }); + + /// Set finalizing callback for progress, which is called right before finalizing query output. + finalize_progress = [&]() + { + progress_indication.clearProgressOutput(); + }; + + /// Set callback for file processing progress. + progress_indication.setFileProgressCallback(context); + } + + bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); + + std::exception_ptr exception; + + for (const auto & query : queries) + { + written_first_block = false; + progress_indication.resetProgress(); + + ReadBufferFromString read_buf(query); + WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); + if (echo_queries) + { + writeString(query, write_buf); + writeChar('\n', write_buf); + write_buf.next(); + } + + try + { + executeQuery(read_buf, write_buf, /* allow_into_outfile = */ true, context, {}, {}, finalize_progress); + } + catch (...) + { + if (!config().hasOption("ignore-error")) + throw; + + if (!exception) + exception = std::current_exception(); + + std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; + } + } + + if (exception) + std::rethrow_exception(exception); + + return Application::EXIT_OK; +} +catch (const Exception & e) +{ + try + { + cleanup(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + + std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; + + /// If exception code isn't zero, we should return non-zero return code anyway. + return e.code() ? 
e.code() : -1; +} + #include std::optional fuzz_app; diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index 4da8142a6ff..e82caad7542 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -38,6 +38,7 @@ private: void tryInitPath(); void applyCmdOptions(ContextMutablePtr context); void applyCmdSettings(ContextMutablePtr context); + void processQueries(); void setupUsers(); void cleanup(); From f5b1e1daaae2931b181f1e89658e3bc00240d8fd Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Thu, 30 Sep 2021 16:53:53 +0300 Subject: [PATCH 012/262] Spaces --- programs/local/LocalServer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 7d7f0d26eaa..6eeed5160c4 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -413,11 +413,11 @@ void LocalServer::processQueries() { /// Set progress callback, which can be run from multiple threads. context->setProgressCallback([&](const Progress & value) - { - /// Write progress only if progress was updated - if (progress_indication.updateProgress(value)) - progress_indication.writeProgress(); - }); + { + /// Write progress only if progress was updated + if (progress_indication.updateProgress(value)) + progress_indication.writeProgress(); + }); /// Set finalizing callback for progress, which is called right before finalizing query output. finalize_progress = [&]() From ea320c96d52f83ccbdae0011d6611c6fcf02d43e Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Wed, 18 Aug 2021 16:55:15 +0200 Subject: [PATCH 013/262] Test for issue #26643 --- .../message_with_repeated.proto | 19 ++ .../message_with_repeated_pb2.py | 180 ++++++++++++++++++ tests/integration/test_storage_kafka/test.py | 118 ++++++++++++ 3 files changed, 317 insertions(+) create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto create mode 100644 tests/integration/test_storage_kafka/message_with_repeated_pb2.py diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto new file mode 100644 index 00000000000..791a5086866 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/message_with_repeated.proto @@ -0,0 +1,19 @@ +syntax = "proto3"; +option optimize_for = SPEED; +message Message { + uint32 tnow = 1; + string server = 2; + string clien = 3; + uint32 sPort = 4; + uint32 cPort = 5; + repeated dd r = 6; + string method = 7; +} + +message dd { + string name = 1; + uint32 class = 2; + uint32 type = 3; + uint64 ttl = 4; + bytes data = 5; +} \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py new file mode 100644 index 00000000000..69702307e7f --- /dev/null +++ b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py @@ -0,0 +1,180 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: clickhouse_path/format_schemas/message_with_repeated.proto + +import sys +_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='clickhouse_path/format_schemas/message_with_repeated.proto', + package='', + syntax='proto3', + serialized_options=_b('H\001'), + serialized_pb=_b('\n:clickhouse_path/format_schemas/message_with_repeated.proto\"t\n\x07Message\x12\x0c\n\x04tnow\x18\x01 \x01(\r\x12\x0e\n\x06server\x18\x02 \x01(\t\x12\r\n\x05\x63lien\x18\x03 \x01(\t\x12\r\n\x05sPort\x18\x04 \x01(\r\x12\r\n\x05\x63Port\x18\x05 \x01(\r\x12\x0e\n\x01r\x18\x06 \x03(\x0b\x32\x03.dd\x12\x0e\n\x06method\x18\x07 \x01(\t\"J\n\x02\x64\x64\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05\x63lass\x18\x02 \x01(\r\x12\x0c\n\x04type\x18\x03 \x01(\r\x12\x0b\n\x03ttl\x18\x04 \x01(\x04\x12\x0c\n\x04\x64\x61ta\x18\x05 \x01(\x0c\x42\x02H\x01\x62\x06proto3') +) + + + + +_MESSAGE = _descriptor.Descriptor( + name='Message', + full_name='Message', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='tnow', full_name='Message.tnow', index=0, + number=1, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='server', full_name='Message.server', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='clien', full_name='Message.clien', index=2, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='sPort', full_name='Message.sPort', index=3, + number=4, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='cPort', full_name='Message.cPort', index=4, + number=5, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='r', full_name='Message.r', index=5, + number=6, type=11, cpp_type=10, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='method', full_name='Message.method', index=6, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, 
containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=62, + serialized_end=178, +) + + +_DD = _descriptor.Descriptor( + name='dd', + full_name='dd', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='dd.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='class', full_name='dd.class', index=1, + number=2, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='type', full_name='dd.type', index=2, + number=3, type=13, cpp_type=3, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='ttl', full_name='dd.ttl', index=3, + number=4, type=4, cpp_type=4, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='data', full_name='dd.data', index=4, + number=5, type=12, cpp_type=9, label=1, + has_default_value=False, default_value=_b(""), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=180, + serialized_end=254, +) + +_MESSAGE.fields_by_name['r'].message_type = _DD +DESCRIPTOR.message_types_by_name['Message'] = _MESSAGE +DESCRIPTOR.message_types_by_name['dd'] = _DD +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +Message = _reflection.GeneratedProtocolMessageType('Message', (_message.Message,), dict( + DESCRIPTOR = _MESSAGE, + __module__ = 'clickhouse_path.format_schemas.message_with_repeated_pb2' + # @@protoc_insertion_point(class_scope:Message) + )) +_sym_db.RegisterMessage(Message) + +dd = _reflection.GeneratedProtocolMessageType('dd', (_message.Message,), dict( + DESCRIPTOR = _DD, + __module__ = 'clickhouse_path.format_schemas.message_with_repeated_pb2' + # @@protoc_insertion_point(class_scope:dd) + )) +_sym_db.RegisterMessage(dd) + + +DESCRIPTOR._options = None +# @@protoc_insertion_point(module_scope) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 8883684730f..cc4cff94f1e 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -35,6 +35,7 @@ from kafka.admin import NewTopic from . import kafka_pb2 from . import social_pb2 +from . import message_with_repeated_pb2 # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. 
@@ -3077,6 +3078,123 @@ def test_kafka_consumer_failover(kafka_cluster): kafka_delete_topic(admin_client, topic_name) +# https://github.com/ClickHouse/ClickHouse/issues/26643 +def test_issue26643(kafka_cluster): + + # for backporting: + # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port)) + producer = KafkaProducer(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port), value_serializer=producer_serializer) + + topic_list = [] + topic_list.append(NewTopic(name="test_issue26643", num_partitions=4, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + + msg = message_with_repeated_pb2.Message( + tnow=1629000000, + server='server1', + clien='host1', + sPort=443, + cPort=50000, + r=[ + message_with_repeated_pb2.dd(name='1', type=444, ttl=123123, data=b'adsfasd'), + message_with_repeated_pb2.dd(name='2') + ], + method='GET' + ) + + data = b'' + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + msg = message_with_repeated_pb2.Message( + tnow=1629000002 + ) + + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + + producer.send(topic="test_issue26643", value=data) + + data = _VarintBytes(len(serialized_msg)) + serialized_msg + producer.send(topic="test_issue26643", value=data) + producer.flush() + + instance.query(''' + CREATE TABLE IF NOT EXISTS test.test_queue + ( + `tnow` UInt32, + `server` String, + `client` String, + `sPort` UInt16, + `cPort` UInt16, + `r.name` Array(String), + `r.class` Array(UInt16), + `r.type` Array(UInt16), + `r.ttl` Array(UInt32), + `r.data` Array(String), + `method` String + ) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'test_issue26643', + kafka_group_name = 'test_issue26643_group', + kafka_format = 'Protobuf', + kafka_schema = 'message_with_repeated.proto:Message', + kafka_num_consumers = 4, + kafka_skip_broken_messages = 10000; + + SET allow_suspicious_low_cardinality_types=1; + + CREATE TABLE test.log + ( + `tnow` DateTime CODEC(DoubleDelta, LZ4), + `server` LowCardinality(String), + `client` LowCardinality(String), + `sPort` LowCardinality(UInt16), + `cPort` UInt16 CODEC(T64, LZ4), + `r.name` Array(String), + `r.class` Array(LowCardinality(UInt16)), + `r.type` Array(LowCardinality(UInt16)), + `r.ttl` Array(LowCardinality(UInt32)), + `r.data` Array(String), + `method` LowCardinality(String) + ) + ENGINE = MergeTree + PARTITION BY toYYYYMMDD(tnow) + ORDER BY (tnow, server) + TTL toDate(tnow) + toIntervalMonth(1000) + SETTINGS index_granularity = 16384, merge_with_ttl_timeout = 7200; + + CREATE MATERIALIZED VIEW test.test_consumer TO test.log AS + SELECT + toDateTime(a.tnow) AS tnow, + a.server AS server, + a.client AS client, + a.sPort AS sPort, + a.cPort AS cPort, + a.`r.name` AS `r.name`, + a.`r.class` AS `r.class`, + a.`r.type` AS `r.type`, + a.`r.ttl` AS `r.ttl`, + a.`r.data` AS `r.data`, + a.method AS method + FROM test.test_queue AS a; + ''') + + instance.wait_for_log_line("Committed offset") + result = instance.query('SELECT * FROM test.log') + + expected = '''\ +2021-08-15 07:00:00 server1 443 50000 ['1','2'] [0,0] [444,0] [123123,0] ['adsfasd',''] GET +2021-08-15 07:00:02 0 0 [] [] [] [] [] +2021-08-15 07:00:02 0 0 [] [] [] [] [] +''' + assert TSV(result) == TSV(expected) + + # kafka_cluster.open_bash_shell('instance') + if 
__name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") From c5cb4e071c1f8026850d2eb0eebedc75b656e8d2 Mon Sep 17 00:00:00 2001 From: Alexey Boykov Date: Thu, 7 Oct 2021 21:01:36 +0300 Subject: [PATCH 014/262] Creating only one binary, check compatibility --- programs/CMakeLists.txt | 4 ++ programs/local/CMakeLists.txt | 8 ++-- programs/local/LocalServer.cpp | 70 +++++++++++++++++++++++++++++++++- programs/main.cpp | 15 ++++++-- 4 files changed, 89 insertions(+), 8 deletions(-) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 43d9f974648..4806a7fe46e 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -473,3 +473,7 @@ if (ENABLE_TESTS AND USE_GTEST) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) add_dependencies(clickhouse-bundle clickhouse-tests) endif() + +if (ENABLE_FUZZING) + add_compile_definitions(FUZZING_MODE=1) +endif () diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index d965ecf05be..4ac8ad5d30d 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -21,13 +21,15 @@ endif() if (ENABLE_FUZZING) add_compile_definitions(FUZZING_MODE=1) set (WITH_COVERAGE ON) - add_executable(fuzz-clickhouse-local LocalServer.cpp ${SRCS}) - target_link_libraries(fuzz-clickhouse-local PRIVATE + target_link_libraries(clickhouse-local-lib PRIVATE ${LIB_FUZZING_ENGINE}) + #add_executable(fuzz-clickhouse-local LocalServer.cpp ${SRCS}) + #[[target_link_libraries(fuzz-clickhouse-local PRIVATE dbms ${LIB_FUZZING_ENGINE} loggers clickhouse_functions clickhouse_aggregate_functions clickhouse_storages_system - clickhouse_table_functions) + clickhouse_table_functions + readpassphrase)]] endif () diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index d93fa82d59c..9b567e60193 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -418,6 +418,11 @@ try ThreadStatus thread_status; setupSignalHandler(); +#ifdef FUZZING_MODE + static bool first_time = true; + if (first_time) + { +#endif std::cout << std::fixed << std::setprecision(3); std::cerr << std::fixed << std::setprecision(3); @@ -441,6 +446,10 @@ try processConfig(); applyCmdSettings(global_context); connect(); +#ifdef FUZZING_MODE + first_time = false; + } +#endif if (is_interactive) { @@ -455,7 +464,9 @@ try runNonInteractive(); } +#ifndef FUZZING_MODE cleanup(); +#endif return Application::EXIT_OK; } catch (...) @@ -728,4 +739,61 @@ int mainEntryClickHouseLocal(int argc, char ** argv) auto code = DB::getCurrentExceptionCode(); return code ? 
code : 1; } -} \ No newline at end of file +} + +#ifdef FUZZING_MODE +#include + +std::optional fuzz_app; + +extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) +{ + int & argc = *pargc; + char ** argv = *pargv; + + // position of delimiter "--" that separates arguments + // of clickhouse-local and fuzzer + int pos_delim = argc; + for (int i = 0; i < argc; ++i) + { + if (strcmp(argv[i], "--") == 0) + { + pos_delim = i; + break; + } + } + + fuzz_app.emplace(); + fuzz_app->init(pos_delim, argv); + for (int i = pos_delim + 1; i < argc; ++i) + std::swap(argv[i], argv[i - pos_delim]); + argc -= pos_delim; + if (argc == 0) // no delimiter provided + ++argc; + return 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +{ + try + { + // inappropriate symbol for fuzzing at the end + if (size) + --size; + auto cur_str = String(reinterpret_cast(data), size); + // to clearly see the beginning and the end + std::cerr << '>' << cur_str << '<' << std::endl; + DB::FunctionGetFuzzerData::update(cur_str); + fuzz_app->run(); + } + catch (...) + { + std::cerr << "Why here?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!" << std::endl; + std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; + return 0; + //auto code = DB::getCurrentExceptionCode(); + //return code ? code : 1; + } + return 0; +} +#endif diff --git a/programs/main.cpp b/programs/main.cpp index 8c70dcacc99..bd5dabbd100 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -90,7 +90,7 @@ using MainFunc = int (*)(int, char**); /// Add an item here to register new application -std::pair clickhouse_applications[] = +[[maybe_unused]]std::pair clickhouse_applications[] = { #if ENABLE_CLICKHOUSE_LOCAL {"local", mainEntryClickHouseLocal}, @@ -141,7 +141,7 @@ std::pair clickhouse_applications[] = {"hash-binary", mainEntryClickHouseHashBinary}, }; - +#ifndef FUZZING_MODE int printHelp(int, char **) { std::cerr << "Use one of the following commands:" << std::endl; @@ -149,8 +149,9 @@ int printHelp(int, char **) std::cerr << "clickhouse " << application.first << " [args] " << std::endl; return -1; } +#endif - +#ifndef FUZZING_MODE bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) { /// Use app if the first arg 'app' is passed (the arg should be quietly removed) @@ -170,6 +171,7 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) std::string app_name = "clickhouse-" + app_suffix; return !argv.empty() && (app_name == argv[0] || endsWith(argv[0], "/" + app_name)); } +#endif enum class InstructionFail @@ -338,9 +340,13 @@ struct Checker /// /// extern bool inside_main; /// class C { C() { assert(inside_main); } }; +#ifndef FUZZING_MODE bool inside_main = false; +#else +bool inside_main = true; +#endif - +#ifndef FUZZING_MODE int main(int argc_, char ** argv_) { inside_main = true; @@ -371,3 +377,4 @@ int main(int argc_, char ** argv_) return main_func(static_cast(argv.size()), argv.data()); } +#endif From a4100cc980f025825b45e05b2a9184bc828f7033 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 26 Oct 2021 17:45:41 +0000 Subject: [PATCH 015/262] Build local in CI in fuzz mode --- docker/packager/other/fuzzer.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh index 5eec51f9625..708356f9b0a 100755 --- a/docker/packager/other/fuzzer.sh +++ b/docker/packager/other/fuzzer.sh @@ -31,5 +31,10 @@ do mv "$FUZZER_PATH" /output/fuzzers done +ninja clickhouse-local +LOCAL_PATH=$(find ./programs -name 
clickhouse) +strip --strip-unneeded "$LOCAL_PATH" +mv "$LOCAL_PATH" /output/fuzzers + tar -zcvf /output/fuzzers.tar.gz /output/fuzzers rm -rf /output/fuzzers From 373b5a687fad8bbca496229fdb7c28efb5260ef5 Mon Sep 17 00:00:00 2001 From: Alexey Date: Tue, 9 Nov 2021 18:57:11 +0000 Subject: [PATCH 016/262] new files created --- docs/en/interfaces/grpc.md | 33 +++++++++++++++++++++++++++++++++ docs/ru/interfaces/grpc.md | 6 ++++++ 2 files changed, 39 insertions(+) create mode 100644 docs/en/interfaces/grpc.md create mode 100644 docs/ru/interfaces/grpc.md diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md new file mode 100644 index 00000000000..c9d09033b70 --- /dev/null +++ b/docs/en/interfaces/grpc.md @@ -0,0 +1,33 @@ +--- +toc_priority: 18 +toc_title: gRPC Protocol +--- + +# gRPC Protocol {#grpc-protocol} + +For the specification of the protocol see [clickhouse_grpc.proto](https://github.com/vitlibar/ClickHouse/blob/grpc-protocol/src/Server/grpc_protos/clickhouse_grpc.proto) + +To use the protocol first set grpc_port in the main configuration file, and then you can either write a client in any of the programming languages supported by gRPC by using the provided schema or use the built-in client utils/grpc-client/clickhouse-grpc-client.py. The built-in client is operated very likely to clickhouse-client, for example + +``` text +utils/grpc-client/clickhouse-grpc-client.py -q "SELECT sum(number) FROM numbers(10)" + +cat a.txt | utils/grpc-client/clickhouse-grpc-client.py -q "INSERT INTO temp FORMAT TSV" +``` +and so on. Without parameters it runs the built-in client in the interactive mode. + +The implementation of gRPC protocol also supports compression, SSL, getting progress and logs, authentication, parallel queries through the same channel, cancellation of queries, sessions, external tables. + +/* This file describes gRPC protocol supported in ClickHouse. + * + * To use this protocol a client should send one or more messages of the QueryInfo type + * and then receive one or more messages of the Result type. + * According to that the service provides four methods for that: + * ExecuteQuery(QueryInfo) returns (Result) + * ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) + * ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) + * ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) + * It's up to the client to choose which method to use. + * For example, ExecuteQueryWithStreamInput() allows the client to add data multiple times + * while executing a query, which is suitable for inserting many rows. 
+ */ diff --git a/docs/ru/interfaces/grpc.md b/docs/ru/interfaces/grpc.md new file mode 100644 index 00000000000..f4b61334a3a --- /dev/null +++ b/docs/ru/interfaces/grpc.md @@ -0,0 +1,6 @@ +--- +toc_priority: 18 +toc_title: gRPC интерфейс +--- + +# Интерфейс gRPC {#grpc-interface} From 49217a24796a949fbfa6f22382b13c6fa67913fb Mon Sep 17 00:00:00 2001 From: Alexey Date: Sun, 14 Nov 2021 18:11:40 +0000 Subject: [PATCH 017/262] en draft --- docs/en/interfaces/grpc.md | 105 ++++++++++++++++++++++++++++-------- docs/en/interfaces/index.md | 4 +- 2 files changed, 86 insertions(+), 23 deletions(-) diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index c9d09033b70..e6f6b8124e8 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -1,33 +1,96 @@ --- -toc_priority: 18 +toc_priority: 19 toc_title: gRPC Protocol --- # gRPC Protocol {#grpc-protocol} -For the specification of the protocol see [clickhouse_grpc.proto](https://github.com/vitlibar/ClickHouse/blob/grpc-protocol/src/Server/grpc_protos/clickhouse_grpc.proto) +## Introduction {#grpc-protocol-introduction} -To use the protocol first set grpc_port in the main configuration file, and then you can either write a client in any of the programming languages supported by gRPC by using the provided schema or use the built-in client utils/grpc-client/clickhouse-grpc-client.py. The built-in client is operated very likely to clickhouse-client, for example +ClickHouse supports [gRPC](https://en.wikipedia.org/wiki/GRPC). It is an open source remote procedure call system that uses HTTP/2 and Protocol Buffers. The implementation of gRPC protocol supports: + +- SSL; +- authentication; +- sessions; +- compression; +- parallel queries through the same channel; +- cancellation of queries; +- getting progress and logs; +- external tables. + +The protocol specification is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). + +## ClickHouse Configuration {#grpc-protocol-configuration} + +To use the gRPC protocol set `grpc_port` in the main [server configuration](../../operations/configuration-files/). See the following configuration example: + +```xml +9100 + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + deflate + + + medium + + + -1 + -1 + + + false + +``` + +## Built-in Client {#grpc-client} + +You can either write a client in any of the programming languages supported by gRPC by using the provided specification or use the built-in Python client. + +The built-in client is [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py). It requires [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) modules. To run the client in interactive mode call it without arguments. + +Arguments: + +- `--help` – Show this help message and exit +- `--host HOST, -h HOST` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address. +- `--port PORT` – The port to connect to. This port should be enabled on the ClickHouse server (see grpc_port in the config). +- `--user USER_NAME, -u USER_NAME` – The username. Default value: ‘default’. +- `--password PASSWORD` – The password. Default value: empty string. +- `--query QUERY, -q QUERY` – The query to process when using non-interactive mode. 
+- `--database DATABASE, -d DATABASE` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – Use the specified default format to output the result. +- `--debug` – Enables showing the debug information. + +**Client Usage Example** + +In the following example a table is created and loaded with data from a CSV file. ``` text -utils/grpc-client/clickhouse-grpc-client.py -q "SELECT sum(number) FROM numbers(10)" - -cat a.txt | utils/grpc-client/clickhouse-grpc-client.py -q "INSERT INTO temp FORMAT TSV" +./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" +echo "0,Input data for" > a.txt +echo "1,gRPC protocol example" >> a.txt +cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" +./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;" ``` -and so on. Without parameters it runs the built-in client in the interactive mode. -The implementation of gRPC protocol also supports compression, SSL, getting progress and logs, authentication, parallel queries through the same channel, cancellation of queries, sessions, external tables. +Result: -/* This file describes gRPC protocol supported in ClickHouse. - * - * To use this protocol a client should send one or more messages of the QueryInfo type - * and then receive one or more messages of the Result type. - * According to that the service provides four methods for that: - * ExecuteQuery(QueryInfo) returns (Result) - * ExecuteQueryWithStreamInput(stream QueryInfo) returns (Result) - * ExecuteQueryWithStreamOutput(QueryInfo) returns (stream Result) - * ExecuteQueryWithStreamIO(stream QueryInfo) returns (stream Result) - * It's up to the client to choose which method to use. - * For example, ExecuteQueryWithStreamInput() allows the client to add data multiple times - * while executing a query, which is suitable for inserting many rows. - */ +``` text +┌─id─┬─text──────────────────┐ +│ 0 │ Input data for │ +│ 1 │ gRPC protocol example │ +└────┴───────────────────────┘ +``` diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 10f15ae47d6..2c386a66618 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -6,10 +6,11 @@ toc_title: Introduction # Interfaces {#interfaces} -ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security): +ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security): - [HTTP](http.md), which is documented and easy to use directly. - [Native TCP](../interfaces/tcp.md), which has less overhead. +- [gRPC protocol](grpc.md). In most cases it is recommended to use appropriate tool or library instead of interacting with those directly. 
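If you do need to talk to one of these interfaces directly, the gRPC endpoint is usually the easiest to script: a hand-written client is only a few lines on top of the stubs generated from clickhouse_grpc.proto. The sketch below is illustrative only; the stub module names (`clickhouse_grpc_pb2`, `clickhouse_grpc_pb2_grpc`), the `ClickHouseStub` class and the `QueryInfo`/`Result` field names are assumptions based on default `grpcio-tools` code generation and should be checked against the actually generated code.

``` python
# Minimal hand-written gRPC client sketch (not the built-in clickhouse-grpc-client.py).
# Assumes stubs were generated with something like:
#   python3 -m grpc_tools.protoc -I. --python_out=. --grpc_python_out=. clickhouse_grpc.proto
import grpc

import clickhouse_grpc_pb2        # messages (QueryInfo, Result, ...) - assumed module name
import clickhouse_grpc_pb2_grpc   # service stub - assumed module name


def run_query(query, host="localhost", port=9100):
    # Plain (non-TLS) channel; the port must match grpc_port in the server configuration.
    with grpc.insecure_channel(f"{host}:{port}") as channel:
        stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel)
        # QueryInfo carries the query text and credentials; field names are assumed.
        info = clickhouse_grpc_pb2.QueryInfo(
            query=query,
            user_name="default",
            password="",
            output_format="TabSeparated",
        )
        result = stub.ExecuteQuery(info)
        return result.output  # raw bytes in the requested output format (assumed field)


if __name__ == "__main__":
    print(run_query("SELECT sum(number) FROM numbers(10)").decode())
```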
Officially supported by Yandex are the following: @@ -24,4 +25,3 @@ There are also a wide range of third-party libraries for working with ClickHouse - [Integrations](../interfaces/third-party/integrations.md) - [Visual interfaces](../interfaces/third-party/gui.md) -[Original article](https://clickhouse.com/docs/en/interfaces/) From 20a3ff3b44a8644d96eec6d42ac43132ea433ad1 Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 15 Nov 2021 19:54:07 +0000 Subject: [PATCH 018/262] format fixes --- docs/en/interfaces/grpc.md | 4 ++-- utils/grpc-client/a.txt | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 utils/grpc-client/a.txt diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index e6f6b8124e8..cd37e6717d4 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -64,7 +64,7 @@ The built-in client is [utils/grpc-client/clickhouse-grpc-client.py](https://git Arguments: -- `--help` – Show this help message and exit +- `--help` – Show this help message and exit. - `--host HOST, -h HOST` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address. - `--port PORT` – The port to connect to. This port should be enabled on the ClickHouse server (see grpc_port in the config). - `--user USER_NAME, -u USER_NAME` – The username. Default value: ‘default’. @@ -72,7 +72,7 @@ Arguments: - `--query QUERY, -q QUERY` – The query to process when using non-interactive mode. - `--database DATABASE, -d DATABASE` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). - `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – Use the specified default format to output the result. -- `--debug` – Enables showing the debug information. +- `--debug` – Enables showing the debug information. **Client Usage Example** diff --git a/utils/grpc-client/a.txt b/utils/grpc-client/a.txt new file mode 100644 index 00000000000..0cc9a3b87c3 --- /dev/null +++ b/utils/grpc-client/a.txt @@ -0,0 +1,2 @@ +0,Input data for +1,gRPC protocol example From 5196ce1de40ba52a7adf567f049ee910f8bda417 Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 15 Nov 2021 19:56:12 +0000 Subject: [PATCH 019/262] unused file --- utils/grpc-client/a.txt | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 utils/grpc-client/a.txt diff --git a/utils/grpc-client/a.txt b/utils/grpc-client/a.txt deleted file mode 100644 index 0cc9a3b87c3..00000000000 --- a/utils/grpc-client/a.txt +++ /dev/null @@ -1,2 +0,0 @@ -0,Input data for -1,gRPC protocol example From 8fbd46f95881d5c9b9ca45a47efcab1e163b4e47 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Wed, 17 Nov 2021 15:44:03 +0800 Subject: [PATCH 020/262] change need_reset_counters method --- src/Access/EnabledQuota.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index cf78bfd0475..76708a92764 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -67,7 +67,7 @@ struct EnabledQuota::Impl { /// We reset counters only if the interval's end has been calculated before. /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. 
- need_reset_counters = true; + need_reset_counters = (end_loaded.count() != 0); break; } end = std::chrono::system_clock::time_point{end_loaded}; From 93294734d3f98d4e322ebbf15d25ce36c086af8a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 7 Nov 2021 16:02:40 +0800 Subject: [PATCH 021/262] Better clickhouse-client multiline input --- base/base/LineReader.h | 1 - base/base/ReplxxLineReader.cpp | 31 +++++++++++++++++-- src/Client/ClientBase.cpp | 7 ++--- .../01526_client_start_and_exit.sh | 5 ++- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/base/base/LineReader.h b/base/base/LineReader.h index 0e36a9e01d1..12a856e2051 100644 --- a/base/base/LineReader.h +++ b/base/base/LineReader.h @@ -53,7 +53,6 @@ protected: String input; -private: bool multiline; Patterns extenders; diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index 38867faf5d5..55540bda214 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -22,7 +22,14 @@ namespace /// Trim ending whitespace inplace void trim(String & s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); + s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(), s.end()); +} + +/// Check if string ends with given character after skipping whitespaces. +bool ends_with(const std::string_view & s, const std::string_view & p) +{ + auto ss = std::string_view(s.data(), s.rend() - std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); })); + return ss.ends_with(p); } /// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. @@ -178,8 +185,28 @@ ReplxxLineReader::ReplxxLineReader( rx.bind_key(Replxx::KEY::control('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_NEXT, code); }); rx.bind_key(Replxx::KEY::control('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::HISTORY_PREVIOUS, code); }); + auto commit_action = [this](char32_t code) + { + std::string_view str = rx.get_state().text(); + + /// Always commit line when we see extender at the end. It will start a new prompt. + for (const auto * extender : extenders) + if (ends_with(str, extender)) + return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); + + /// If we see an delimiter at the end, commit right away. + for (const auto * delimiter : delimiters) + if (ends_with(str, delimiter)) + return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); + + /// If we allow multiline and there is already something in the input, start a newline. + if (multiline && !input.empty()) + return rx.invoke(Replxx::ACTION::NEW_LINE, code); + return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); + }; /// bind C-j to ENTER action. 
- rx.bind_key(Replxx::KEY::control('J'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMMIT_LINE, code); }); + rx.bind_key(Replxx::KEY::control('J'), commit_action); + rx.bind_key(Replxx::KEY::ENTER, commit_action); /// By default COMPLETE_NEXT/COMPLETE_PREV was binded to C-p/C-n, re-bind /// to M-P/M-N (that was used for HISTORY_COMMON_PREFIX_SEARCH before, but diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index ebc6c7d3107..eedcee74834 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1354,11 +1354,8 @@ void ClientBase::runInteractive() LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters); #endif - /// Enable bracketed-paste-mode only when multiquery is enabled and multiline is - /// disabled, so that we are able to paste and execute multiline queries in a whole - /// instead of erroring out, while be less intrusive. - if (config().has("multiquery") && !config().has("multiline")) - lr.enableBracketedPaste(); + /// Enable bracketed-paste-mode so that we are able to paste multiline queries as a whole. + lr.enableBracketedPaste(); do { diff --git a/tests/queries/0_stateless/01526_client_start_and_exit.sh b/tests/queries/0_stateless/01526_client_start_and_exit.sh index 82eb56305fc..0c5c94e3eac 100755 --- a/tests/queries/0_stateless/01526_client_start_and_exit.sh +++ b/tests/queries/0_stateless/01526_client_start_and_exit.sh @@ -10,8 +10,11 @@ ${CLICKHOUSE_CLIENT} -q "SELECT 'CREATE TABLE test_' || hex(randomPrintableASCII function stress() { + # 2004l is ignored because parallel running expect emulated terminal doesn't + # work well with bracketed paste enabling sequence, which is \e033?2004l + # (https://cirw.in/blog/bracketed-paste) while true; do - "${CURDIR}"/01526_client_start_and_exit.expect-not-a-test-case | grep -v -P 'ClickHouse client|Connecting|Connected|:\) Bye\.|new year|^\s*$|spawn bash|^0\s*$' + "${CURDIR}"/01526_client_start_and_exit.expect-not-a-test-case | grep -v -P 'ClickHouse client|Connecting|Connected|:\) Bye\.|new year|^\s*$|spawn bash|\?2004l|^0\s*$' done } From 449d30aa5b1887198cb9704a9712c1371c4b988c Mon Sep 17 00:00:00 2001 From: Alexey Date: Sat, 20 Nov 2021 09:35:23 +0000 Subject: [PATCH 022/262] en draft --- docs/en/interfaces/grpc.md | 53 ++++++++++++++++++------------------- docs/en/interfaces/index.md | 2 +- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index cd37e6717d4..ef7e48a8c01 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -1,13 +1,13 @@ --- toc_priority: 19 -toc_title: gRPC Protocol +toc_title: gRPC Interface --- -# gRPC Protocol {#grpc-protocol} +# gRPC Interface {#grpc-interface} -## Introduction {#grpc-protocol-introduction} +## Introduction {#grpc-interface-introduction} -ClickHouse supports [gRPC](https://en.wikipedia.org/wiki/GRPC). It is an open source remote procedure call system that uses HTTP/2 and Protocol Buffers. The implementation of gRPC protocol supports: +ClickHouse supports [gRPC](https://grpc.io/) interface. It is an open source remote procedure call system that uses HTTP/2 and [Protocol Buffers](https://en.wikipedia.org/wiki/Protocol_Buffers). The implementation of gRPC in ClickHouse supports: - SSL; - authentication; @@ -18,25 +18,25 @@ ClickHouse supports [gRPC](https://en.wikipedia.org/wiki/GRPC). It is an open so - getting progress and logs; - external tables. 
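As a rough illustration of how these capabilities look from a client, the sketch below streams the data for an `INSERT` through the `ExecuteQueryWithStreamInput` call, the variant intended for sending many rows. The generated module names and the `input_data`/`next_query_info` fields of `QueryInfo` are assumptions based on default `grpcio-tools` output and should be verified against `clickhouse_grpc.proto`.

``` python
# Sketch of a streaming INSERT over gRPC; module, stub and field names are assumed.
import grpc

import clickhouse_grpc_pb2
import clickhouse_grpc_pb2_grpc


def insert_rows(rows, host="localhost", port=9100):
    """Stream a list of CSV lines into grpc_example_table in several messages."""

    def query_info_stream():
        # First message: the INSERT statement itself. next_query_info (assumed field)
        # tells the server that more QueryInfo messages with data will follow.
        yield clickhouse_grpc_pb2.QueryInfo(
            query="INSERT INTO grpc_example_table FORMAT CSV",
            user_name="default",
            next_query_info=True,
        )
        # Data messages: all but the last keep next_query_info=True.
        for i in range(0, len(rows), 1000):
            chunk = rows[i:i + 1000]
            yield clickhouse_grpc_pb2.QueryInfo(
                input_data=("\n".join(chunk) + "\n").encode(),
                next_query_info=(i + 1000 < len(rows)),
            )

    with grpc.insecure_channel(f"{host}:{port}") as channel:
        stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel)
        # The server answers with a single Result once the input stream is finished.
        return stub.ExecuteQueryWithStreamInput(query_info_stream())


if __name__ == "__main__":
    insert_rows([f"{i},row {i}" for i in range(10000)])
```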
-The protocol specification is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). +The specification of the interface is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). -## ClickHouse Configuration {#grpc-protocol-configuration} +## gRPC Configuration {#grpc-interface-configuration} -To use the gRPC protocol set `grpc_port` in the main [server configuration](../../operations/configuration-files/). See the following configuration example: +To use the gRPC interface set `grpc_port` in the main [server configuration](../../operations/configuration-files/). Other configuration options see in the following example: ```xml 9100 false - + /path/to/ssl_cert_file /path/to/ssl_key_file - + false - + /path/to/ssl_ca_cert_file + false ``` ## Built-in Client {#grpc-client} -You can either write a client in any of the programming languages supported by gRPC by using the provided specification or use the built-in Python client. +You can write a client in any of the programming languages supported by gRPC using the provided [specification](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto) +Or you can use a built-in Python client. It is placed in [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) in the repository. The built-in client requires [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) Python modules. -The built-in client is [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py). It requires [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) modules. To run the client in interactive mode call it without arguments. +To run the client in an interactive mode call it without arguments. The client supports the following arguments: -Arguments: - -- `--help` – Show this help message and exit. -- `--host HOST, -h HOST` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address. -- `--port PORT` – The port to connect to. This port should be enabled on the ClickHouse server (see grpc_port in the config). -- `--user USER_NAME, -u USER_NAME` – The username. Default value: ‘default’. -- `--password PASSWORD` – The password. Default value: empty string. -- `--query QUERY, -q QUERY` – The query to process when using non-interactive mode. -- `--database DATABASE, -d DATABASE` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). -- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – Use the specified default format to output the result. -- `--debug` – Enables showing the debug information. +- `--help` – Shows a help message and exits. +- `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can use IPv4 or IPv6 addresses also. +- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). +- `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`. +- `--password PASSWORD` – A password. Default value: empty string. +- `--query QUERY, -q QUERY` – A query to process when using non-interactive mode. +- `--database DATABASE, -d DATABASE` – A default database. 
If not specified, the current database set in the server settings is used (`default` by default). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). +- `--debug` – Enables showing debug information. **Client Usage Example** -In the following example a table is created and loaded with data from a CSV file. +In the following example a table is created and loaded with data from a CSV file. Then the content of the table is queried. ``` text ./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" -echo "0,Input data for" > a.txt -echo "1,gRPC protocol example" >> a.txt +echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" + ./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;" ``` diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 2c386a66618..7b73cec22a0 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -10,7 +10,7 @@ ClickHouse provides three network interfaces (they can be optionally wrapped in - [HTTP](http.md), which is documented and easy to use directly. - [Native TCP](../interfaces/tcp.md), which has less overhead. -- [gRPC protocol](grpc.md). +- [gRPC](grpc.md). In most cases it is recommended to use appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following: From 2431834f8a0a6a0d7ca5bf7a190066a84d7eec88 Mon Sep 17 00:00:00 2001 From: Alexey Date: Sun, 21 Nov 2021 18:56:08 +0000 Subject: [PATCH 023/262] fix link --- docs/en/interfaces/grpc.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index ef7e48a8c01..f9faab5235d 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -22,7 +22,7 @@ The specification of the interface is described in [clickhouse_grpc.proto](https ## gRPC Configuration {#grpc-interface-configuration} -To use the gRPC interface set `grpc_port` in the main [server configuration](../../operations/configuration-files/). Other configuration options see in the following example: +To use the gRPC interface set `grpc_port` in the main [server configuration](../operations/configuration-files.md). Other configuration options see in the following example: ```xml 9100 From a18b0313769df69cbc3f658438172dc49303f030 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Tue, 23 Nov 2021 14:33:32 +0800 Subject: [PATCH 024/262] fix quota fist used bug --- src/Access/EnabledQuota.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 76708a92764..5f8251ce3c9 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -67,7 +67,7 @@ struct EnabledQuota::Impl { /// We reset counters only if the interval's end has been calculated before. /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. 
- need_reset_counters = (end_loaded.count() != 0); + need_reset_counters = (end_of_interval.load().count() != 0); break; } end = std::chrono::system_clock::time_point{end_loaded}; From c6d3065885283ffcdf986a39b736c0f03071ff3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 22 Nov 2021 16:11:06 +0100 Subject: [PATCH 025/262] Check max_execution_time in the pipeline and pulling executors --- src/Interpreters/ProcessList.cpp | 1 - src/Interpreters/ProcessList.h | 3 +- src/Processors/Executors/PipelineExecutor.cpp | 31 +++++++- src/Processors/Executors/PipelineExecutor.h | 7 ++ .../PullingAsyncPipelineExecutor.cpp | 3 +- .../Executors/PullingPipelineExecutor.cpp | 3 + src/Storages/System/StorageSystemNumbers.cpp | 2 + .../02122_join_group_by_timeout.reference | 6 ++ .../02122_join_group_by_timeout.sh | 73 +++++++++++++++++++ 9 files changed, 122 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02122_join_group_by_timeout.reference create mode 100755 tests/queries/0_stateless/02122_join_group_by_timeout.sh diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 4d58f0c97dc..85c2f4ff955 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -295,7 +295,6 @@ QueryStatus::QueryStatus( , query(query_) , client_info(client_info_) , priority_handle(std::move(priority_handle_)) - , num_queries_increment{CurrentMetrics::Query} { } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 02be24bb2bd..eea377f5f9b 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -79,6 +79,7 @@ protected: friend class ThreadStatus; friend class CurrentThread; friend class ProcessListEntry; + friend class PipelineExecutor; String query; ClientInfo client_info; @@ -95,8 +96,6 @@ protected: QueryPriorities::Handle priority_handle; - CurrentMetrics::Increment num_queries_increment{CurrentMetrics::Query}; - std::atomic is_killed { false }; void setUserProcessList(ProcessListForUser * user_process_list_); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index c8c9153b777..2c11402a3c3 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; + extern const int TIMEOUT_EXCEEDED; } @@ -46,6 +47,13 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) throw; } + if (process_list_element) + { + auto settings = process_list_element->context.lock()->getSettings(); + limits.max_execution_time = settings.max_execution_time; + overflow_mode = settings.timeout_overflow_mode; + } + checkTimeLimit(); } PipelineExecutor::~PipelineExecutor() @@ -73,6 +81,7 @@ void PipelineExecutor::finish() void PipelineExecutor::execute(size_t num_threads) { + checkTimeLimit(); if (num_threads < 1) num_threads = 1; @@ -101,6 +110,7 @@ void PipelineExecutor::execute(size_t num_threads) bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { + checkTimeLimit(); if (!is_execution_initialized) { initializeExecution(1); @@ -124,10 +134,25 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) return false; } +bool PipelineExecutor::checkTimeLimit() +{ + if (process_list_element) + { + if (process_list_element->isKilled()) + throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); + + bool cont = 
limits.checkTimeLimit(process_list_element->watch, overflow_mode); + if (!cont) + cancel(); + return cont; + } + + return true; +} + void PipelineExecutor::finalizeExecution() { - if (process_list_element && process_list_element->isKilled()) - throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); + checkTimeLimit(); if (cancelled) return; @@ -190,6 +215,8 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie if (tasks.isFinished()) break; + checkTimeLimit(); + #ifndef NDEBUG Stopwatch processing_time_watch; #endif diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 19137b2306a..1675082f08e 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -43,6 +44,9 @@ public: /// Cancel execution. May be called from another thread. void cancel(); + /// Checks the query time limits (cancelled or timeout) + bool checkTimeLimit(); + private: ExecutingGraphPtr graph; @@ -71,6 +75,9 @@ private: void finish(); String dumpPipeline() const; + + ExecutionSpeedLimits limits; + OverflowMode overflow_mode; }; using PipelineExecutorPtr = std::shared_ptr; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index fdddfdef2a4..8760325d958 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -117,8 +117,7 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) data->rethrowExceptionIfHas(); - bool is_execution_finished = lazy_format ? lazy_format->isFinished() - : data->is_finished.load(); + bool is_execution_finished = !data->executor->checkTimeLimit() || lazy_format ? 
lazy_format->isFinished() : data->is_finished.load(); if (is_execution_finished) { diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index a9c73b9f8fb..3cc91ceeeeb 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -44,6 +44,9 @@ bool PullingPipelineExecutor::pull(Chunk & chunk) if (!executor) executor = std::make_shared(pipeline.processors, pipeline.process_list_element); + if (!executor->checkTimeLimit()) + return false; + if (!executor->executeStep(&has_data_flag)) return false; diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 4aed5098bd1..f1fde8b79b6 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -7,6 +7,8 @@ #include #include +#include + namespace DB { diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference new file mode 100644 index 00000000000..e1284a85d4b --- /dev/null +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.reference @@ -0,0 +1,6 @@ +Code: 159 +Code: 159 +Code: 159 +0 +Code: 159 +0 diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh new file mode 100755 index 00000000000..5335c60554f --- /dev/null +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# TCP CLIENT: As of today (22/11/21) uses PullingAsyncPipelineExecutor +### Should be cancelled after 1 second and return a 159 exception (timeout) +timeout -s KILL 5 $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ + "SELECT * FROM + ( + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + GROUP BY n + ) + LIMIT 20 + FORMAT Null" 2>&1 | grep -o "Code: 159" + +### Should stop pulling data and return what has been generated already (return code 0) +timeout -s KILL 5 $CLICKHOUSE_CLIENT -q \ + "SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' + " +echo $? 
+ + +# HTTP CLIENT: As of today (22/11/21) uses PullingPipelineExecutor +### Should be cancelled after 1 second and return a 159 exception (timeout) +${CLICKHOUSE_CURL} -q --max-time 5 -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ + "SELECT * FROM + ( + SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + GROUP BY n + ) + LIMIT 20 + FORMAT Null" 2>&1 | grep -o "Code: 159" + + +### Should stop pulling data and return what has been generated already (return code 0) +${CLICKHOUSE_CURL} -q --max-time 5 -sS "$CLICKHOUSE_URL" -d \ + "SELECT a.name as n + FROM + ( + SELECT 'Name' as name, number FROM system.numbers LIMIT 2000000 + ) AS a, + ( + SELECT 'Name' as name2, number FROM system.numbers LIMIT 2000000 + ) as b + FORMAT Null + SETTINGS max_execution_time = 1, timeout_overflow_mode = 'break' + " +echo $? From f39648dafbfa672272abcbb00b9ffbef787e97f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 22 Nov 2021 16:52:34 +0100 Subject: [PATCH 026/262] Style --- src/Interpreters/ProcessList.cpp | 5 ----- src/Interpreters/ProcessList.h | 5 ----- src/Processors/Executors/PipelineExecutor.cpp | 1 - 3 files changed, 11 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 85c2f4ff955..826da7c6db7 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -16,11 +16,6 @@ #include -namespace CurrentMetrics -{ - extern const Metric Query; -} - namespace DB { diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index eea377f5f9b..2c1bc0b0a85 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -25,11 +25,6 @@ #include -namespace CurrentMetrics -{ - extern const Metric Query; -} - namespace DB { diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 2c11402a3c3..bb0b0901b2e 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -23,7 +23,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int QUERY_WAS_CANCELLED; - extern const int TIMEOUT_EXCEEDED; } From 146c4a1157eb08dfea257922034b95d38b38457a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 22 Nov 2021 17:29:15 +0100 Subject: [PATCH 027/262] Only print one distinct error per command --- .../queries/0_stateless/02122_join_group_by_timeout.reference | 2 -- tests/queries/0_stateless/02122_join_group_by_timeout.sh | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.reference b/tests/queries/0_stateless/02122_join_group_by_timeout.reference index e1284a85d4b..f314e22e519 100644 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.reference +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.reference @@ -1,6 +1,4 @@ Code: 159 -Code: 159 -Code: 159 0 Code: 159 0 diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index 5335c60554f..a8c4ee5f30a 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -20,7 +20,7 @@ timeout -s KILL 5 $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" + FORMAT Null" 2>&1 | 
grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) timeout -s KILL 5 $CLICKHOUSE_CLIENT -q \ @@ -54,7 +54,7 @@ ${CLICKHOUSE_CURL} -q --max-time 5 -sS "$CLICKHOUSE_URL&max_execution_time=1" -d GROUP BY n ) LIMIT 20 - FORMAT Null" 2>&1 | grep -o "Code: 159" + FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) From cbe3a47f2f3c4057601b70cb3445145f6a49c01e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 23 Nov 2021 13:24:51 +0100 Subject: [PATCH 028/262] PipelineExecutor: Avoid throwing in constructor after saving the querystatus Otherwise the query status would keep a pointer to the executor which is dying at that very moment --- src/Processors/Executors/PipelineExecutor.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index bb0b0901b2e..15a5671a627 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -32,8 +32,6 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) try { graph = std::make_unique(processors); - if (process_list_element) - process_list_element->addPipelineExecutor(this); } catch (Exception & exception) { @@ -51,8 +49,11 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) auto settings = process_list_element->context.lock()->getSettings(); limits.max_execution_time = settings.max_execution_time; overflow_mode = settings.timeout_overflow_mode; + + // Add the pipeline to the QueryStatus at the end to avoid issues if other things throw + // as that would leave the executor "linked" + process_list_element->addPipelineExecutor(this); } - checkTimeLimit(); } PipelineExecutor::~PipelineExecutor() From 15dc86bd3721b641664aa063f6ad9dcdf70748ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 23 Nov 2021 14:50:23 +0100 Subject: [PATCH 029/262] Fix 00613_shard_distributed_max_execution_time flakyness --- src/Processors/Executors/PipelineExecutor.cpp | 2 ++ ...0613_shard_distributed_max_execution_time.reference | 10 ---------- .../00613_shard_distributed_max_execution_time.sql | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 15a5671a627..ce22ad0ec50 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -142,6 +142,8 @@ bool PipelineExecutor::checkTimeLimit() throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); bool cont = limits.checkTimeLimit(process_list_element->watch, overflow_mode); + // We call cancel here so that all processors are notified and tasks waken up + // so that the "break" is faster and doesn't wait for long events if (!cont) cancel(); return cont; diff --git a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference index 8b1acc12b63..e69de29bb2d 100644 --- a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference +++ b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.reference @@ -1,10 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 diff --git 
a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql index de3000533a1..1f4cb2a36b2 100644 --- a/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql +++ b/tests/queries/0_stateless/00613_shard_distributed_max_execution_time.sql @@ -1,4 +1,4 @@ -- Tags: distributed SET max_execution_time = 1, timeout_overflow_mode = 'break'; -SELECT DISTINCT * FROM remote('127.0.0.{2,3}', system.numbers) WHERE number < 10; +SELECT * FROM remote('127.0.0.{2,3}', system.numbers) WHERE number < 10 FORMAT Null; From cb6f99fe6faa1d0658a481c73fdfe6ecc26e41de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 24 Nov 2021 09:58:03 +0100 Subject: [PATCH 030/262] Use getContext Co-authored-by: Azat Khuzhin --- src/Processors/Executors/PipelineExecutor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index ce22ad0ec50..a4d35eb3de2 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -46,7 +46,7 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) } if (process_list_element) { - auto settings = process_list_element->context.lock()->getSettings(); + auto settings = process_list_element->getContext()->getSettings(); limits.max_execution_time = settings.max_execution_time; overflow_mode = settings.timeout_overflow_mode; From 34d0f403dc50139cc6de3463d92bf9e50b230bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 24 Nov 2021 12:32:08 +0100 Subject: [PATCH 031/262] Make clang-tidy happy --- src/Processors/Executors/PipelineExecutor.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index a4d35eb3de2..c1ae48014c3 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -47,12 +46,11 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) if (process_list_element) { auto settings = process_list_element->getContext()->getSettings(); - limits.max_execution_time = settings.max_execution_time; - overflow_mode = settings.timeout_overflow_mode; - // Add the pipeline to the QueryStatus at the end to avoid issues if other things throw // as that would leave the executor "linked" process_list_element->addPipelineExecutor(this); + limits.max_execution_time = settings.max_execution_time; + overflow_mode = settings.timeout_overflow_mode; } } From b0864ecbaa9ccda43f66de94c396cbed34d4052b Mon Sep 17 00:00:00 2001 From: liyang830 Date: Thu, 25 Nov 2021 18:33:19 +0800 Subject: [PATCH 032/262] =?UTF-8?q?feat=EF=BC=9Ashow=20tables=20.etc=20not?= =?UTF-8?q?=20two=20queries?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Core/Settings.h | 9 ++++++++- .../Access/InterpreterShowPrivilegesQuery.cpp | 3 ++- src/Interpreters/Access/InterpreterShowPrivilegesQuery.h | 4 ++-- src/Interpreters/InterpreterShowProcesslistQuery.cpp | 1 + src/Interpreters/InterpreterShowProcesslistQuery.h | 4 ++-- src/Interpreters/InterpreterShowTablesQuery.cpp | 1 + src/Interpreters/InterpreterShowTablesQuery.h | 4 ++-- src/Interpreters/executeQuery.cpp | 2 +- 8 files changed, 19 
insertions(+), 9 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index fb8d72f51c4..d94e2f8d1c1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -641,10 +641,17 @@ class IColumn; // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. +// settings work in query runtime +#define RUNTIME_QUERY_SETTINGS(M) \ + M(Bool, is_reinterpreted_execution, false, "Queries such as show tables will be reinterpreted to select query.", 0) +// End of RUNTIME_QUERY_SETTINGS + + #define LIST_OF_SETTINGS(M) \ COMMON_SETTINGS(M) \ OBSOLETE_SETTINGS(M) \ - FORMAT_FACTORY_SETTINGS(M) + FORMAT_FACTORY_SETTINGS(M) \ + RUNTIME_QUERY_SETTINGS(M) DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) diff --git a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp index 05aa74d7dc4..436e2962ed3 100644 --- a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp @@ -1,6 +1,6 @@ #include #include - +#include namespace DB { @@ -12,6 +12,7 @@ InterpreterShowPrivilegesQuery::InterpreterShowPrivilegesQuery(const ASTPtr & qu BlockIO InterpreterShowPrivilegesQuery::execute() { + context->applySettingChange({"is_reinterpreted_execution", true}); return executeQuery("SELECT * FROM system.privileges", context, true); } diff --git a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h index 852d5173eb1..d8a9d3206e5 100644 --- a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h @@ -15,8 +15,8 @@ public: BlockIO execute() override; - bool ignoreQuota() const override { return true; } - bool ignoreLimits() const override { return true; } + bool ignoreQuota() const override { return false; } + bool ignoreLimits() const override { return false; } private: ASTPtr query_ptr; diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.cpp b/src/Interpreters/InterpreterShowProcesslistQuery.cpp index 780ba688a89..5c11f3a53fc 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.cpp +++ b/src/Interpreters/InterpreterShowProcesslistQuery.cpp @@ -12,6 +12,7 @@ namespace DB BlockIO InterpreterShowProcesslistQuery::execute() { + getContext()->applySettingChange({"is_reinterpreted_execution", true}); return executeQuery("SELECT * FROM system.processes", getContext(), true); } diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.h b/src/Interpreters/InterpreterShowProcesslistQuery.h index 31454882a89..05124eb2b41 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.h +++ b/src/Interpreters/InterpreterShowProcesslistQuery.h @@ -19,8 +19,8 @@ public: /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then /// the SELECT query will checks the quota and limits. 
- bool ignoreQuota() const override { return true; } - bool ignoreLimits() const override { return true; } + bool ignoreQuota() const override { return false; } + bool ignoreLimits() const override { return false; } private: ASTPtr query_ptr; diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index 609df1404ca..b5c79cdb415 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -142,6 +142,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() BlockIO InterpreterShowTablesQuery::execute() { + getContext()->applySettingChange({"is_reinterpreted_execution", true}); return executeQuery(getRewrittenQuery(), getContext(), true); } diff --git a/src/Interpreters/InterpreterShowTablesQuery.h b/src/Interpreters/InterpreterShowTablesQuery.h index 16fc9ef2cf4..fe1bd861177 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.h +++ b/src/Interpreters/InterpreterShowTablesQuery.h @@ -22,8 +22,8 @@ public: /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then /// the SELECT query will checks the quota and limits. - bool ignoreQuota() const override { return true; } - bool ignoreLimits() const override { return true; } + bool ignoreQuota() const override { return false; } + bool ignoreLimits() const override { return false; } private: ASTPtr query_ptr; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index f401f708ab1..0404530ff9b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -600,7 +600,7 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); std::shared_ptr quota; - if (!interpreter->ignoreQuota()) + if (!interpreter->ignoreQuota() && !context->getSettingsRef().is_reinterpreted_execution) { quota = context->getQuota(); if (quota) From 9892151290bee34f068be82355a8c0b54e67fb88 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Thu, 25 Nov 2021 20:49:18 +0800 Subject: [PATCH 033/262] fix: modify test_quota integration test --- tests/integration/test_quota/test.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4149987996b..57080459513 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -393,31 +393,46 @@ def test_query_inserts(): def test_consumption_of_show_tables(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert instance.query("SHOW TABLES") == "test_table\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_databases(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert instance.query("SHOW DATABASES") == "INFORMATION_SCHEMA\ndefault\ninformation_schema\nsystem\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t4\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_clusters(): + assert re.match( + 
"myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert len(instance.query("SHOW CLUSTERS")) > 0 assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_processlist(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) instance.query("SHOW PROCESSLIST") assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_privileges(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert len(instance.query("SHOW PRIVILEGES")) > 0 assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", instance.query("SHOW QUOTA")) From df3ac768dbeca0cd7cbaffc919d86d14f5cd5130 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 25 Nov 2021 19:03:56 +0300 Subject: [PATCH 034/262] Fix recursive user defined functions crash --- .../UserDefinedSQLFunctionVisitor.cpp | 37 ++++++++++++++----- .../UserDefinedSQLFunctionVisitor.h | 2 +- ...rsive_sql_user_defined_functions.reference | 2 + ...5_recursive_sql_user_defined_functions.sql | 21 +++++++++++ 4 files changed, 52 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference create mode 100644 tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index d9ac53097ab..33740a40cc7 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -1,6 +1,7 @@ #include "UserDefinedSQLFunctionVisitor.h" #include +#include #include #include @@ -18,19 +19,16 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } -void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data & data) +void UserDefinedSQLFunctionMatcher::visit(ASTPtr & ast, Data &) { auto * function = ast->as(); if (!function) return; - auto result = tryToReplaceFunction(*function); - - if (result) - { - ast = result; - visit(ast, data); - } + std::unordered_set udf_in_replace_process; + auto replace_result = tryToReplaceFunction(*function, udf_in_replace_process); + if (replace_result) + ast = replace_result; } bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr &) @@ -38,8 +36,11 @@ bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr return true; } -ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & function) +ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & function, std::unordered_set & udf_in_replace_process) { + if (udf_in_replace_process.find(function.name) != udf_in_replace_process.end()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Recursive function call during function user defined function call {}", function.name); + auto user_defined_function = UserDefinedSQLFunctionFactory::instance().tryGet(function.name); if (!user_defined_function) return nullptr; @@ -71,8 +72,17 @@ 
ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f identifier_name_to_function_argument.emplace(identifier_name, function_argument); } + auto [it, _] = udf_in_replace_process.emplace(function.name); + auto function_body_to_update = function_core_expression->children.at(1)->clone(); + if (auto * inner_function = function_body_to_update->as()) + { + auto replace_result = tryToReplaceFunction(*inner_function, udf_in_replace_process); + if (replace_result) + function_body_to_update = replace_result; + } + std::stack ast_nodes_to_update; ast_nodes_to_update.push(function_body_to_update); @@ -83,6 +93,13 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f for (auto & child : ast_node_to_update->children) { + if (auto * inner_function = child->as()) + { + auto replace_result = tryToReplaceFunction(*inner_function, udf_in_replace_process); + if (replace_result) + child = replace_result; + } + auto identifier_name_opt = tryGetIdentifierName(child); if (identifier_name_opt) { @@ -104,6 +121,8 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f } } + udf_in_replace_process.erase(it); + auto function_alias = function.tryGetAlias(); if (!function_alias.empty()) diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.h b/src/Interpreters/UserDefinedSQLFunctionVisitor.h index 46f95f29ab9..686594c088f 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.h +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.h @@ -34,7 +34,7 @@ public: private: static void visit(ASTFunction & func, const Data & data); - static ASTPtr tryToReplaceFunction(const ASTFunction & function); + static ASTPtr tryToReplaceFunction(const ASTFunction & function, std::unordered_set & udf_in_replace_process); }; diff --git a/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference new file mode 100644 index 00000000000..4792e70f333 --- /dev/null +++ b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.reference @@ -0,0 +1,2 @@ +2 +3 diff --git a/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql new file mode 100644 index 00000000000..2cbaa4b5aaf --- /dev/null +++ b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql @@ -0,0 +1,21 @@ +DROP FUNCTION IF EXISTS 02125_function; +CREATE FUNCTION 02125_function AS x -> 02125_function(x); +SELECT 02125_function(1); --{serverError 1}; +DROP FUNCTION 02125_function; + +DROP FUNCTION IF EXISTS 02125_function_1; +CREATE FUNCTION 02125_function_1 AS x -> 02125_function_2(x); + +DROP FUNCTION IF EXISTS 02125_function_2; +CREATE FUNCTION 02125_function_2 AS x -> 02125_function_1(x); + +SELECT 02125_function_1(1); --{serverError 1}; +SELECT 02125_function_2(2); --{serverError 1}; + +CREATE OR REPLACE FUNCTION 02125_function_2 AS x -> x + 1; + +SELECT 02125_function_1(1); +SELECT 02125_function_2(2); + +DROP FUNCTION 02125_function_1; +DROP FUNCTION 02125_function_2; From c498b7ba5967abd72e9889a1e88c46fa51adccf2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 26 Nov 2021 12:44:39 +0100 Subject: [PATCH 035/262] Move limits check to ProcessList --- src/Interpreters/ProcessList.cpp | 12 ++++++++++++ src/Interpreters/ProcessList.h | 12 ++++++++++-- src/Processors/Executors/PipelineExecutor.cpp | 13 +++---------- 
src/Processors/Executors/PipelineExecutor.h | 4 ---- src/Storages/System/StorageSystemNumbers.cpp | 1 - 5 files changed, 25 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 826da7c6db7..d842f5c5937 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -24,6 +24,7 @@ namespace ErrorCodes extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING; extern const int LOGICAL_ERROR; + extern const int QUERY_WAS_CANCELLED; } @@ -291,6 +292,9 @@ QueryStatus::QueryStatus( , client_info(client_info_) , priority_handle(std::move(priority_handle_)) { + auto settings = getContext()->getSettings(); + limits.max_execution_time = settings.max_execution_time; + overflow_mode = settings.timeout_overflow_mode; } QueryStatus::~QueryStatus() @@ -326,6 +330,14 @@ void QueryStatus::removePipelineExecutor(PipelineExecutor * e) std::erase_if(executors, [e](PipelineExecutor * x) { return x == e; }); } +bool QueryStatus::checkTimeLimit() +{ + if (is_killed.load()) + throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); + + return limits.checkTimeLimit(watch, overflow_mode); +} + void QueryStatus::setUserProcessList(ProcessListForUser * user_process_list_) { diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 2c1bc0b0a85..52a476c2a48 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -1,11 +1,12 @@ #pragma once #include -#include #include #include #include #include +#include +#include #include #include #include @@ -74,7 +75,6 @@ protected: friend class ThreadStatus; friend class CurrentThread; friend class ProcessListEntry; - friend class PipelineExecutor; String query; ClientInfo client_info; @@ -89,6 +89,11 @@ protected: /// Progress of output stream Progress progress_out; + /// Used to externally check for the query time limits + /// They are saved in the constructor to limit the overhead of each call to checkTimeLimit() + ExecutionSpeedLimits limits; + OverflowMode overflow_mode; + QueryPriorities::Handle priority_handle; std::atomic is_killed { false }; @@ -170,6 +175,9 @@ public: /// Removes a pipeline to the QueryStatus void removePipelineExecutor(PipelineExecutor * e); + + /// Checks the query time limits (cancelled or timeout) + bool checkTimeLimit(); }; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index c1ae48014c3..53e26481b3a 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -21,7 +21,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int QUERY_WAS_CANCELLED; } @@ -45,12 +44,9 @@ PipelineExecutor::PipelineExecutor(Processors & processors, QueryStatus * elem) } if (process_list_element) { - auto settings = process_list_element->getContext()->getSettings(); // Add the pipeline to the QueryStatus at the end to avoid issues if other things throw // as that would leave the executor "linked" process_list_element->addPipelineExecutor(this); - limits.max_execution_time = settings.max_execution_time; - overflow_mode = settings.timeout_overflow_mode; } } @@ -136,15 +132,12 @@ bool PipelineExecutor::checkTimeLimit() { if (process_list_element) { - if (process_list_element->isKilled()) - throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED); - - bool cont = limits.checkTimeLimit(process_list_element->watch, 
overflow_mode); + bool continuing = process_list_element->checkTimeLimit(); // We call cancel here so that all processors are notified and tasks waken up // so that the "break" is faster and doesn't wait for long events - if (!cont) + if (!continuing) cancel(); - return cont; + return continuing; } return true; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 1675082f08e..dd3212caca8 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -2,7 +2,6 @@ #include #include -#include #include #include @@ -75,9 +74,6 @@ private: void finish(); String dumpPipeline() const; - - ExecutionSpeedLimits limits; - OverflowMode overflow_mode; }; using PipelineExecutorPtr = std::shared_ptr; diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index f1fde8b79b6..c09279e65ac 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -7,7 +7,6 @@ #include #include -#include namespace DB { From eb0435c51d373feafd25fd676a251a8feb9a07de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 26 Nov 2021 12:52:51 +0100 Subject: [PATCH 036/262] Reduce header exposure to ProcessList.h --- src/Processors/Transforms/CountingTransform.cpp | 3 +++ src/Processors/Transforms/CountingTransform.h | 4 +++- src/Processors/Transforms/LimitsCheckingTransform.h | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index fd9b80e4673..88ecbe6adc3 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,6 +1,9 @@ #include + +#include #include #include +#include namespace ProfileEvents diff --git a/src/Processors/Transforms/CountingTransform.h b/src/Processors/Transforms/CountingTransform.h index 9d0ccf11ace..e7100e8510b 100644 --- a/src/Processors/Transforms/CountingTransform.h +++ b/src/Processors/Transforms/CountingTransform.h @@ -1,12 +1,14 @@ #pragma once +#include #include -#include namespace DB { +class QueryStatus; +class ThreadStatus; /// Proxy class which counts number of written block, rows, bytes class CountingTransform final : public ExceptionKeepingTransform diff --git a/src/Processors/Transforms/LimitsCheckingTransform.h b/src/Processors/Transforms/LimitsCheckingTransform.h index 50891ece654..2f96a17c17b 100644 --- a/src/Processors/Transforms/LimitsCheckingTransform.h +++ b/src/Processors/Transforms/LimitsCheckingTransform.h @@ -1,8 +1,9 @@ #pragma once +#include #include #include #include -#include +#include #include From 05c6f065c1c1a9e356fc047040f2aa74315c71f4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 26 Nov 2021 17:57:48 +0300 Subject: [PATCH 037/262] remove LowCardinality from window function args. 
--- src/Processors/Transforms/WindowTransform.cpp | 3 ++- .../02126_lc_window_functions.reference | 10 +++++++++ .../0_stateless/02126_lc_window_functions.sql | 22 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02126_lc_window_functions.reference create mode 100644 tests/queries/0_stateless/02126_lc_window_functions.sql diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 0b3b4ab3f96..bbe2984a23d 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -1059,7 +1060,7 @@ void WindowTransform::appendChunk(Chunk & chunk) // Just materialize everything. auto columns = chunk.detachColumns(); for (auto & column : columns) - column = std::move(column)->convertToFullColumnIfConst(); + column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); block.input_columns = std::move(columns); // Initialize output columns. diff --git a/tests/queries/0_stateless/02126_lc_window_functions.reference b/tests/queries/0_stateless/02126_lc_window_functions.reference new file mode 100644 index 00000000000..75378377541 --- /dev/null +++ b/tests/queries/0_stateless/02126_lc_window_functions.reference @@ -0,0 +1,10 @@ +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02126_lc_window_functions.sql b/tests/queries/0_stateless/02126_lc_window_functions.sql new file mode 100644 index 00000000000..b76d921406b --- /dev/null +++ b/tests/queries/0_stateless/02126_lc_window_functions.sql @@ -0,0 +1,22 @@ +SELECT max(id) OVER () AS aid +FROM +( + SELECT materialize(toLowCardinality('aaaa')) AS id + FROM numbers_mt(1000000) +) +FORMAT `Null`; + +SELECT max(id) OVER (PARTITION BY id) AS id +FROM +( + SELECT materialize('aaaa') AS id + FROM numbers_mt(1000000) +) +FORMAT `Null`; + +SELECT countIf(sym = 'Red') OVER () AS res +FROM +( + SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym + FROM numbers(10) +); From b78d2709d68903b92575e7ff44b278043531ef98 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 29 Nov 2021 12:04:00 +0300 Subject: [PATCH 038/262] Update WindowTransform.cpp add comment --- src/Processors/Transforms/WindowTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index bbe2984a23d..57754847a42 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1057,6 +1057,8 @@ void WindowTransform::appendChunk(Chunk & chunk) // Another problem with Const columns is that the aggregate functions // can't work with them, so we have to materialize them like the // Aggregator does. + // Likewise, aggregate functions can't work with LowCardinality, + // so we have to materialize them too. // Just materialize everything. 
auto columns = chunk.detachColumns(); for (auto & column : columns) From 05da57f7a6dfaeebf144bc66ce00e31533ea0099 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 29 Nov 2021 13:19:54 +0300 Subject: [PATCH 039/262] Fix identity user defined function --- src/Interpreters/UserDefinedSQLFunctionVisitor.cpp | 12 +++++------- .../02126_identity_user_defined_function.reference | 1 + .../02126_identity_user_defined_function.sql | 12 ++++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02126_identity_user_defined_function.reference create mode 100644 tests/queries/0_stateless/02126_identity_user_defined_function.sql diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index 33740a40cc7..bc63a4dfec3 100644 --- a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -76,15 +76,11 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f auto function_body_to_update = function_core_expression->children.at(1)->clone(); - if (auto * inner_function = function_body_to_update->as()) - { - auto replace_result = tryToReplaceFunction(*inner_function, udf_in_replace_process); - if (replace_result) - function_body_to_update = replace_result; - } + auto expression_list = std::make_shared(); + expression_list->children.emplace_back(std::move(function_body_to_update)); std::stack ast_nodes_to_update; - ast_nodes_to_update.push(function_body_to_update); + ast_nodes_to_update.push(expression_list); while (!ast_nodes_to_update.empty()) { @@ -123,6 +119,8 @@ ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & f udf_in_replace_process.erase(it); + function_body_to_update = expression_list->children[0]; + auto function_alias = function.tryGetAlias(); if (!function_alias.empty()) diff --git a/tests/queries/0_stateless/02126_identity_user_defined_function.reference b/tests/queries/0_stateless/02126_identity_user_defined_function.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02126_identity_user_defined_function.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02126_identity_user_defined_function.sql b/tests/queries/0_stateless/02126_identity_user_defined_function.sql new file mode 100644 index 00000000000..8c3b7b85d48 --- /dev/null +++ b/tests/queries/0_stateless/02126_identity_user_defined_function.sql @@ -0,0 +1,12 @@ +DROP FUNCTION IF EXISTS 02126_function; +CREATE FUNCTION 02126_function AS x -> x; +SELECT 02126_function(1); +DROP FUNCTION 02126_function; + +CREATE FUNCTION 02126_function AS () -> x; +SELECT 02126_function(); --{ serverError 47 } +DROP FUNCTION 02126_function; + +CREATE FUNCTION 02126_function AS () -> 5; +SELECT 02126_function(); +DROP FUNCTION 02126_function; From e9ae49c68b7e020c21a64cce200b685817fde348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 10:47:12 +0100 Subject: [PATCH 040/262] IParser: Set to vector --- src/Parsers/IParser.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 65b4b6df7f2..0d0e65082a9 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,7 +1,7 @@ #pragma once -#include #include +#include #include #include @@ -25,7 +25,7 @@ namespace ErrorCodes struct Expected { const char * max_parsed_pos = nullptr; - std::set variants; + std::vector 
variants; /// 'description' should be statically allocated string. void add(const char * current_pos, const char * description) @@ -37,7 +37,7 @@ struct Expected } if (!max_parsed_pos || current_pos >= max_parsed_pos) - variants.insert(description); + variants.push_back(description); } void add(TokenIterator it, const char * description) From 89734819ca868fcaf984d033fe24c0f63dbeb625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 10:47:21 +0100 Subject: [PATCH 041/262] Inline TokenIterator --- src/Parsers/TokenIterator.h | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index e3a5b9f79c3..b84bec57817 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -1,8 +1,10 @@ #pragma once -#include +#include #include +#include + namespace DB { @@ -59,22 +61,30 @@ private: public: explicit TokenIterator(Tokens & tokens_) : tokens(&tokens_) {} - const Token & get() { return (*tokens)[index]; } - const Token & operator*() { return get(); } - const Token * operator->() { return &get(); } + ALWAYS_INLINE const Token & get() { return (*tokens)[index]; } + ALWAYS_INLINE const Token & operator*() { return get(); } + ALWAYS_INLINE const Token * operator->() { return &get(); } - TokenIterator & operator++() { ++index; return *this; } - TokenIterator & operator--() { --index; return *this; } + ALWAYS_INLINE TokenIterator & operator++() + { + ++index; + return *this; + } + ALWAYS_INLINE TokenIterator & operator--() + { + --index; + return *this; + } - bool operator< (const TokenIterator & rhs) const { return index < rhs.index; } - bool operator<= (const TokenIterator & rhs) const { return index <= rhs.index; } - bool operator== (const TokenIterator & rhs) const { return index == rhs.index; } - bool operator!= (const TokenIterator & rhs) const { return index != rhs.index; } + ALWAYS_INLINE bool operator<(const TokenIterator & rhs) const { return index < rhs.index; } + ALWAYS_INLINE bool operator<=(const TokenIterator & rhs) const { return index <= rhs.index; } + ALWAYS_INLINE bool operator==(const TokenIterator & rhs) const { return index == rhs.index; } + ALWAYS_INLINE bool operator!=(const TokenIterator & rhs) const { return index != rhs.index; } - bool isValid() { return get().type < TokenType::EndOfStream; } + ALWAYS_INLINE bool isValid() { return get().type < TokenType::EndOfStream; } /// Rightmost token we had looked. - const Token & max() { return tokens->max(); } + ALWAYS_INLINE const Token & max() { return tokens->max(); } }; From 740451c723d1e43dc49a501752a7d4519f21d0b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 11:32:06 +0100 Subject: [PATCH 042/262] Inline Expected add --- src/Parsers/IParser.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 0d0e65082a9..32eac0fc396 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -28,7 +28,7 @@ struct Expected std::vector variants; /// 'description' should be statically allocated string. 
- void add(const char * current_pos, const char * description) + ALWAYS_INLINE void add(const char * current_pos, const char * description) { if (!max_parsed_pos || current_pos > max_parsed_pos) { @@ -40,7 +40,7 @@ struct Expected variants.push_back(description); } - void add(TokenIterator it, const char * description) + ALWAYS_INLINE void add(TokenIterator it, const char * description) { add(it->begin, description); } From 6218fc4ad50bac481922f54b3966a80b73f26599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 11:32:22 +0100 Subject: [PATCH 043/262] Inline IParser depth --- src/Parsers/IParser.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 32eac0fc396..4b80eadd4d0 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -60,18 +60,18 @@ public: Pos(Tokens & tokens_, uint32_t max_depth_) : TokenIterator(tokens_), max_depth(max_depth_) {} - void increaseDepth() + ALWAYS_INLINE void increaseDepth() { ++depth; - if (max_depth > 0 && depth > max_depth) + if (unlikely(max_depth > 0 && depth > max_depth)) throw Exception( "Maximum parse depth (" + std::to_string(max_depth) + ") exceeded. Consider rising max_parser_depth parameter.", ErrorCodes::TOO_DEEP_RECURSION); } - void decreaseDepth() + ALWAYS_INLINE void decreaseDepth() { - if (depth == 0) + if (unlikely(depth == 0)) throw Exception("Logical error in parser: incorrect calculation of parse depth", ErrorCodes::LOGICAL_ERROR); --depth; } From 8439f4c96c0ce67e3a6b36eb9cc977f770a25b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 12:15:30 +0100 Subject: [PATCH 044/262] Inline IParserBase wrappers --- src/Parsers/IParserBase.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/IParserBase.h b/src/Parsers/IParserBase.h index cf69e5f2dfa..ce08bdef790 100644 --- a/src/Parsers/IParserBase.h +++ b/src/Parsers/IParserBase.h @@ -12,7 +12,7 @@ class IParserBase : public IParser { public: template - static bool wrapParseImpl(Pos & pos, const F & func) + ALWAYS_INLINE static bool wrapParseImpl(Pos & pos, const F & func) { Pos begin = pos; bool res = func(); @@ -24,7 +24,7 @@ public: struct IncreaseDepthTag {}; template - static bool wrapParseImpl(Pos & pos, IncreaseDepthTag, const F & func) + ALWAYS_INLINE static bool wrapParseImpl(Pos & pos, IncreaseDepthTag, const F & func) { Pos begin = pos; pos.increaseDepth(); From 02035643c1ebae63c5fe5c28be1dd5573d56d140 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 12:52:25 +0100 Subject: [PATCH 045/262] Add perf test --- tests/performance/explain_ast.xml | 5911 +++++++++++++++++++++++++++++ 1 file changed, 5911 insertions(+) create mode 100644 tests/performance/explain_ast.xml diff --git a/tests/performance/explain_ast.xml b/tests/performance/explain_ast.xml new file mode 100644 index 00000000000..0daa748de83 --- /dev/null +++ b/tests/performance/explain_ast.xml @@ -0,0 +1,5911 @@ + + + + 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 
0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT 
+c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, 
c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM 
database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, 
+c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS 
max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, 
+assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - 
c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, 
NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, 
+NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL 
+SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL 
AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, 
+joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', 
', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS 
c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, +least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, 
+greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) 
AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS 
c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS 
c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, 
+arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, +avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 
+WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 
+ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +UNION ALL +SELECT * +FROM +( +SELECT +c1, +c2, +c3_q[1] AS c3_q1, +c3_q[3] AS c3_q3, +c3_q[2] AS c3_median, +least(c3_max, c3_q3 + (1.5 * (c3_q3 - c3_q1))) AS c3_max, +greatest(c3_min, c3_q1 - (1.5 * (c3_q3 - c3_q1))) AS c3_min, +c3_avg, +c4_q[1] AS c4_q1, +c4_q[3] AS c4_q3, +c4_q[2] AS c4_median, 
+least(c4_max, c4_q3 + (1.5 * (c4_q3 - c4_q1))) AS c4_max, +greatest(c4_min, c4_q1 - (1.5 * (c4_q3 - c4_q1))) AS c4_min, +c4_avg, +c5_q[1] AS c5_q1, +c5_q[3] AS c5_q3, +c5_q[2] AS c5_median, +least(c5_max, c5_q3 + (1.5 * (c5_q3 - c5_q1))) AS c5_max, +greatest(c5_min, c5_q1 - (1.5 * (c5_q3 - c5_q1))) AS c5_min, +c5_avg, +c6_q[1] AS c6_q1, +c6_q[3] AS c6_q3, +c6_q[2] AS c6_median, +least(c6_max, c6_q3 + (1.5 * (c6_q3 - c6_q1))) AS c6_max, +greatest(c6_min, c6_q1 - (1.5 * (c6_q3 - c6_q1))) AS c6_min, +c6_avg, +c7_q[1] AS c7_q1, +c7_q[3] AS c7_q3, +c7_q[2] AS c7_median, +least(c7_max, c7_q3 + (1.5 * (c7_q3 - c7_q1))) AS c7_max, +greatest(c7_min, c7_q1 - (1.5 * (c7_q3 - c7_q1))) AS c7_min, +c7_avg, +c8_q[1] AS c8_q1, +c8_q[3] AS c8_q3, +c8_q[2] AS c8_median, +least(c8_max, c8_q3 + (1.5 * (c8_q3 - c8_q1))) AS c8_max, +greatest(c8_min, c8_q1 - (1.5 * (c8_q3 - c8_q1))) AS c8_min, +c8_avg, +c9_q[1] AS c9_q1, +c9_q[3] AS c9_q3, +c9_q[2] AS c9_median, +least(c9_max, c9_q3 + (1.5 * (c9_q3 - c9_q1))) AS c9_max, +greatest(c9_min, c9_q1 - (1.5 * (c9_q3 - c9_q1))) AS c9_min, +c9_avg, +c10_q[1] AS c10_q1, +c10_q[3] AS c10_q3, +c10_q[2] AS c10_median, +least(c10_max, c10_q3 + (1.5 * (c10_q3 - c10_q1))) AS c10_max, +greatest(c10_min, c10_q1 - (1.5 * (c10_q3 - c10_q1))) AS c10_min, +c10_avg, +c10_avg, +c11_q[1] AS c11_q1, +c11_q[3] AS c11_q3, +c11_q[2] AS c11_median, +least(c11_max, c11_q3 + (1.5 * (c11_q3 - c11_q1))) AS c11_max, +greatest(c11_min, c11_q1 - (1.5 * (c11_q3 - c11_q1))) AS c11_min, +c11_avg, +c12_q[1] AS c12_q1, +c12_q[3] AS c12_q3, +c12_q[2] AS c12_median, +least(c12_max, c12_q3 + (1.5 * (c12_q3 - c12_q1))) AS c12_max, +greatest(c12_min, c12_q1 - (1.5 * (c12_q3 - c12_q1))) AS c12_min, +c12_avg, +c13_q[1] AS c13_q1, +c13_q[3] AS c13_q3, +c13_q[2] AS c13_median, +least(c13_max, c13_q3 + (1.5 * (c13_q3 - c13_q1))) AS c13_max, +greatest(c13_min, c13_q1 - (1.5 * (c13_q3 - c13_q1))) AS c13_min, +c13_avg, +c14_q[1] AS c14_q1, +c14_q[3] AS c14_q3, +c14_q[2] AS c14_median, +least(c14_max, c14_q3 + (1.5 * (c14_q3 - c14_q1))) AS c14_max, +greatest(c14_min, c14_q1 - (1.5 * (c14_q3 - c14_q1))) AS c14_min, +c14_avg, +c15_q[1] AS c15_q1, +c15_q[3] AS c15_q3, +c15_q[2] AS c15_median, +least(c15_max, c15_q3 + (1.5 * (c15_q3 - c15_q1))) AS c15_max, +greatest(c15_min, c15_q1 - (1.5 * (c15_q3 - c15_q1))) AS c15_min, +c15_avg, +c16_q[1] AS c16_q1, +c16_q[3] AS c16_q3, +c16_q[2] AS c16_median, +least(toFloat64(c16_max), c16_q3 + (1.5 * (c16_q3 - c16_q1))) AS c16_max, +greatest(toFloat64(c16_min), c16_q1 - (1.5 * (c16_q3 - c16_q1))) AS c16_min, +c16_avg, +c17_q[1] AS c17_q1, +c17_q[3] AS c17_q3, +c17_q[2] AS c17_median, +least(toFloat64(c17_max), c17_q3 + (1.5 * (c17_q3 - c17_q1))) AS c17_max, +greatest(toFloat64(c17_min), c17_q1 - (1.5 * (c17_q3 - c17_q1))) AS c17_min, +c17_avg, +c18_q[1] AS c18_q1, +c18_q[3] AS c18_q3, +c18_q[2] AS c18_median, +least(toFloat64(c18_max), c18_q3 + (1.5 * (c18_q3 - c18_q1))) AS c18_max, +greatest(toFloat64(c18_min), c18_q1 - (1.5 * (c18_q3 - c18_q1))) AS c18_min, +c18_avg, +round(if(c19 != 0, c24 / c19, 0), 2) AS c20, +c21, +c22, +c23 AS c23, +c19 AS c19, +c16 AS c16, +c17 AS c17, +c18 AS c18, +round(c24, 2) AS c24, +round(if(c17 != 0, c24 / c17, 0), 2) AS c25, +'CH' AS c26 +FROM +( +SELECT +c1, +c2, +groupUniqArray(c27) AS c28, +groupUniqArrayIf(c27, isNotNull(c29)) AS c28_with_c29, +quantiles(0.25, 0.5, 0.75)(if(c3 > 0, c3, NULL)) AS c3_q, +quantiles(0.25, 0.5, 0.75)(if(c4 > 0, c4, NULL)) AS c4_q, +quantiles(0.25, 0.5, 0.75)(t.c17 / t.c19) AS c5_q, +quantiles(0.25, 0.5, 0.75)(c6) AS 
c6_q, +quantiles(0.25, 0.5, 0.75)(c7) AS c7_q, +quantiles(0.25, 0.5, 0.75)(c8) AS c8_q, +quantiles(0.25, 0.5, 0.75)(c9) AS c9_q, +quantiles(0.25, 0.5, 0.75)(c10) AS c10_q, +quantiles(0.25, 0.5, 0.75)(c11) AS c11_q, +quantiles(0.25, 0.5, 0.75)(c12) AS c12_q, +quantiles(0.25, 0.5, 0.75)(c13) AS c13_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_q, +quantiles(0.25, 0.5, 0.75)(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_q, +quantiles(0.25, 0.5, 0.75)(t.c16) AS c16_q, +quantiles(0.25, 0.5, 0.75)(t.c17) AS c17_q, +quantiles(0.25, 0.5, 0.75)(if(t.c18 > 0, t.c18, NULL)) AS c18_q, +max(if(c3 > 0, c3, NULL)) AS c3_max, +min(if(c3 > 0, c3, NULL)) AS c3_min, +avg(if(c3 > 0, c3, NULL)) AS c3_avg, +max(if(c4 > 0, c4, NULL)) AS c4_max, +min(if(c4 > 0, c4, NULL)) AS c4_min, +avg(if(c4 > 0, c4, NULL)) AS c4_avg, +max(t.c17 / t.c19) AS c5_max, +min(t.c17 / t.c19) AS c5_min, +avg(t.c17 / t.c19) AS c5_avg, +max(if(c6 > 0, c6, NULL)) AS c6_max, +min(if(c6 > 0, c6, NULL)) AS c6_min, +avg(if(c6 > 0, c6, NULL)) AS c6_avg, +max(if(c7 > 0, c7, NULL)) AS c7_max, +min(if(c7 > 0, c7, NULL)) AS c7_min, +avg(if(c7 > 0, c7, NULL)) AS c7_avg, +max(if(c10 > 0, c10, NULL)) AS c10_max, +min(if(c10 > 0, c10, NULL)) AS c10_min, +avg(if(c10 > 0, c10, NULL)) AS c10_avg, +max(if(c8 > 0, c8, NULL)) AS c8_max, +min(if(c8 > 0, c8, NULL)) AS c8_min, +avg(if(c8 > 0, c8, NULL)) AS c8_avg, +max(if(c9 > 0, c9, NULL)) AS c9_max, +min(if(c9 > 0, c9, NULL)) AS c9_min, +avg(if(c9 > 0, c9, NULL)) AS c9_avg, +max(if(c11 > 0, c11, NULL)) AS c11_max, +min(if(c11 > 0, c11, NULL)) AS c11_min, +avg(if(c11 > 0, c11, NULL)) AS c11_avg, +max(if(c12 > 0, c12, NULL)) AS c12_max, +min(if(c12 > 0, c12, NULL)) AS c12_min, +avg(if(c12 > 0, c12, NULL)) AS c12_avg, +max(if(c13 > 0, c13, NULL)) AS c13_max, +min(if(c13 > 0, c13, NULL)) AS c13_min, +avg(if(c13 > 0, c13, NULL)) AS c13_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c19) AS c14_avg, +max(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_max, +min(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_min, +avg(if(t.c24 > 0, t.c24, NULL) / t.c17) AS c15_avg, +max(t.c16) AS c16_max, +min(t.c16) AS c16_min, +avg(t.c16) AS c16_avg, +max(t.c17) AS c17_max, +min(t.c17) AS c17_min, +avg(t.c17) AS c17_avg, +max(if(t.c18 > 0, t.c18, NULL)) AS c18_max, +min(if(t.c18 > 0, t.c18, NULL)) AS c18_min, +avg(if(t.c18 > 0, t.c18, NULL)) AS c18_avg, +sum(t.c19) AS c19, +sum(if(t.c18 > 0, t.c18, NULL)) AS c18, +sum(t.c16) AS c16, +sum(c23) AS c23, +sum(t.c17) AS c17, +sum(if(t.c24 > 0, t.c24, NULL)) AS c24, +c24 / c19 AS c14, +c24 / c17 AS c15, +median(if(isNotNull(c29) AND (t.c22 > 0), c13 * (t.c22 / c29), NULL)) AS c21, +sum(c22) AS c22 +FROM +( +SELECT +c27, +c39 AS c1, +c29, +c19, +c23, +c17, +c16, +c18, +c22, +c24, +c3, +c4, +c8, +c9, +c10, +c11, +c12, +c13, +c6, +c7 +FROM +( +SELECT +c27, +uniqExact(c30, c31) AS c19, +uniqExact(c30, c31, c32) AS c23, +uniqExactIf(c30, c31, c33 IN ('c37', 'c38')) AS c17, +countIf(c33 IN ('c37', 'c38')) AS c16, +countIf(c33 = 'c39') AS c18, +coalesce(sumIf(c29, c33 = 'c39'), 0) AS c22, +coalesce(sumIf(c37, c33 = 'c39'), 0) AS c24, +if((c18 > 0) AND (c19 > 0), c18 / c19, NULL) AS c3, +if(c17 != 0, c18 / c17, NULL) AS c4, +coalesce(avgIf(c34, (c34 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c8, +coalesce(avgIf(c35, (c35 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c9, +coalesce(avgIf(c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c10, +coalesce(avgIf(c35, (c35 > 0) AND (c33 = 
'c39')), NULL) AS c11, +coalesce(avgIf(c37, c33 = 'c39'), NULL) AS c12, +coalesce(avgIf(c37 / c34, (c34 > 0) AND (c33 = 'c39')), NULL) AS c13, +coalesce(avgIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38'))), NULL) AS c6, +coalesce(minIf(c37, (c37 > 0) AND (c33 IN ('c37', 'c38')) AND (c37 > (c36 / 2))), NULL) AS c7 +FROM +( +SELECT +c27, +c30, +c32, +c31, +NULL AS c29, +NULL AS c33, +NULL AS c37, +NULL AS c34, +NULL AS c35 +FROM +( +SELECT +c27, +c30, +c32, +c31 +FROM database.t1 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE c61 = 0 +) AS table25 +UNION ALL 
+SELECT +c27, +c30, +c32, +c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c37' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table24 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table23 +UNION ALL +SELECT +c27, +c30, +c32, 
+c31, +c29, +c33, +c37, +c34, +c35 +FROM +( +SELECT +c27, +c30, +c32, +'c39' AS c33, +coalesce(c37 * joinGet('database.table18', 'c60', concat(c26, '_', 'CH')), 0) AS c37, +if(c53 > 0, c53, 2) AS c53, +c54, +if(c29 > 0, c29, 1) AS c29, +c55, +c56, +datediff('day', c55, c56) AS c34, +datediff('day', c32, c55) AS c35, +c31 +FROM database.table22 +PREWHERE ((c32 >= parseDateTimeBestEffort('2020-01-01')) AND (c32 <= parseDateTimeBestEffort('2020-01-01 23:59:59'))) AND (c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +)) +WHERE (c61 = 0) AND (c37 < (666 * (1 / joinGet('database.table18', 'c60', concat(c26, '_', 'CH'))))) +) AS table21 +) AS table20 +ALL LEFT JOIN +( +SELECT +c27, 
+avgMerge(avg_c37) * joinGet('database.table18', 'c60', concat('USD', '_', 'CH')) AS c36 +FROM database.table19 +PREWHERE c27 IN +( +SELECT comp_c27 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) +WHERE date > (now() - toIntervalMonth(3)) +GROUP BY c27 +) AS table17 USING (c27) +GROUP BY c27 +) AS table16 +ALL LEFT JOIN +( +SELECT +comp_c27 AS c27, +assumeNotNull(c39) AS c39, +c29 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, 
+any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, +NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +) USING (c27) +) AS t +ALL LEFT JOIN +( +SELECT +c1, +c2 +FROM +( +SELECT +c39 AS c1, +groupArray(comp_c27) AS c49, +multiIf(c1 = 'c58', if(length(c49) <= 2, 0, 1), c1 = 'c57', 1, if(length(c49) <= 3, 0, 1)) AS c2 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +min_c32, +max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38 +FROM +( +SELECT +comp_c27, +groupArray(c39) AS c39, +any(c40) AS c40, +any(c41) AS c41, +any(c42) AS c42, +any(c29) AS c29, +any(c43) AS c43, +any(c44) AS c44, +any(min_c32) AS min_c32, +any(max_c32) AS max_c32, +any(c45) AS c45, +any(c46) AS c46, +any(c38) AS c38, +any(c47) AS c47 +FROM +( +SELECT +c27 AS comp_c27, +if(comp_c27 = 0, toDate('2010-01-01'), toDate(minMerge(min_c32))) AS min_c32, +if(comp_c27 = 0, toDate(now()), toDate(maxMerge(max_c32))) + 1 AS max_c32, +NULL AS c39, +NULL AS c40, +NULL AS c41, +NULL AS c42, +NULL AS c29, +NULL AS c43, +NULL AS c44, +NULL AS c45, 
+NULL AS c46, +NULL AS c38, +NULL AS c47 +FROM database.table15 +GROUP BY comp_c27 +UNION ALL +SELECT +comp_c27, +NULL AS min_c32, +NULL AS max_c32, +c39, +c40, +c41, +c42, +c29, +c43, +c44, +c45, +c46, +c38, +c47 +FROM +( +SELECT +c39, +comp_c27 AS c27, +comp_c27, +c40, +c41, +assumeNotNull(c45) AS c45, +assumeNotNull(c46) AS c46, +assumeNotNull(c38) AS c38, +joinGet('database.table14', 'c48', c40) AS c42, +joinGet('database.table14', 'c29', c40) AS c29, +joinGet('database.table14', 'c43', c40) AS c43, +joinGet('database.table14', 'property_c44', c40) AS c44, +splitByChar(',', assumeNotNull(joinGet('database.jointable13', 'prefix_c33', comp_c27))) AS c33s, +joinGet('database.jointable13', 'c47', comp_c27) AS c47 +FROM +( +SELECT +c39, +comp_c27, +joinGet('database.jointable13', 'c40', comp_c27) AS c40, +c41, +c45, +c46, +c38 +FROM +( +SELECT +c39, +arrayJoin(arrayMap(x -> toInt64(x), arrayFilter(x -> (length(x) > 0), splitByString(', ', c49)))) AS comp_c27, +c41, +c45, +c46, +c38 +FROM +( +SELECT +'c57' AS c39, +toString(c27) AS c49, +1 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE c27 IN (322) +UNION ALL +SELECT +'c58' AS c39, +arrayStringConcat(groupArray(toString(c27)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table12 +WHERE chain_id IN +( +SELECT chain_id +FROM database.table12 +WHERE c27 IN (322) +) +UNION ALL +SELECT +'c59' AS c39, +assumeNotNull(c27s_str) AS c49, +0 AS c41, +c50 AS c45, +c51 AS c46, +c52 AS c38 +FROM +( +SELECT * +FROM table11 +WHERE c27 IN (322) +) AS c1s_c59 +WHERE c27 IN (322) +UNION ALL +SELECT +'superSupercalifragilisticexpialidocious' AS c39, +arrayStringConcat(groupArray(toString(c1_id)), ', ') AS c49, +0 AS c41, +'' AS c45, +'' AS c46, +0 AS c38 +FROM database.table10 +WHERE c27 IN (322) +) AS table9 +) +) AS a +) AS table8 +) AS table7 +GROUP BY comp_c27 +) AS table6 +WHERE (parseDateTimeBestEffort('2020-01-01') >= min_c32) AND (max_c32 >= (parseDateTimeBestEffort('2021-05-02') - 2)) +) AS table5 +ARRAY JOIN c39 +WHERE isNotNull(c39) +) AS table4 +GROUP BY c39 +) AS table3 +) USING (c1) +GROUP BY +c1, +c2 +) AS table2 +ORDER BY c1 ASC +) AS table1 +FORMAT Null + ]]> + + From fc48172980d9137b89d683f099688365e9ee4c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 29 Nov 2021 19:46:51 +0100 Subject: [PATCH 046/262] Expected: Drop duplicates --- src/Parsers/IParser.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 4b80eadd4d0..64f117c707f 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -34,9 +34,11 @@ struct Expected { variants.clear(); max_parsed_pos = current_pos; + variants.push_back(description); + return; } - if (!max_parsed_pos || current_pos >= max_parsed_pos) + if ((current_pos == max_parsed_pos) && (find(variants.begin(), variants.end(), description) == variants.end())) variants.push_back(description); } From 0cba91f63d1fa667a9963077efaa1a23eca7538f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 1 Dec 2021 11:41:44 +0300 Subject: [PATCH 047/262] SQLUserDefinedFunctionVisitor updated exception message --- src/Interpreters/UserDefinedSQLFunctionVisitor.cpp | 4 +++- .../02126_identity_user_defined_function.reference | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp index bc63a4dfec3..3e82930af9d 100644 --- 
a/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp +++ b/src/Interpreters/UserDefinedSQLFunctionVisitor.cpp @@ -39,7 +39,9 @@ bool UserDefinedSQLFunctionMatcher::needChildVisit(const ASTPtr &, const ASTPtr ASTPtr UserDefinedSQLFunctionMatcher::tryToReplaceFunction(const ASTFunction & function, std::unordered_set & udf_in_replace_process) { if (udf_in_replace_process.find(function.name) != udf_in_replace_process.end()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Recursive function call during function user defined function call {}", function.name); + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Recursive function call detected during function call {}", + function.name); auto user_defined_function = UserDefinedSQLFunctionFactory::instance().tryGet(function.name); if (!user_defined_function) diff --git a/tests/queries/0_stateless/02126_identity_user_defined_function.reference b/tests/queries/0_stateless/02126_identity_user_defined_function.reference index d00491fd7e5..26b37d07fac 100644 --- a/tests/queries/0_stateless/02126_identity_user_defined_function.reference +++ b/tests/queries/0_stateless/02126_identity_user_defined_function.reference @@ -1 +1,2 @@ 1 +5 From 8dbc7a8dae17090a18778f29629d8746a1bb9b72 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 1 Dec 2021 23:17:31 +0800 Subject: [PATCH 048/262] Fix detaching parts with projections --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 +--- .../01710_projection_detach_part.reference | 1 + .../0_stateless/01710_projection_detach_part.sql | 15 +++++++++++++++ 3 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01710_projection_detach_part.reference create mode 100644 tests/queries/0_stateless/01710_projection_detach_part.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 93149f87f99..1d89d53d64a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1323,9 +1323,7 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix) const void IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const { String destination_path = fs::path(storage.relative_data_path) / getRelativePathForDetachedPart(prefix); - - /// Backup is not recursive (max_level is 0), so do not copy inner directories - localBackup(volume->getDisk(), getFullRelativePath(), destination_path, 0); + localBackup(volume->getDisk(), getFullRelativePath(), destination_path); volume->getDisk()->removeFileIfExists(fs::path(destination_path) / DELETE_ON_DESTROY_MARKER_FILE_NAME); } diff --git a/tests/queries/0_stateless/01710_projection_detach_part.reference b/tests/queries/0_stateless/01710_projection_detach_part.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_detach_part.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01710_projection_detach_part.sql b/tests/queries/0_stateless/01710_projection_detach_part.sql new file mode 100644 index 00000000000..e3e6c7ac165 --- /dev/null +++ b/tests/queries/0_stateless/01710_projection_detach_part.sql @@ -0,0 +1,15 @@ +set allow_experimental_projection_optimization = 1; + +drop table if exists t; + +create table t (i int, j int, projection x (select * order by j)) engine MergeTree partition by i order by i; + +insert into t values (1, 2); + +alter table t detach partition 1; + +alter table t attach 
partition 1; + +select count() from system.projection_parts where database = currentDatabase() and table = 't'; + +drop table t; From 1b155c98cac38f62d8087ef8999ae83356db72c3 Mon Sep 17 00:00:00 2001 From: Alexey Date: Wed, 1 Dec 2021 16:10:51 +0000 Subject: [PATCH 049/262] ru translation --- docs/ru/interfaces/grpc.md | 89 +++++++++++++++++++++++++++++++++++++ docs/ru/interfaces/index.md | 7 +-- 2 files changed, 93 insertions(+), 3 deletions(-) diff --git a/docs/ru/interfaces/grpc.md b/docs/ru/interfaces/grpc.md index f4b61334a3a..96eb6773bfb 100644 --- a/docs/ru/interfaces/grpc.md +++ b/docs/ru/interfaces/grpc.md @@ -4,3 +4,92 @@ toc_title: gRPC интерфейс --- # Интерфейс gRPC {#grpc-interface} + +## Введение {#grpc-interface-introduction} + +ClickHouse поддерживает интерфейс [gRPC](https://grpc.io/). Это система удаленного вызова процедур с открытым исходным кодом, которая использует HTTP/2 и [Protocol Buffers](https://ru.wikipedia.org/wiki/Protocol_Buffers). В реализации gRPC в ClickHouse поддерживаются: + +- SSL; +- аутентификацию; +- сессии; +- сжатие; +- параллельные запросы, выполняемые через один канал; +- отмена запросов; +- получение прогресса операций и логов; +- внешние таблицы. + +Спецификация интерфейса содержится в [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). + +## Конфигурация gRPC {#grpc-interface-configuration} + +Чтобы сделать доступным интерфейс gRPC, нужно задать порт с помощью настройки `grpc_port` в [конфигурации сервера](../operations/configuration-files.md). Другие настройки приведены в примере: + +```xml +9100 + + false + + + /path/to/ssl_cert_file + /path/to/ssl_key_file + + + false + + + /path/to/ssl_ca_cert_file + + + deflate + + + medium + + + -1 + -1 + + + false + +``` + +## Встроенный клиент {#grpc-client} + +Можно написать клиент на любом языке программирования, который поддерживается gRPC с использованием [спецификации](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto) +Также можно воспользоваться встроенным Python клиентом. Он расположен в [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) в репозитории. Для работы встроенного клиента требуются Python модули [grpcio и grpcio-tools](https://grpc.io/docs/languages/python/quickstart). + +Чтобы запустить клиент в интерактивном режиме, вызовите его без аргументов. Клиент поддерживает аргументы: + +- `--help` – вывести справку и завершить работу. +- `--host HOST, -h HOST` – имя сервера. Значение по умолчанию: `localhost`. Можно задать адрес IPv4 или IPv6. +- `--port PORT` – номер порта. Этот порт должен быть задан в конфигурации сервера ClickHouse настройкой `grpc_port`. +- `--user USER_NAME, -u USER_NAME` – имя пользователя. Значение по умолчанию: `default`. +- `--password PASSWORD` – пароль. Значение по умолчанию: пустая строка. +- `--query QUERY, -q QUERY` – запрос, который нужно выполнить. +- `--database DATABASE, -d DATABASE` – база данных по умолчанию. Если не указана, то будет использована база данных, заданная в настройках сервера (по умолчанию `default`). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – [формат](formats.md) вывода результата. +- `--debug` – вывод отладочной информации. + +**Пример использования клиента** + +В примере создается таблица, и в нее загружаются данные из CSV файла. Затем выводится содержимое таблицы. 
+ +``` text +./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" +echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt +cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" + +./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;" +``` + +Результат: + +``` text +┌─id─┬─text──────────────────┐ +│ 0 │ Input data for │ +│ 1 │ gRPC protocol example │ +└────┴───────────────────────┘ +``` diff --git a/docs/ru/interfaces/index.md b/docs/ru/interfaces/index.md index 12e8853823e..b23a402e0b7 100644 --- a/docs/ru/interfaces/index.md +++ b/docs/ru/interfaces/index.md @@ -6,12 +6,13 @@ toc_title: "Введение" # Интерфейсы {#interfaces} -ClickHouse предоставляет два сетевых интерфейса (оба могут быть дополнительно обернуты в TLS для дополнительной безопасности): +ClickHouse предоставляет три сетевых интерфейса (они могут быть обернуты в TLS для дополнительной безопасности): - [HTTP](http.md), который задокументирован и прост для использования напрямую; -- [Native TCP](tcp.md), который имеет меньше накладных расходов. +- [Native TCP](tcp.md), который имеет меньше накладных расходов; +- [gRPC](grpc.md). -В большинстве случаев рекомендуется использовать подходящий инструмент или библиотеку, а не напрямую взаимодействовать с ClickHouse по сути. Официально поддерживаемые Яндексом: +В большинстве случаев рекомендуется использовать подходящий инструмент или библиотеку, а не напрямую взаимодействовать с ClickHouse. Официально поддерживаемые Яндексом: - [Консольный клиент](cli.md); - [JDBC-драйвер](jdbc.md); From 8e9f4949e4aa33c8407f8c6e2be12459842fc465 Mon Sep 17 00:00:00 2001 From: frank chen Date: Thu, 2 Dec 2021 15:49:34 +0800 Subject: [PATCH 050/262] Set Content-Type in HTTP packets issued from URL engine Signed-off-by: frank chen --- src/IO/WriteBufferFromHTTP.cpp | 11 ++++++++++- src/IO/WriteBufferFromHTTP.h | 1 + src/Storages/StorageURL.cpp | 27 ++++++++++++++++++++++++++- 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 6bb6936855f..352e38f3529 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -7,7 +7,11 @@ namespace DB { WriteBufferFromHTTP::WriteBufferFromHTTP( - const Poco::URI & uri, const std::string & method, const ConnectionTimeouts & timeouts, size_t buffer_size_) + const Poco::URI & uri, + const std::string & method, + const std::string & content_type, + const ConnectionTimeouts & timeouts, + size_t buffer_size_) : WriteBufferFromOStream(buffer_size_) , session{makeHTTPSession(uri, timeouts)} , request{method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} @@ -15,6 +19,11 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( request.setHost(uri.getHost()); request.setChunkedTransferEncoding(true); + if (!content_type.empty()) + { + request.set("Content-Type", content_type); + } + LOG_TRACE((&Poco::Logger::get("WriteBufferToHTTP")), "Sending request to {}", uri.toString()); ostr = &session->sendRequest(request); diff --git a/src/IO/WriteBufferFromHTTP.h b/src/IO/WriteBufferFromHTTP.h index cfd3597a95c..31b2a921889 100644 --- a/src/IO/WriteBufferFromHTTP.h +++ b/src/IO/WriteBufferFromHTTP.h @@ -20,6 +20,7 @@ class WriteBufferFromHTTP : public WriteBufferFromOStream public: explicit WriteBufferFromHTTP(const Poco::URI & uri, const std::string & method = Poco::Net::HTTPRequest::HTTP_POST, // POST or PUT only + const 
std::string & content_type = "", const ConnectionTimeouts & timeouts = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 527458ab668..005c842c783 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -219,8 +219,33 @@ StorageURLSink::StorageURLSink( const String & http_method) : SinkToStorage(sample_block) { + // + // get the content type first + // + // The code here may look a little weird. + // The getContentType() is provided on the IOutputFormat class, which relies on a WriteBuffer object, + // and this WriteBuffer object here is WriteBufferFromHTTP itself, which accepts the Content-Type header. + // So, this is a cyclic dependency. + // To decouple such a dependency, we must be able to set the header on 'WriteBufferFromHTTP' after we get the instance of the output format by calling IOutputFormat::getContentType. + // But this is tricky because the 'WriteBufferFromHTTP' object may have been decorated by 'WriteBufferWithCompression' and is not accessible due to private modifiers. + // + // So, here we first instantiate an OutputFormat object with a fake stream to get the Content-Type. + // This is not the best way, but it is simpler to understand. + // + std::string content_type; + { + WriteBufferFromOStream buffer(std::cout); + auto output = FormatFactory::instance().getOutputFormat(format, + buffer, + sample_block, + context, + {} /* write callback */, + format_settings); + content_type = output->getContentType(); + } + write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(Poco::URI(uri), http_method, timeouts), + std::make_unique(Poco::URI(uri), http_method, content_type, timeouts), compression_method, 3); writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); From 0491bf791bebbabb455f9afef5145f51facc6064 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Dec 2021 12:44:54 +0300 Subject: [PATCH 051/262] Dictionaries custom query condition fix --- src/Dictionaries/ExternalQueryBuilder.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Dictionaries/ExternalQueryBuilder.cpp b/src/Dictionaries/ExternalQueryBuilder.cpp index 9ddaaeb573a..f513c7b2f61 100644 --- a/src/Dictionaries/ExternalQueryBuilder.cpp +++ b/src/Dictionaries/ExternalQueryBuilder.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static constexpr std::string_view CONDITION_PLACEHOLDER_TO_REPLACE_VALUE = "{condition}"; ExternalQueryBuilder::ExternalQueryBuilder( const DictionaryStructure & dict_struct_, @@ -215,7 +216,7 @@ std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_ { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -230,7 +231,7 @@ std::string ExternalQueryBuilder::composeUpdateQuery(const std::string & update_ const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } @@ -300,7 +301,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector { writeString(query, out); - auto condition_position =
query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -315,7 +316,7 @@ std::string ExternalQueryBuilder::composeLoadIdsQuery(const std::vector const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } @@ -391,7 +392,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( { writeString(query, out); - auto condition_position = query.find("{condition}"); + auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE); if (condition_position == std::string::npos) { writeString(" WHERE ", out); @@ -406,7 +407,7 @@ std::string ExternalQueryBuilder::composeLoadKeysQuery( const auto & condition_value = condition_value_buffer.str(); auto query_copy = query; - query_copy.replace(condition_position, condition_value.size(), condition_value); + query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), condition_value); return query_copy; } From 492f9c3936940894594928eb0c81265ea654792b Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Dec 2021 13:11:21 +0300 Subject: [PATCH 052/262] Fix flacky test test_executable_storage_input --- tests/integration/test_executable_table_function/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_executable_table_function/test.py b/tests/integration/test_executable_table_function/test.py index 1473212552a..f5537e26b94 100644 --- a/tests/integration/test_executable_table_function/test.py +++ b/tests/integration/test_executable_table_function/test.py @@ -63,8 +63,8 @@ def test_executable_storage_no_input(started_cluster): def test_executable_storage_input(started_cluster): skip_test_msan(node) node.query("DROP TABLE IF EXISTS test_table") - node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_no_input.sh', 'TabSeparated', (SELECT 1))") - assert node.query("SELECT * FROM test_table") == '1\n' + node.query("CREATE TABLE test_table (value String) ENGINE=Executable('test_input.sh', 'TabSeparated', (SELECT 1))") + assert node.query("SELECT * FROM test_table") == 'Key 1\n' node.query("DROP TABLE test_table") def test_executable_storage_input_multiple_pipes(started_cluster): From 9bb0663841120ced8a290a12ce62564b7ca6fc74 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 2 Dec 2021 13:13:58 +0300 Subject: [PATCH 053/262] Fixed tests --- .../0_stateless/02125_recursive_sql_user_defined_functions.sql | 2 ++ .../0_stateless/02126_identity_user_defined_function.sql | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql index 2cbaa4b5aaf..1870521c255 100644 --- a/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql +++ b/tests/queries/0_stateless/02125_recursive_sql_user_defined_functions.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP FUNCTION IF EXISTS 02125_function; CREATE FUNCTION 02125_function AS x -> 02125_function(x); SELECT 02125_function(1); --{serverError 1}; diff --git a/tests/queries/0_stateless/02126_identity_user_defined_function.sql b/tests/queries/0_stateless/02126_identity_user_defined_function.sql index 
8c3b7b85d48..a53c6e28a48 100644 --- a/tests/queries/0_stateless/02126_identity_user_defined_function.sql +++ b/tests/queries/0_stateless/02126_identity_user_defined_function.sql @@ -1,3 +1,5 @@ +-- Tags: no-parallel + DROP FUNCTION IF EXISTS 02126_function; CREATE FUNCTION 02126_function AS x -> x; SELECT 02126_function(1); From 6f5064e7bf7180152fd71a262e8c9ec54d4a5c5e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 2 Dec 2021 14:53:14 +0300 Subject: [PATCH 054/262] Add a test with 20000 mutations in one query --- .../02125_many_mutations.reference | 6 +++ .../0_stateless/02125_many_mutations.sh | 49 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/queries/0_stateless/02125_many_mutations.reference create mode 100755 tests/queries/0_stateless/02125_many_mutations.sh diff --git a/tests/queries/0_stateless/02125_many_mutations.reference b/tests/queries/0_stateless/02125_many_mutations.reference new file mode 100644 index 00000000000..c98d8221c7f --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations.reference @@ -0,0 +1,6 @@ +0 0 +1 1 +20000 +0 +0 20000 +1 20001 diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh new file mode 100755 index 00000000000..727cc9d6213 --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x" +$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" +$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" + +$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" + +job() +{ + for i in {1..1000} + do + $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" + done +} + +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & + +wait + +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "system start merges many_mutations" +$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" From 5662d0aa5915f005ca8f86c63d444b19ee3df0ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 2 Dec 2021 14:53:55 +0100 Subject: [PATCH 055/262] Use softer checks --- src/Interpreters/ProcessList.cpp | 8 ++++++++ src/Interpreters/ProcessList.h | 2 ++ src/Processors/Executors/PipelineExecutor.cpp | 18 ++++++++++++++---- src/Processors/Executors/PipelineExecutor.h | 4 +++- .../Executors/PullingAsyncPipelineExecutor.cpp | 3 ++- .../Executors/PullingPipelineExecutor.cpp | 2 +- 6 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index d842f5c5937..a4583685a90 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -338,6 +338,14 @@ bool QueryStatus::checkTimeLimit() return limits.checkTimeLimit(watch, overflow_mode); } +bool QueryStatus::checkTimeLimitSoft() +{ + if (is_killed.load()) + return false; 
+ + return limits.checkTimeLimit(watch, OverflowMode::BREAK); +} + void QueryStatus::setUserProcessList(ProcessListForUser * user_process_list_) { diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 52a476c2a48..0e64e60bd89 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -178,6 +178,8 @@ public: /// Checks the query time limits (cancelled or timeout) bool checkTimeLimit(); + /// Same as checkTimeLimit but it never throws + bool checkTimeLimitSoft(); }; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 53e26481b3a..feaa7c7af5d 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -104,7 +104,7 @@ void PipelineExecutor::execute(size_t num_threads) bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { - checkTimeLimit(); + checkTimeLimitSoft(); if (!is_execution_initialized) { initializeExecution(1); @@ -128,11 +128,11 @@ bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) return false; } -bool PipelineExecutor::checkTimeLimit() +bool PipelineExecutor::checkTimeLimitSoft() { if (process_list_element) { - bool continuing = process_list_element->checkTimeLimit(); + bool continuing = process_list_element->checkTimeLimitSoft(); // We call cancel here so that all processors are notified and tasks waken up // so that the "break" is faster and doesn't wait for long events if (!continuing) @@ -143,6 +143,15 @@ bool PipelineExecutor::checkTimeLimit() return true; } +bool PipelineExecutor::checkTimeLimit() +{ + bool continuing = checkTimeLimitSoft(); + if (!continuing) + process_list_element->checkTimeLimit(); // Will throw if needed + + return continuing; +} + void PipelineExecutor::finalizeExecution() { checkTimeLimit(); @@ -208,7 +217,8 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, std::atomic_bool * yie if (tasks.isFinished()) break; - checkTimeLimit(); + if (!checkTimeLimitSoft()) + break; #ifndef NDEBUG Stopwatch processing_time_watch; diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index dd3212caca8..99a60910e41 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -43,8 +43,10 @@ public: /// Cancel execution. May be called from another thread. void cancel(); - /// Checks the query time limits (cancelled or timeout) + /// Checks the query time limits (cancelled or timeout). Throws on cancellation or when time limit is reached and the query uses "break" bool checkTimeLimit(); + /// Same as checkTimeLimit but it never throws. It returns false on cancellation or time limit reached + bool checkTimeLimitSoft(); private: ExecutingGraphPtr graph; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 8760325d958..0ba07df95a6 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -117,7 +117,8 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) data->rethrowExceptionIfHas(); - bool is_execution_finished = !data->executor->checkTimeLimit() || lazy_format ? lazy_format->isFinished() : data->is_finished.load(); + bool is_execution_finished + = !data->executor->checkTimeLimitSoft() || lazy_format ? 
lazy_format->isFinished() : data->is_finished.load(); if (is_execution_finished) { diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 3cc91ceeeeb..ae522c1073d 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -44,7 +44,7 @@ bool PullingPipelineExecutor::pull(Chunk & chunk) if (!executor) executor = std::make_shared(pipeline.processors, pipeline.process_list_element); - if (!executor->checkTimeLimit()) + if (!executor->checkTimeLimitSoft()) return false; if (!executor->executeStep(&has_data_flag)) From 37572f736201bd5565935031e92ca9ec94a0d538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 2 Dec 2021 14:57:33 +0100 Subject: [PATCH 056/262] 02122_join_group_by_timeout: Unify max process timeouts --- .../0_stateless/02122_join_group_by_timeout.sh | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02122_join_group_by_timeout.sh b/tests/queries/0_stateless/02122_join_group_by_timeout.sh index a8c4ee5f30a..4116453b69a 100755 --- a/tests/queries/0_stateless/02122_join_group_by_timeout.sh +++ b/tests/queries/0_stateless/02122_join_group_by_timeout.sh @@ -4,9 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# TCP CLIENT: As of today (22/11/21) uses PullingAsyncPipelineExecutor +MAX_PROCESS_WAIT=5 + +# TCP CLIENT: As of today (02/12/21) uses PullingAsyncPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -timeout -s KILL 5 $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ "SELECT * FROM ( SELECT a.name as n @@ -23,7 +25,7 @@ timeout -s KILL 5 $CLICKHOUSE_CLIENT --max_execution_time 1 -q \ FORMAT Null" 2>&1 | grep -o "Code: 159" | sort | uniq ### Should stop pulling data and return what has been generated already (return code 0) -timeout -s KILL 5 $CLICKHOUSE_CLIENT -q \ +timeout -s KILL $MAX_PROCESS_WAIT $CLICKHOUSE_CLIENT -q \ "SELECT a.name as n FROM ( @@ -38,9 +40,9 @@ timeout -s KILL 5 $CLICKHOUSE_CLIENT -q \ echo $? 
-# HTTP CLIENT: As of today (22/11/21) uses PullingPipelineExecutor +# HTTP CLIENT: As of today (02/12/21) uses PullingPipelineExecutor ### Should be cancelled after 1 second and return a 159 exception (timeout) -${CLICKHOUSE_CURL} -q --max-time 5 -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL&max_execution_time=1" -d \ "SELECT * FROM ( SELECT a.name as n @@ -58,7 +60,7 @@ ${CLICKHOUSE_CURL} -q --max-time 5 -sS "$CLICKHOUSE_URL&max_execution_time=1" -d ### Should stop pulling data and return what has been generated already (return code 0) -${CLICKHOUSE_CURL} -q --max-time 5 -sS "$CLICKHOUSE_URL" -d \ +${CLICKHOUSE_CURL} -q --max-time $MAX_PROCESS_WAIT -sS "$CLICKHOUSE_URL" -d \ "SELECT a.name as n FROM ( From 755ba5dc090f7c3544de1113bcc6539428a2319b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 2 Dec 2021 15:09:53 +0100 Subject: [PATCH 057/262] Don't forget to check the output of checkTimeLimitSoft --- src/Interpreters/ProcessList.h | 2 +- src/Processors/Executors/PipelineExecutor.cpp | 1 - src/Processors/Executors/PipelineExecutor.h | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 0e64e60bd89..9c826bde061 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -179,7 +179,7 @@ public: /// Checks the query time limits (cancelled or timeout) bool checkTimeLimit(); /// Same as checkTimeLimit but it never throws - bool checkTimeLimitSoft(); + [[nodiscard]] bool checkTimeLimitSoft(); }; diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index feaa7c7af5d..e722f8718f7 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -104,7 +104,6 @@ void PipelineExecutor::execute(size_t num_threads) bool PipelineExecutor::executeStep(std::atomic_bool * yield_flag) { - checkTimeLimitSoft(); if (!is_execution_initialized) { initializeExecution(1); diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index 99a60910e41..12f2bd8b75b 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -46,7 +46,7 @@ public: /// Checks the query time limits (cancelled or timeout). Throws on cancellation or when time limit is reached and the query uses "break" bool checkTimeLimit(); /// Same as checkTimeLimit but it never throws. 
It returns false on cancellation or time limit reached - bool checkTimeLimitSoft(); + [[nodiscard]] bool checkTimeLimitSoft(); private: ExecutingGraphPtr graph; From 6e27c9c6257a64932bc95c5223db54d57eabb820 Mon Sep 17 00:00:00 2001 From: Alexey Date: Thu, 2 Dec 2021 20:19:50 +0000 Subject: [PATCH 058/262] More default argument values Actual interactive mode call conditions --- docs/en/interfaces/grpc.md | 10 +++++++--- docs/ru/interfaces/grpc.md | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index f9faab5235d..eb572629532 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -61,18 +61,22 @@ To use the gRPC interface set `grpc_port` in the main [server configuration](../ You can write a client in any of the programming languages supported by gRPC using the provided [specification](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto) Or you can use a built-in Python client. It is placed in [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) in the repository. The built-in client requires [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) Python modules. -To run the client in an interactive mode call it without arguments. The client supports the following arguments: +The client supports the following arguments: - `--help` – Shows a help message and exits. - `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can use IPv4 or IPv6 addresses also. -- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). +- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`. - `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`. - `--password PASSWORD` – A password. Default value: empty string. - `--query QUERY, -q QUERY` – A query to process when using non-interactive mode. - `--database DATABASE, -d DATABASE` – A default database. If not specified, the current database set in the server settings is used (`default` by default). -- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). Default value for interactive mode: `PrettyCompact`. - `--debug` – Enables showing debug information. +To run the client in an interactive mode call it without `--query` argument. + +In a batch mode query data can be passed via `stdin`. + **Client Usage Example** In the following example a table is created and loaded with data from a CSV file. Then the content of the table is queried. diff --git a/docs/ru/interfaces/grpc.md b/docs/ru/interfaces/grpc.md index 96eb6773bfb..35a59272035 100644 --- a/docs/ru/interfaces/grpc.md +++ b/docs/ru/interfaces/grpc.md @@ -61,18 +61,22 @@ ClickHouse поддерживает интерфейс [gRPC](https://grpc.io/). Можно написать клиент на любом языке программирования, который поддерживается gRPC с использованием [спецификации](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto) Также можно воспользоваться встроенным Python клиентом. 
Он расположен в [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) в репозитории. Для работы встроенного клиента требуются Python модули [grpcio и grpcio-tools](https://grpc.io/docs/languages/python/quickstart). -Чтобы запустить клиент в интерактивном режиме, вызовите его без аргументов. Клиент поддерживает аргументы: +Клиент поддерживает аргументы: - `--help` – вывести справку и завершить работу. - `--host HOST, -h HOST` – имя сервера. Значение по умолчанию: `localhost`. Можно задать адрес IPv4 или IPv6. -- `--port PORT` – номер порта. Этот порт должен быть задан в конфигурации сервера ClickHouse настройкой `grpc_port`. +- `--port PORT` – номер порта. Этот порт должен быть задан в конфигурации сервера ClickHouse настройкой `grpc_port`. Значение по умолчанию: `9100` - `--user USER_NAME, -u USER_NAME` – имя пользователя. Значение по умолчанию: `default`. - `--password PASSWORD` – пароль. Значение по умолчанию: пустая строка. - `--query QUERY, -q QUERY` – запрос, который нужно выполнить. - `--database DATABASE, -d DATABASE` – база данных по умолчанию. Если не указана, то будет использована база данных, заданная в настройках сервера (по умолчанию `default`). -- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – [формат](formats.md) вывода результата. +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – [формат](formats.md) вывода результата. Значение по умолчанию для интерактивного режима: `PrettyCompact`. - `--debug` – вывод отладочной информации. +Чтобы запустить клиент в интерактивном режиме, не указывайте аргумент `--query`. + +В пакетном режиме данные запроса можно передать через `stdin`. + **Пример использования клиента** В примере создается таблица, и в нее загружаются данные из CSV файла. Затем выводится содержимое таблицы. From 4fbfc7c56f9c2e95c1498f055bc003c9bfc0e831 Mon Sep 17 00:00:00 2001 From: zzsmdfj Date: Fri, 3 Dec 2021 15:08:03 +0800 Subject: [PATCH 059/262] MaterializedMySQL support VARBINARY type --- src/DataTypes/DataTypeString.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/DataTypes/DataTypeString.cpp b/src/DataTypes/DataTypeString.cpp index 84610557d21..7fa3a394be8 100644 --- a/src/DataTypes/DataTypeString.cpp +++ b/src/DataTypes/DataTypeString.cpp @@ -91,5 +91,6 @@ void registerDataTypeString(DataTypeFactory & factory) factory.registerAlias("NCHAR LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BINARY LARGE OBJECT", "String", DataTypeFactory::CaseInsensitive); factory.registerAlias("BINARY VARYING", "String", DataTypeFactory::CaseInsensitive); + factory.registerAlias("VARBINARY", "String", DataTypeFactory::CaseInsensitive); } } From fb0cc625090c265c4d9d399db2688d51f44b18eb Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 10:52:13 +0300 Subject: [PATCH 060/262] perf: fix waiting of the server after running tests killall requires strict match, i.e. "clickhouse-server" not "clickhouse": 2021-12-03 05:24:56 + env kill -- -21700 2021-12-03 05:24:56 kill: (-21700): No such process 2021-12-03 05:24:56 + killall clickhouse 2021-12-03 05:24:56 clickhouse: no process found 2021-12-03 05:24:56 + echo Servers stopped. 2021-12-03 05:24:56 Servers stopped. 
2021-12-03 05:24:56 + analyze_queries $ tail -n1 *-server-log.log ==> left-server-log.log <== 2021.12.03 05:26:59.530647 [ 450 ] {} SystemLog (system.asynchronous_metric_log): Flushed system log up to offset 1668052 ==> right-server-log.log <== 2021.12.03 05:27:20.873136 [ 466 ] {} SystemLog (system.metric_log): Flushed system log up to offset 9605 ==> setup-server-log.log <== 2021.12.03 02:47:14.844395 [ 96 ] {} Application: Child process exited normally with code 0. As you can see killall instantly fails with no such process, while this cannot be true since it was there, and also according to logs there were messages after running analyze_queries() from compare.sh This should fix problems like in [1]. [1]: https://clickhouse-test-reports.s3.yandex.net/32080/344298f4037f88b114b8e798bb30036b24be8f16/performance_comparison/report.html#fail1 --- docker/test/performance-comparison/compare.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index b6a06be2ac7..2fefe856eea 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -1409,7 +1409,7 @@ case "$stage" in while env kill -- -$watchdog_pid ; do sleep 1; done # Stop the servers to free memory for the subsequent query analysis. - while killall clickhouse; do echo . ; sleep 1 ; done + while killall clickhouse-server; do echo . ; sleep 1 ; done echo Servers stopped. ;& "analyze_queries") From 3e96b28843e94159af35bfbd7bf0de3c3cfbc2c0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 10:56:42 +0300 Subject: [PATCH 061/262] perf: convert killall to pkill (since killall has some magic, see -e option) --- docker/test/performance-comparison/compare.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 2fefe856eea..c32b50a3cbe 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -61,7 +61,7 @@ function configure cp -rv right/config left ||: # Start a temporary server to rename the tables - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo all killed set -m # Spawn temporary in its own process groups @@ -88,7 +88,7 @@ function configure clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo all killed # Make copies of the original db for both servers. Use hardlinks instead @@ -106,7 +106,7 @@ function configure function restart { - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo all killed # Change the jemalloc settings here. @@ -1409,7 +1409,7 @@ case "$stage" in while env kill -- -$watchdog_pid ; do sleep 1; done # Stop the servers to free memory for the subsequent query analysis. - while killall clickhouse-server; do echo . ; sleep 1 ; done + while pkill clickhouse-serv; do echo . ; sleep 1 ; done echo Servers stopped. 
;& "analyze_queries") From 2d3f77314703193a8e4f87527f91c0f8686e135f Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Dec 2021 13:25:55 +0300 Subject: [PATCH 062/262] Function accurateCastOrDefault remove separate branch --- src/Functions/castOrDefault.cpp | 66 +++++++++++++++------------------ 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 622059d0a49..95046d95176 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -107,7 +107,7 @@ public: const auto & null_map_data = cast_result_nullable.getNullMapData(); size_t null_map_data_size = null_map_data.size(); const auto & nested_column = cast_result_nullable.getNestedColumn(); - IColumn::MutablePtr result = return_type->createColumn(); + auto result = return_type->createColumn(); result->reserve(null_map_data_size); ColumnNullable * result_nullable = nullptr; @@ -116,49 +116,43 @@ public: size_t start_insert_index = 0; - /// Created separate branch because cast and inserting field from other column is slower + Field default_value; + ColumnPtr default_column; + if (arguments.size() == 3) { - const auto & default_column_with_type = arguments[2]; - auto default_column = default_column_with_type.column->convertToFullColumnIfConst(); + auto default_values_column = arguments[2].column; - for (size_t i = 0; i < null_map_data_size; ++i) - { - bool is_current_index_null = null_map_data[i]; - if (!is_current_index_null) - continue; - - if (i != start_insert_index) - { - if (result_nullable) - result_nullable->insertRangeFromNotNullable(nested_column, start_insert_index, i - start_insert_index); - else - result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); - } - - result->insertFrom(*default_column, i); - start_insert_index = i + 1; - } + if (isColumnConst(*default_values_column)) + default_value = (*default_values_column)[0]; + else + default_column = default_values_column->convertToFullColumnIfConst(); } else { - for (size_t i = 0; i < null_map_data_size; ++i) + default_value = return_type->getDefault(); + } + + for (size_t i = 0; i < null_map_data_size; ++i) + { + bool is_current_index_null = null_map_data[i]; + if (!is_current_index_null) + continue; + + if (i != start_insert_index) { - bool is_current_index_null = null_map_data[i]; - if (!is_current_index_null) - continue; - - if (i != start_insert_index) - { - if (result_nullable) - result_nullable->insertRangeFromNotNullable(nested_column, start_insert_index, i - start_insert_index); - else - result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); - } - - result->insertDefault(); - start_insert_index = i + 1; + if (result_nullable) + result_nullable->insertRangeFromNotNullable(nested_column, start_insert_index, i - start_insert_index); + else + result->insertRangeFrom(nested_column, start_insert_index, i - start_insert_index); } + + if (default_column) + result->insertFrom(*default_column, i); + else + result->insert(default_value); + + start_insert_index = i + 1; } if (null_map_data_size != start_insert_index) From 4bbb02bbae52f8d6807c1db7d411e5a1fc438257 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Dec 2021 14:06:58 +0300 Subject: [PATCH 063/262] RangeHashedDictionary added update_field support --- .../ClickHouseDictionarySource.cpp | 78 ++++---- src/Dictionaries/RangeHashedDictionary.cpp | 172 ++++++++++++------ src/Dictionaries/RangeHashedDictionary.h | 11 +- 
.../test_dictionaries_update_field/test.py | 2 +- 4 files changed, 156 insertions(+), 107 deletions(-) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index edca02b83ad..1ddcdd96454 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -230,74 +230,64 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory) std::string settings_config_prefix = config_prefix + ".clickhouse"; - std::unique_ptr configuration; + std::string host = config.getString(settings_config_prefix + ".host", "localhost"); + std::string user = config.getString(settings_config_prefix + ".user", "default"); + std::string password = config.getString(settings_config_prefix + ".password", ""); + std::string db = config.getString(settings_config_prefix + ".db", default_database); + std::string table = config.getString(settings_config_prefix + ".table", ""); + UInt16 port = static_cast(config.getUInt(settings_config_prefix + ".port", default_port)); + auto named_collection = created_from_ddl ? getExternalDataSourceConfiguration(config, settings_config_prefix, global_context) : std::nullopt; + if (named_collection) { - std::string host = named_collection->host; - UInt16 port = named_collection->port; - configuration = std::make_unique( - ClickHouseDictionarySource::Configuration{ - .host = host, - .user = named_collection->username, - .password = named_collection->password, - .db = named_collection->database, - .table = named_collection->table, - .query = config.getString(settings_config_prefix + ".query", ""), - .where = config.getString(settings_config_prefix + ".where", ""), - .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), - .update_field = config.getString(settings_config_prefix + ".update_field", ""), - .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), - .port = port, - .is_local = isLocalAddress({host, port}, default_port), - .secure = config.getBool(settings_config_prefix + ".secure", false) - }); - } - else - { - std::string host = config.getString(settings_config_prefix + ".host", "localhost"); - UInt16 port = static_cast(config.getUInt(settings_config_prefix + ".port", default_port)); - configuration = std::make_unique( - ClickHouseDictionarySource::Configuration{ - .host = host, - .user = config.getString(settings_config_prefix + ".user", "default"), - .password = config.getString(settings_config_prefix + ".password", ""), - .db = config.getString(settings_config_prefix + ".db", default_database), - .table = config.getString(settings_config_prefix + ".table", ""), - .query = config.getString(settings_config_prefix + ".query", ""), - .where = config.getString(settings_config_prefix + ".where", ""), - .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), - .update_field = config.getString(settings_config_prefix + ".update_field", ""), - .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), - .port = port, - .is_local = isLocalAddress({host, port}, default_port), - .secure = config.getBool(settings_config_prefix + ".secure", false) - }); + host = named_collection->host; + user = named_collection->username; + password = named_collection->password; + db = named_collection->database; + table = named_collection->table; + port = named_collection->port; } + ClickHouseDictionarySource::Configuration configuration{ + .host = host, + .user = user, + .password = 
password, + .db = db, + .table = table, + .query = config.getString(settings_config_prefix + ".query", ""), + .where = config.getString(settings_config_prefix + ".where", ""), + .invalidate_query = config.getString(settings_config_prefix + ".invalidate_query", ""), + .update_field = config.getString(settings_config_prefix + ".update_field", ""), + .update_lag = config.getUInt64(settings_config_prefix + ".update_lag", 1), + .port = port, + .is_local = isLocalAddress({host, port}, default_port), + .secure = config.getBool(settings_config_prefix + ".secure", false)}; + ContextMutablePtr context; - if (configuration->is_local) + if (configuration.is_local) { /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication). Session session(global_context, ClientInfo::Interface::LOCAL); - session.authenticate(configuration->user, configuration->password, {}); + session.authenticate(configuration.user, configuration.password, {}); context = session.makeQueryContext(); } else { context = Context::createCopy(global_context); } + context->applySettingsChanges(readSettingsFromDictionaryConfig(config, config_prefix)); String dictionary_name = config.getString(".dictionary.name", ""); String dictionary_database = config.getString(".dictionary.database", ""); - if (dictionary_name == configuration->table && dictionary_database == configuration->db) + if (dictionary_name == configuration.table && dictionary_database == configuration.db) throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouseDictionarySource table cannot be dictionary table"); - return std::make_unique(dict_struct, *configuration, sample_block, context); + return std::make_unique(dict_struct, configuration, sample_block, context); }; factory.registerSource("clickhouse", create_table_source); diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 42d6a0c0c03..bc871a8cdcf 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -78,12 +78,14 @@ RangeHashedDictionary::RangeHashedDictionary( const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - bool require_nonempty_) + bool require_nonempty_, + BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} , dict_lifetime(dict_lifetime_) , require_nonempty(require_nonempty_) + , update_field_loaded_block(std::move(update_field_loaded_block_)) { createAttributes(); loadData(); @@ -295,7 +297,6 @@ void RangeHashedDictionary::createAttributes() for (const auto & attribute : dict_struct.attributes) { - attribute_index_by_name.emplace(attribute.name, attributes.size()); attributes.push_back(createAttribute(attribute)); if (attribute.hierarchical) @@ -307,68 +308,21 @@ void RangeHashedDictionary::createAttributes() template void RangeHashedDictionary::loadData() { - QueryPipeline pipeline(source_ptr->loadAll()); - - PullingPipelineExecutor executor(pipeline); - Block block; - while (executor.pull(block)) + if (!source_ptr->hasUpdateField()) { - size_t skip_keys_size_offset = dict_struct.getKeysSize(); + QueryPipeline pipeline(source_ptr->loadAll()); - Columns key_columns; - key_columns.reserve(skip_keys_size_offset); - - /// Split into keys columns and attribute columns - for (size_t i = 0; i < skip_keys_size_offset; ++i) - key_columns.emplace_back(block.safeGetByPosition(i).column); - - DictionaryKeysArenaHolder 
arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - - element_count += keys_size; - - // Support old behaviour, where invalid date means 'open range'. - const bool is_date = isDate(dict_struct.range_min->type); - - const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column); - const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column); - - skip_keys_size_offset += 2; - - for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) { - const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column; - auto & attribute = attributes[attribute_index]; - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - - RangeStorageType lower_bound; - RangeStorageType upper_bound; - - if (is_date) - { - lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0); - upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1); - } - else - { - lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE); - upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE); - } - - if constexpr (std::is_same_v) - key = copyKeyInArena(key); - - setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]); - keys_extractor.rollbackCurrentKey(); - } - - keys_extractor.reset(); + blockToAttributes(block); } } + else + { + updateData(); + } if (require_nonempty && 0 == element_count) throw Exception(ErrorCodes::DICTIONARY_IS_EMPTY, @@ -497,6 +451,106 @@ void RangeHashedDictionary::getItemsImpl( found_count.fetch_add(keys_found, std::memory_order_relaxed); } +template +void RangeHashedDictionary::updateData() +{ + if (!update_field_loaded_block || update_field_loaded_block->rows() == 0) + { + QueryPipeline pipeline(source_ptr->loadUpdatedAll()); + + PullingPipelineExecutor executor(pipeline); + Block block; + while (executor.pull(block)) + { + /// We are using this to keep saved data if input stream consists of multiple blocks + if (!update_field_loaded_block) + update_field_loaded_block = std::make_shared(block.cloneEmpty()); + + for (size_t attribute_index = 0; attribute_index < block.columns(); ++attribute_index) + { + const IColumn & update_column = *block.getByPosition(attribute_index).column.get(); + MutableColumnPtr saved_column = update_field_loaded_block->getByPosition(attribute_index).column->assumeMutable(); + saved_column->insertRangeFrom(update_column, 0, update_column.size()); + } + } + } + else + { + static constexpr size_t range_columns_size = 2; + + auto pipe = source_ptr->loadUpdatedAll(); + mergeBlockWithPipe( + dict_struct.getKeysSize() + range_columns_size, + *update_field_loaded_block, + std::move(pipe)); + } + + if (update_field_loaded_block) + { + blockToAttributes(*update_field_loaded_block.get()); + } +} + +template +void RangeHashedDictionary::blockToAttributes(const Block & block [[maybe_unused]]) +{ + size_t skip_keys_size_offset = dict_struct.getKeysSize(); + + Columns key_columns; + key_columns.reserve(skip_keys_size_offset); + + /// Split into keys columns and attribute columns 
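blockToAttributes() relies on a fixed column layout in every block produced by the dictionary source: the key columns come first, then the two range bound columns, then the attribute columns. A minimal standalone sketch of that indexing, using plain strings in place of IColumn objects (purely illustrative, not ClickHouse code):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Layout consumed above: [key columns...][range_min][range_max][attribute columns...]
int main()
{
    const std::vector<std::string> block = {"key", "range_min", "range_max", "attr_value", "attr_name"};
    size_t skip_keys_size_offset = 1;                 // stands in for dict_struct.getKeysSize()

    std::cout << "keys:";
    for (size_t i = 0; i < skip_keys_size_offset; ++i)
        std::cout << ' ' << block[i];
    std::cout << "\nrange: " << block[skip_keys_size_offset] << ", " << block[skip_keys_size_offset + 1] << '\n';

    skip_keys_size_offset += 2;                       // step over the two range columns
    std::cout << "attributes:";
    for (size_t i = skip_keys_size_offset; i < block.size(); ++i)
        std::cout << ' ' << block[i];
    std::cout << '\n';
}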
+ for (size_t i = 0; i < skip_keys_size_offset; ++i) + key_columns.emplace_back(block.safeGetByPosition(i).column); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + element_count += keys_size; + + // Support old behaviour, where invalid date means 'open range'. + const bool is_date = isDate(dict_struct.range_min->type); + + const auto & min_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset).column); + const auto & max_range_column = unwrapNullableColumn(*block.safeGetByPosition(skip_keys_size_offset + 1).column); + + skip_keys_size_offset += 2; + + for (size_t attribute_index = 0; attribute_index < attributes.size(); ++attribute_index) + { + const auto & attribute_column = *block.safeGetByPosition(attribute_index + skip_keys_size_offset).column; + auto & attribute = attributes[attribute_index]; + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + + RangeStorageType lower_bound; + RangeStorageType upper_bound; + + if (is_date) + { + lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, 0); + upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, DATE_LUT_MAX_DAY_NUM + 1); + } + else + { + lower_bound = getColumnIntValueOrDefault(min_range_column, key_index, is_date, RANGE_MIN_NULL_VALUE); + upper_bound = getColumnIntValueOrDefault(max_range_column, key_index, is_date, RANGE_MAX_NULL_VALUE); + } + + if constexpr (std::is_same_v) + key = copyKeyInArena(key); + + setAttributeValue(attribute, key, Range{lower_bound, upper_bound}, attribute_column[key_index]); + keys_extractor.rollbackCurrentKey(); + } + + keys_extractor.reset(); + } +} + template template void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value) diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1ccd9708d79..1605e2bab81 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -39,7 +39,8 @@ public: const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, const DictionaryLifetime dict_lifetime_, - bool require_nonempty_); + bool require_nonempty_, + BlockPtr update_field_loaded_block_ = nullptr); std::string getTypeName() const override { return "RangeHashed"; } @@ -63,7 +64,7 @@ public: std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty); + return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, update_field_loaded_block); } const IDictionarySource * getSource() const override { return source_ptr.get(); } @@ -156,6 +157,10 @@ private: ValueSetter && set_value, DefaultValueExtractor & default_value_extractor) const; + void updateData(); + + void blockToAttributes(const Block & block); + template static void setAttributeValueImpl(Attribute & attribute, KeyType key, const Range & range, const Field & value); @@ -185,8 +190,8 @@ private: const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; const bool require_nonempty; + BlockPtr update_field_loaded_block; - std::map attribute_index_by_name; std::vector attributes; Arena complex_key_arena; diff --git 
a/tests/integration/test_dictionaries_update_field/test.py b/tests/integration/test_dictionaries_update_field/test.py index 2e46403c63b..8fb0d67e8b8 100644 --- a/tests/integration/test_dictionaries_update_field/test.py +++ b/tests/integration/test_dictionaries_update_field/test.py @@ -34,7 +34,7 @@ def started_cluster(): @pytest.mark.parametrize("dictionary_name,dictionary_type", [ ("flat_update_field_dictionary", "FLAT"), ("simple_key_hashed_update_field_dictionary", "HASHED"), - ("complex_key_hashed_update_field_dictionary", "HASHED") + ("complex_key_hashed_update_field_dictionary", "COMPLEX_KEY_HASHED") ]) def test_update_field(started_cluster, dictionary_name, dictionary_type): create_dictionary_query = """ From 8e37281a7f51b123bc83b814961332105b913378 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Dec 2021 14:10:47 +0300 Subject: [PATCH 064/262] RangeHashedDictionary fix bytes_allocated with update_field --- src/Dictionaries/RangeHashedDictionary.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index bc871a8cdcf..7dc955eb8f7 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -355,6 +355,9 @@ void RangeHashedDictionary::calculateBytesAllocated() if constexpr (dictionary_key_type == DictionaryKeyType::Complex) bytes_allocated += complex_key_arena.size(); + + if (update_field_loaded_block) + bytes_allocated += update_field_loaded_block->allocatedBytes(); } template From 4c916a0e3888cf4a958852dc091aebb95d9ca8b7 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 3 Dec 2021 14:31:49 +0300 Subject: [PATCH 065/262] DictionariesLoader qualify name with database fix --- src/Interpreters/ExternalDictionariesLoader.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index e682a98114d..74bff33c914 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -95,14 +95,16 @@ QualifiedTableName ExternalDictionariesLoader::qualifyDictionaryNameWithDatabase return qualified_dictionary_name; } - if (qualified_name->database.empty() && has(dictionary_name)) + /// If dictionary was not qualified with database name, try to resolve dictionary as xml dictionary. + if (qualified_name->database.empty() && !has(qualified_name->table)) { - /// This is xml dictionary - return *qualified_name; - } + auto current_database_name = query_context->getCurrentDatabase(); + std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name, current_database_name); - if (qualified_name->database.empty()) - qualified_name->database = query_context->getCurrentDatabase(); + /// If after qualify dictionary_name with default_database_name we find it, add default_database to qualified name. 
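The qualification logic above boils down to: a bare dictionary name is first tried as an XML dictionary, and only if that lookup fails is it qualified with the current database. A rough standalone sketch of that order, where qualifyDictionaryName and the std::set of loaded names are illustrative stand-ins rather than ClickHouse APIs (the real code resolves names through DatabaseCatalog):

#include <iostream>
#include <set>
#include <string>

// Illustrative lookup order: bare name -> XML dictionary -> name qualified with the current database.
static std::string qualifyDictionaryName(
    const std::string & name,
    const std::string & current_database,
    const std::set<std::string> & loaded_dictionaries)
{
    if (name.find('.') != std::string::npos)
        return name;                                    /// already database-qualified
    if (loaded_dictionaries.count(name))
        return name;                                    /// resolved as an XML dictionary
    std::string qualified = current_database + "." + name;
    if (loaded_dictionaries.count(qualified))
        return qualified;                               /// resolved inside the current database
    return name;                                        /// unknown; the loader reports the error later
}

int main()
{
    const std::set<std::string> loaded = {"xml_dict", "db1.range_dict"};
    std::cout << qualifyDictionaryName("xml_dict", "db1", loaded) << '\n';   /// xml_dict
    std::cout << qualifyDictionaryName("range_dict", "db1", loaded) << '\n'; /// db1.range_dict
    std::cout << qualifyDictionaryName("missing", "db1", loaded) << '\n';    /// missing
}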
+ if (has(resolved_name)) + qualified_name->database = query_context->getCurrentDatabase(); + } return *qualified_name; } From 898db5b46846622b715c49c26e902ea3c1b17657 Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 3 Dec 2021 19:42:46 +0800 Subject: [PATCH 066/262] Resolve review comments Signed-off-by: frank chen --- src/Formats/FormatFactory.cpp | 20 ++++++++++++++++++++ src/Formats/FormatFactory.h | 5 +++++ src/Storages/StorageURL.cpp | 25 +------------------------ 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8e490fac301..898012eeaf9 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -302,6 +302,26 @@ OutputFormatPtr FormatFactory::getOutputFormat( return format; } +String FormatFactory::getContentType( + const String & name, + ContextPtr context, + const std::optional & _format_settings) const +{ + const auto & output_getter = getCreators(name).output_creator; + if (!output_getter) + throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); + + auto format_settings = _format_settings ? *_format_settings : getFormatSettings(context); + + Block emptyBlock; + RowOutputFormatParams emptyParams; + WriteBufferFromOwnString emptyBuffer; + auto format = output_getter(emptyBuffer, emptyBlock, emptyParams, format_settings); + + return format->getContentType(); +} + + void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator) { auto & target = dict[name].input_creator; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 77ecd2c167f..ea285c47996 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -131,6 +131,11 @@ public: const Block & sample, ContextPtr context, WriteCallback callback = {}, + const std::optional & _format_settings = std::nullopt) const; + + String getContentType( + const String & name, + ContextPtr context, const std::optional & format_settings = std::nullopt) const; void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 2d975aa42f3..fe05d168c31 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -230,30 +230,7 @@ StorageURLSink::StorageURLSink( const String & http_method) : SinkToStorage(sample_block) { - // - // get the content type first - // - // The code here may look a little wired. - // The getContentType() is prodived on IOutputFormat class which relies on a WriteBuffer object, - // and this WriteBuffer object here is WriterBufferFromHTTP itself which accepts the Content-Type header. - // So, this is cyclic dependency. - // To decouple such dependency, we must be able to set header to 'WriteBufferFromHTTP' after we get the instance of output format by calling IOutputFormat::getContentType. - // But this is tricky because the 'WriteBufferFromHTTP' object may have been decorated by 'WriteBufferWithCompression' and is not acceesible due to private modifiers. - // - // So, here we first instantiate an OutputFormat object with a fake stream to get the Content-Type. - // This is not the best way but a more simpler way to understand. 
- // - std::string content_type; - { - WriteBufferFromOStream buffer(std::cout); - auto output = FormatFactory::instance().getOutputFormat(format, - buffer, - sample_block, - context, - {} /* write callback */, - format_settings); - content_type = output->getContentType(); - } + std::string content_type = FormatFactory::instance().getContentType(format, context, format_settings); write_buf = wrapWriteBufferWithCompressionMethod( std::make_unique(Poco::URI(uri), http_method, content_type, timeouts), From 4f136cb30c89d7e378fb4eb4ad942261c4a8a16a Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 3 Dec 2021 15:37:39 +0300 Subject: [PATCH 067/262] Fix NaN deserialization for Quoted escaping rule --- .../Serializations/SerializationNullable.cpp | 63 +++++++++++++++++-- .../Impl/CustomSeparatedRowInputFormat.cpp | 2 +- .../02130_parse_quoted_null.reference | 12 ++++ .../0_stateless/02130_parse_quoted_null.sh | 56 +++++++++++++++++ 4 files changed, 127 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02130_parse_quoted_null.reference create mode 100755 tests/queries/0_stateless/02130_parse_quoted_null.sh diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 5e2b31ebb9d..261d0ff3c5d 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -394,12 +394,65 @@ template ReturnType SerializationNullable::deserializeTextQuotedImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested) { - return safeDeserialize(column, *nested, - [&istr] + if (istr.eof() || (*istr.position() != 'N' && *istr.position() != 'n')) + { + /// This is not null, surely. + return safeDeserialize(column, *nested, + [] { return false; }, + [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); }); + } + + /// Check if we have enough data in buffer to check if it's a null. + if (istr.available() >= 4) + { + auto check_for_null = [&istr]() { - return checkStringByFirstCharacterAndAssertTheRestCaseInsensitive("NULL", istr); - }, - [&nested, &istr, &settings] (IColumn & nested_column) { nested->deserializeTextQuoted(nested_column, istr, settings); }); + auto * pos = istr.position(); + if (checkStringCaseInsensitive("NULL", istr)) + return true; + istr.position() = pos; + return false; + }; + auto deserialize_nested = [&nested, &settings, &istr] (IColumn & nested_column) + { + nested->deserializeTextQuoted(nested_column, istr, settings); + }; + return safeDeserialize(column, *nested, check_for_null, deserialize_nested); + } + + /// We don't have enough data in buffer to check if it's a NULL + /// and we cannot check it just by one symbol (otherwise we won't be able + /// to differentiate for example NULL and NaN for float) + /// Use PeekableReadBuffer to make a checkpoint before checking + /// null and rollback if the check was failed. + PeekableReadBuffer buf(istr, true); + auto check_for_null = [&buf]() + { + buf.setCheckpoint(); + SCOPE_EXIT(buf.dropCheckpoint()); + if (checkStringCaseInsensitive("NULL", buf)) + return true; + + buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested = [&nested, &settings, &buf] (IColumn & nested_column) + { + nested->deserializeTextQuoted(nested_column, buf, settings); + /// Check that we don't have any unread data in PeekableReadBuffer own memory. 
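The checkpoint/rollback dance above exists because, with a very small read buffer, only the first character of the field may be visible, and "NULL" and "NaN" both start with 'n'/'N'. A standalone sketch of the idea, where tryMatchNullCaseInsensitive and the size_t position are illustrative stand-ins for the PeekableReadBuffer checkpoint API:

#include <cctype>
#include <iostream>
#include <string>

// Save a checkpoint, try to match "NULL" case-insensitively, roll back on failure.
static bool tryMatchNullCaseInsensitive(const std::string & data, size_t & pos)
{
    const size_t checkpoint = pos;                       // setCheckpoint()
    static const char expected[] = "null";
    for (size_t i = 0; i < 4; ++i, ++pos)
    {
        if (pos >= data.size()
            || std::tolower(static_cast<unsigned char>(data[pos])) != expected[i])
        {
            pos = checkpoint;                            // rollbackToCheckpoint()
            return false;
        }
    }
    return true;
}

int main()
{
    for (const char * raw : {"NULL\t1", "nan\t2", "42.42\t3"})
    {
        std::string field = raw;
        size_t pos = 0;
        std::cout << field.substr(0, field.find('\t')) << " -> "
                  << (tryMatchNullCaseInsensitive(field, pos) ? "NULL" : "parse as nested type") << '\n';
    }
}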
+ if (likely(!buf.hasUnreadData())) + return; + + /// We have some unread data in PeekableReadBuffer own memory. + /// It can happen only if there is an unquoted string instead of a number. + throw DB::ParsingException( + ErrorCodes::CANNOT_READ_ALL_DATA, + "Error while parsing Nullable: got an unquoted string {} instead of a number", + String(buf.position(), std::min(10ul, buf.available()))); + }; + + return safeDeserialize(column, *nested, check_for_null, deserialize_nested); } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 6ff9a8cca2c..8cd9d154ae4 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -205,7 +205,7 @@ void CustomSeparatedRowInputFormat::syncAfterError() bool CustomSeparatedRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out) { - return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first firld", ignore_spaces); + return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first field", ignore_spaces); } bool CustomSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.reference b/tests/queries/0_stateless/02130_parse_quoted_null.reference new file mode 100644 index 00000000000..1f7989bd2ba --- /dev/null +++ b/tests/queries/0_stateless/02130_parse_quoted_null.reference @@ -0,0 +1,12 @@ +\N 1 +nan 2 +42.42 3 +\N 4 +\N 5 +\N 6 +\N 7 +nan 8 +nan 9 +nan 10 +nan 11 +OK diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh new file mode 100755 index 00000000000..9cb6cb73e6c --- /dev/null +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +DATA_FILE=$USER_FILES_PATH/test_02130.data +SELECT_QUERY="select * from file('test_02130.data', 'CustomSeparated', 'x Nullable(Float64), y Nullable(UInt64)') settings input_format_parallel_parsing=0, format_custom_escaping_rule='Quoted'" + + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02130" +$CLICKHOUSE_CLIENT -q "create table test_02130 (x Nullable(Float64), y Nullable(UInt64)) engine=Memory()" + +echo -e "null\t1" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" + +echo -e "nan\t2" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" + +echo -e "42.42\t3" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" + +echo -e "null\t4" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 + +echo -e "null\t5" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 + +echo -e "null\t6" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 + +echo -e "null\t7" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 + +echo -e "nan\t8" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 + +echo -e "nan\t9" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 + +echo -e "nan\t10" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 + +echo -e "nan\t11" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 + +echo -e "42\tnan" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' + +$CLICKHOUSE_CLIENT -q "select * from test_02130 order by y" +$CLICKHOUSE_CLIENT -q "drop table test_02130" + +rm $DATA_FILE From 7549619b25c8a711cc2e3522c0e6631e0307528f Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 3 Dec 2021 16:25:35 +0300 Subject: [PATCH 068/262] Improve skiping unknown fields with Quoted escaping rule in Template/CustomSeparated formats --- src/Formats/EscapingRuleUtils.cpp | 5 +- src/IO/ReadHelpers.cpp | 92 +++++++++++++++++++ src/IO/ReadHelpers.h | 11 +++ .../Impl/CustomSeparatedRowInputFormat.cpp | 2 +- .../02129_skip_quoted_fields.reference | 26 ++++++ .../0_stateless/02129_skip_quoted_fields.sh | 53 +++++++++++ 6 files changed, 184 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02129_skip_quoted_fields.reference create mode 100755 tests/queries/0_stateless/02129_skip_quoted_fields.sh diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 2c2662a6a67..d956d9e6bfb 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -69,10 +69,7 @@ void skipFieldByEscapingRule(ReadBuffer & buf, FormatSettings::EscapingRule esca readEscapedString(tmp, buf); break; case FormatSettings::EscapingRule::Quoted: - /// FIXME: it skips only strings, not numbers, arrays or tuples. - /// we should read until delimiter and skip all data between - /// single quotes. 
- readQuotedString(tmp, buf); + readQuotedFieldIntoString(tmp, buf); break; case FormatSettings::EscapingRule::CSV: readCSVString(tmp, buf, format_settings.csv); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 675adc43ce6..b0a6838b81e 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1212,4 +1212,96 @@ void skipToNextRowOrEof(PeekableReadBuffer & buf, const String & row_after_delim } } + +template +static void readQuotedFieldInBrackets(String & s, ReadBuffer & buf) +{ + assertChar(opening_bracket, buf); + s.push_back(opening_bracket); + + size_t balance = 1; + + while (!buf.eof() && balance) + { + char * next_pos = find_first_symbols<'\'', opening_bracket, closing_bracket>(buf.position(), buf.buffer().end()); + appendToStringOrVector(s, buf, next_pos); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + s.push_back(*buf.position()); + + if (*buf.position() == '\'') + { + readQuotedStringInto(s, buf); + s.push_back('\''); + } + else if (*buf.position() == opening_bracket) + { + ++balance; + ++buf.position(); + } + else if (*buf.position() == closing_bracket) + { + --balance; + ++buf.position(); + } + } +} + +void readQuotedFieldIntoString(String & s, ReadBuffer & buf) +{ + s.clear(); + + if (buf.eof()) + return; + + /// Possible values in 'Quoted' field: + /// - Strings: '...' + /// - Arrays: [...] + /// - Tuples: (...) + /// - Maps: {...} + /// - NULL + /// - Number: integer, float, decimal. + + if (*buf.position() == '\'') + readQuotedString(s, buf); + else if (*buf.position() == '[') + readQuotedFieldInBrackets<'[', ']'>(s, buf); + else if (*buf.position() == '(') + readQuotedFieldInBrackets<'(', ')'>(s, buf); + else if (*buf.position() == '{') + readQuotedFieldInBrackets<'{', '}'>(s, buf); + else if (checkCharCaseInsensitive('n', buf)) + { + /// NULL or NaN + if (checkCharCaseInsensitive('u', buf)) + { + assertStringCaseInsensitive("ll", buf); + s.append("NULL"); + } + else + { + assertStringCaseInsensitive("an", buf); + s.append("NaN"); + } + } + else + { + /// It's an integer, float or decimal. They all can be parsed as float. + /// Use PeekableReadBuffer to copy field to string after parsing. 
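readQuotedFieldInBrackets() above skips arrays, tuples and maps by counting bracket balance while treating single-quoted substrings as opaque, so brackets inside strings do not affect the balance. A rough standalone sketch of that balancing (skipBracketedField is an illustrative helper; escape sequences inside strings are ignored here, while the real code delegates them to readQuotedStringInto):

#include <cstddef>
#include <iostream>
#include <string>

// Consume characters until the opening bracket is balanced, copying them into the field.
static std::string skipBracketedField(const std::string & data, size_t & pos, char opening, char closing)
{
    std::string field;
    field.push_back(data[pos++]);                     // the opening bracket itself
    size_t balance = 1;
    while (pos < data.size() && balance != 0)
    {
        char c = data[pos++];
        field.push_back(c);
        if (c == '\'')
        {
            while (pos < data.size() && data[pos] != '\'')
                field.push_back(data[pos++]);         // string contents are opaque
            if (pos < data.size())
                field.push_back(data[pos++]);         // closing quote
        }
        else if (c == opening)
            ++balance;
        else if (c == closing)
            --balance;
    }
    return field;
}

int main()
{
    std::string row = "['some ]][[ string', [1,2]]\t42";
    size_t pos = 0;
    std::cout << skipBracketedField(row, pos, '[', ']') << '\n';  // ['some ]][[ string', [1,2]]
    std::cout << row.substr(pos + 1) << '\n';                     // 42 (the next field)
}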
+ PeekableReadBuffer peekable_buf(buf); + peekable_buf.setCheckpoint(); + Float64 tmp; + readFloatText(tmp, peekable_buf); + peekable_buf.makeContinuousMemoryFromCheckpointToPos(); + auto * end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(); + s.append(peekable_buf.position(), end); + peekable_buf.position() = end; + } +} + + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index da59fc7973c..c48306cf6d3 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -184,6 +184,15 @@ inline void assertChar(char symbol, ReadBuffer & buf) } } +inline bool checkCharCaseInsensitive(char c, ReadBuffer & buf) +{ + char a; + if (!buf.peek(a) || !equalsCaseInsensitive(a, c)) + return false; + buf.ignore(); + return true; +} + inline void assertString(const String & s, ReadBuffer & buf) { assertString(s.c_str(), buf); @@ -1375,4 +1384,6 @@ struct PcgDeserializer } }; +void readQuotedFieldIntoString(String & s, ReadBuffer & buf); + } diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 6ff9a8cca2c..8cd9d154ae4 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -205,7 +205,7 @@ void CustomSeparatedRowInputFormat::syncAfterError() bool CustomSeparatedRowInputFormat::parseRowStartWithDiagnosticInfo(WriteBuffer & out) { - return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first firld", ignore_spaces); + return parseDelimiterWithDiagnosticInfo(out, buf, format_settings.custom.row_before_delimiter, "delimiter before first field", ignore_spaces); } bool CustomSeparatedRowInputFormat::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.reference b/tests/queries/0_stateless/02129_skip_quoted_fields.reference new file mode 100644 index 00000000000..312f526ca28 --- /dev/null +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.reference @@ -0,0 +1,26 @@ +1 42 +2 42 +3 42 +4 42 +5 42 +6 42 +7 42 +8 42 +9 42 +10 42 +11 42 +12 42 +13 42 +14 42 +15 42 +16 42 +17 42 +18 42 +19 42 +20 42 +21 42 +22 42 +23 42 +24 42 +25 42 +26 42 diff --git a/tests/queries/0_stateless/02129_skip_quoted_fields.sh b/tests/queries/0_stateless/02129_skip_quoted_fields.sh new file mode 100755 index 00000000000..c1baeb5b8f2 --- /dev/null +++ b/tests/queries/0_stateless/02129_skip_quoted_fields.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_02129" +$CLICKHOUSE_CLIENT -q "create table test_02129 (x UInt64, y UInt64) engine=Memory()" + +QUERY="insert into test_02129 format CustomSeparatedWithNames settings input_format_skip_unknown_fields=1, format_custom_escaping_rule='Quoted'" + +# Skip string +echo -e "'x'\t'trash'\t'y'\n1\t'Some string'\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip number +echo -e "'x'\t'trash'\t'y'\n2\t42\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n3\t4242.4242\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n4\t-42\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n5\t+42\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n6\t-4242.424242\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n7\t+4242.424242\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n8\tnan\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n9\tinf\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n10\t+nan\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n11\t+inf\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n12\t-nan\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n13\t-inf\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n14\t44444444444444444444444444.444444444444444444444444\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n15\t30e30\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n16\t-30e-30\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip NULL +echo -e "'x'\t'trash'\t'y'\n17\tNULL\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip an array +echo -e "'x'\t'trash'\t'y'\n18\t[1,2,3,4]\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n19\t['some string ]][[][][]', 'one more string (){}][[{[[[[[[']\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n20\t[[(1,2), (3,4)], [(5,6), (7,8)]]\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip a tuple +echo -e "'x'\t'trash'\t'y'\n21\t(1,2,3,4)\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n22\t('some string ()))))(()(())', 'one more string (){}][[{[)))))')\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n23\t(([1,2], (3,4)), ([5,6], (7,8)))\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +# Skip a map +echo -e "'x'\t'trash'\t'y'\n24\t{1:2,2:3,3:4,4:5}\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n25\t{'some string }}}}}}{{{{':123, 'one more string (){}][[{[{{{{{':123}\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" +echo -e "'x'\t'trash'\t'y'\n26\t{'key':{1:(1,2), 2:(3,4)}, 'foo':{1:(5,6), 2:(7,8)}}\t42" | $CLICKHOUSE_CLIENT -q "$QUERY" + +$CLICKHOUSE_CLIENT -q "select * from test_02129 order by x" +$CLICKHOUSE_CLIENT -q "drop table test_02129" + From c319b6fa32352e07a411bc56c9096d37726de805 Mon Sep 17 00:00:00 2001 From: frank chen Date: Fri, 3 Dec 2021 22:09:04 +0800 Subject: [PATCH 069/262] Fix style Signed-off-by: frank chen --- src/Formats/FormatFactory.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 898012eeaf9..4539a0d6e6a 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -313,10 +313,10 @@ String FormatFactory::getContentType( auto format_settings = _format_settings ? 
*_format_settings : getFormatSettings(context); - Block emptyBlock; - RowOutputFormatParams emptyParams; - WriteBufferFromOwnString emptyBuffer; - auto format = output_getter(emptyBuffer, emptyBlock, emptyParams, format_settings); + Block empty_block; + RowOutputFormatParams empty_params; + WriteBufferFromOwnString empty_buffer; + auto format = output_getter(empty_buffer, empty_block, empty_params, format_settings); return format->getContentType(); } From 90eba0c0f6f396066ac0eb98b9da2cd5423f99f5 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 3 Dec 2021 17:45:53 +0300 Subject: [PATCH 070/262] fix uncaught exception in DatabaseLazy --- src/Databases/DatabaseLazy.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 4b3e06e318e..1ff84b53eee 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -269,6 +269,7 @@ StoragePtr DatabaseLazy::loadTable(const String & table_name) const } void DatabaseLazy::clearExpiredTables() const +try { std::lock_guard lock(mutex); auto time_now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); @@ -303,6 +304,10 @@ void DatabaseLazy::clearExpiredTables() const cache_expiration_queue.splice(cache_expiration_queue.begin(), busy_tables, busy_tables.begin(), busy_tables.end()); } +catch (...) +{ + tryLogCurrentException(log, __PRETTY_FUNCTION__); +} DatabaseLazyIterator::DatabaseLazyIterator(const DatabaseLazy & database_, Strings && table_names_) From 5b8f63a6f3b784e951b64f8138e2f1bf9cf1e338 Mon Sep 17 00:00:00 2001 From: Alexey Date: Fri, 3 Dec 2021 19:36:59 +0000 Subject: [PATCH 071/262] fixes --- docs/en/interfaces/grpc.md | 2 +- docs/ru/interfaces/grpc.md | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index eb572629532..21c899ee2fe 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -58,7 +58,7 @@ To use the gRPC interface set `grpc_port` in the main [server configuration](../ ## Built-in Client {#grpc-client} -You can write a client in any of the programming languages supported by gRPC using the provided [specification](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto) +You can write a client in any of the programming languages supported by gRPC using the provided [specification](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). Or you can use a built-in Python client. It is placed in [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) in the repository. The built-in client requires [grpcio and grpcio-tools](https://grpc.io/docs/languages/python/quickstart) Python modules. The client supports the following arguments: diff --git a/docs/ru/interfaces/grpc.md b/docs/ru/interfaces/grpc.md index 35a59272035..924b9ea11db 100644 --- a/docs/ru/interfaces/grpc.md +++ b/docs/ru/interfaces/grpc.md @@ -10,7 +10,7 @@ toc_title: gRPC интерфейс ClickHouse поддерживает интерфейс [gRPC](https://grpc.io/). Это система удаленного вызова процедур с открытым исходным кодом, которая использует HTTP/2 и [Protocol Buffers](https://ru.wikipedia.org/wiki/Protocol_Buffers). 
В реализации gRPC в ClickHouse поддерживаются: - SSL; -- аутентификацию; +- аутентификация; - сессии; - сжатие; - параллельные запросы, выполняемые через один канал; @@ -58,24 +58,24 @@ ClickHouse поддерживает интерфейс [gRPC](https://grpc.io/). ## Встроенный клиент {#grpc-client} -Можно написать клиент на любом языке программирования, который поддерживается gRPC с использованием [спецификации](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto) +Можно написать клиент на любом языке программирования, который поддерживается gRPC, с использованием [спецификации](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). Также можно воспользоваться встроенным Python клиентом. Он расположен в [utils/grpc-client/clickhouse-grpc-client.py](https://github.com/ClickHouse/ClickHouse/blob/master/utils/grpc-client/clickhouse-grpc-client.py) в репозитории. Для работы встроенного клиента требуются Python модули [grpcio и grpcio-tools](https://grpc.io/docs/languages/python/quickstart). Клиент поддерживает аргументы: - `--help` – вывести справку и завершить работу. - `--host HOST, -h HOST` – имя сервера. Значение по умолчанию: `localhost`. Можно задать адрес IPv4 или IPv6. -- `--port PORT` – номер порта. Этот порт должен быть задан в конфигурации сервера ClickHouse настройкой `grpc_port`. Значение по умолчанию: `9100` +- `--port PORT` – номер порта. Этот порт должен быть задан в конфигурации сервера ClickHouse настройкой `grpc_port`. Значение по умолчанию: `9100`. - `--user USER_NAME, -u USER_NAME` – имя пользователя. Значение по умолчанию: `default`. - `--password PASSWORD` – пароль. Значение по умолчанию: пустая строка. -- `--query QUERY, -q QUERY` – запрос, который нужно выполнить. +- `--query QUERY, -q QUERY` – запрос, который выполнится, когда используется неинтерактивный режим работы. - `--database DATABASE, -d DATABASE` – база данных по умолчанию. Если не указана, то будет использована база данных, заданная в настройках сервера (по умолчанию `default`). - `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – [формат](formats.md) вывода результата. Значение по умолчанию для интерактивного режима: `PrettyCompact`. - `--debug` – вывод отладочной информации. Чтобы запустить клиент в интерактивном режиме, не указывайте аргумент `--query`. -В пакетном режиме данные запроса можно передать через `stdin`. +В неинтерактивном режиме данные запроса можно передать через `stdin`. 
**Пример использования клиента** From 1cc5dd797901dca7350222bbdd332c36facda4e9 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 3 Dec 2021 23:36:35 +0300 Subject: [PATCH 072/262] Fix --- tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql | 2 +- tests/queries/0_stateless/01055_window_view_proc_hop_to.sql | 2 +- tests/queries/0_stateless/01056_window_view_proc_hop_watch.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql index 8ecd93fbf87..f229969603b 100644 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql @@ -9,7 +9,7 @@ CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid; -INSERT INTO mt VALUES (1, now() + 1); +INSERT INTO mt VALUES (1, now('US/Samoa') + 1); SELECT sleep(3); SELECT count from dst; diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql index 8e28577f645..b75cc33e741 100644 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql @@ -9,7 +9,7 @@ CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid; -INSERT INTO mt VALUES (1, now() + 1); +INSERT INTO mt VALUES (1, now('US/Samoa') + 1); SELECT sleep(3); SELECT count from dst; diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 353bd38bc54..df83615d507 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -39,7 +39,7 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send('WATCH 01056_window_view_proc_hop_watch.wv') client1.expect('Query id' + end_of_block) - client2.send("INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now() + 1)") + client2.send("INSERT INTO 01056_window_view_proc_hop_watch.mt VALUES (1, now('US/Samoa') + 1)") client1.expect('1' + end_of_block) client1.expect('Progress: 1.00 rows.*\)') From 4709ff934031c99d32194a49a30c9a907068f3d8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 1 Dec 2021 22:01:05 +0300 Subject: [PATCH 073/262] More generic check for CMAKE_BUILD_TYPE in jemalloc --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index fd52ce4a4f3..fb11879fb21 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ if (SANITIZE OR NOT ( ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE OR ARCH_RISCV64)) OR - (OS_DARWIN AND (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Debug")) + (OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) 
)) if (ENABLE_JEMALLOC) message (${RECONFIGURE_MESSAGE_LEVEL} From 130be9a4f92c7d15af7ca67f0fbdd6f20b3d6225 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 21:39:09 +0300 Subject: [PATCH 074/262] Bump libpqxx to fix assertion in jemalloc Refs: https://github.com/ClickHouse-Extras/libpqxx/pull/5 --- contrib/libpqxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libpqxx b/contrib/libpqxx index 357608d11b7..63e20f9485b 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit 357608d11b7a1961c3fb7db2ef9a5dbb2e87da77 +Subproject commit 63e20f9485b8cbeabf99008123248fc9f033e766 From 22a74dc68df9891756375a290b390208fcd5e9c5 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 21:46:56 +0300 Subject: [PATCH 075/262] Make jemalloc under osx even more reliable - explicitly call zone_register() again - explicitly call malloc(free()) to initialize jemalloc() --- src/Common/new_delete.cpp | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/src/Common/new_delete.cpp b/src/Common/new_delete.cpp index fa32d56b350..27db87809d3 100644 --- a/src/Common/new_delete.cpp +++ b/src/Common/new_delete.cpp @@ -1,6 +1,44 @@ #include #include +#if defined(OS_DARWIN) && (USE_JEMALLOC) +/// In case of OSX jemalloc register itself as a default zone allocator. +/// +/// Sure jemalloc will register itself, since zone_register() declared with +/// constructor attribute (since zone_register is also forbidden from +/// optimizing out), however those constructors will be called before +/// constructors for global variable initializers (__cxx_global_var_init()). +/// +/// So to make jemalloc under OSX more stable, we will call it explicitly from +/// global variable initializers so that each allocation will use it. +/// (NOTE: It is ok to call it twice, since zone_register() is a no-op if the +/// default zone is already replaced with something.) +/// +/// Refs: https://github.com/jemalloc/jemalloc/issues/708 + +extern "C" +{ + extern void zone_register(); +} + +static struct InitializeJemallocZoneAllocatorForOSX +{ + InitializeJemallocZoneAllocatorForOSX() + { + zone_register(); + /// jemalloc() initializes itself only on malloc() + /// and so if some global initializer will have free(nullptr) + /// jemalloc may trigger some internal assertion. + /// + /// To prevent this, we explicitly call malloc(free()) here. + if (void * ptr = malloc(0)) + { + free(ptr); + } + } +} initializeJemallocZoneAllocatorForOSX; +#endif + /// Replace default new/delete with memory tracking versions. /// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new From abab7108e46f81192e326bd3927abf1b23b65bb0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 4 Dec 2021 00:15:52 +0300 Subject: [PATCH 076/262] Fix QueryProfiler building under osx Fixes: #32165 --- src/Common/QueryProfiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 12410de6bf0..0b2cd602b38 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -25,13 +25,13 @@ namespace { #if defined(OS_LINUX) thread_local size_t write_trace_iteration = 0; +#endif /// Even after timer_delete() the signal can be delivered, /// since it does not do anything with pending signals. /// /// And so to overcome this flag is exists, /// to ignore delivered signals after timer_delete(). 
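The new_delete.cpp change above relies on a namespace-scope object whose constructor runs before main() to register the jemalloc zone and warm the allocator up. A minimal sketch of that initializer pattern, with the zone_register() call replaced by a print since it is macOS/jemalloc specific:

#include <cstdio>
#include <cstdlib>

// Namespace-scope object: its constructor runs during static initialization,
// i.e. before main(). The real code calls zone_register() here and then does
// a malloc/free pair so jemalloc finishes its own initialization early.
static struct InitializeAllocatorEarly
{
    InitializeAllocatorEarly()
    {
        if (void * ptr = std::malloc(1))
            std::free(ptr);
        std::puts("allocator initialized before main()");
    }
} initializeAllocatorEarly;

int main()
{
    std::puts("main() starts");
}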
thread_local bool signal_handler_disarmed = true; -#endif void writeTraceInfo(TraceType trace_type, int /* sig */, siginfo_t * info, void * context) { From c522c06755bfa280b14bab4ab9c3f0a0f3d227f5 Mon Sep 17 00:00:00 2001 From: vxider Date: Sat, 4 Dec 2021 12:30:04 +0000 Subject: [PATCH 077/262] fix windowview parser --- src/Functions/FunctionsWindow.cpp | 28 +++++++++++++------ src/Storages/WindowView/StorageWindowView.cpp | 19 +++++++------ src/Storages/WindowView/StorageWindowView.h | 1 + .../01050_window_view_parser_tumble.reference | 1 + .../01050_window_view_parser_tumble.sql | 6 +++- .../01051_window_view_parser_hop.reference | 1 + .../01051_window_view_parser_hop.sql | 4 +++ 7 files changed, 43 insertions(+), 17 deletions(-) diff --git a/src/Functions/FunctionsWindow.cpp b/src/Functions/FunctionsWindow.cpp index 2ed5e9863d6..a26faac304d 100644 --- a/src/Functions/FunctionsWindow.cpp +++ b/src/Functions/FunctionsWindow.cpp @@ -238,12 +238,18 @@ struct WindowImpl [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) { - const auto which_type = WhichDataType(arguments[0].type); + const auto & time_column = arguments[0]; + const auto which_type = WhichDataType(time_column.type); ColumnPtr result_column; - if (which_type.isDateTime()) - result_column= WindowImpl::dispatchForColumns(arguments, function_name); + if (arguments.size() == 1) + { + if (which_type.isUInt32()) + return time_column.column; + else //isTuple + result_column = time_column.column; + } else - result_column = arguments[0].column; + result_column = WindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 0, function_name); } }; @@ -260,12 +266,18 @@ struct WindowImpl [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name) { - const auto which_type = WhichDataType(arguments[0].type); + const auto & time_column = arguments[0]; + const auto which_type = WhichDataType(time_column.type); ColumnPtr result_column; - if (which_type.isDateTime()) - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + if (arguments.size() == 1) + { + if (which_type.isUInt32()) + return time_column.column; + else //isTuple + result_column = time_column.column; + } else - result_column = arguments[0].column; + result_column = WindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 1, function_name); } }; diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 79ba2568de4..f51f5ddab08 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -89,22 +89,23 @@ namespace { data.is_tumble = t->name == "TUMBLE"; data.is_hop = t->name == "HOP"; + auto temp_node = t->clone(); + temp_node->setAlias(""); if (!data.window_function) { + data.serialized_window_function = serializeAST(*temp_node); t->name = "WINDOW_ID"; data.window_id_name = t->getColumnName(); data.window_id_alias = t->alias; data.window_function = t->clone(); data.window_function->setAlias(""); - data.serialized_window_function = serializeAST(*data.window_function); data.timestamp_column_name = t->arguments->children[0]->getColumnName(); } else { - auto temp_node = t->clone(); - temp_node->setAlias(""); if (serializeAST(*temp_node) != data.serialized_window_function) throw Exception("WINDOW VIEW only support ONE WINDOW FUNCTION", 
ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + t->name = "WINDOW_ID"; } } } @@ -146,7 +147,7 @@ namespace void visit(ASTFunction & node, ASTPtr & node_ptr) { - if (node.name == "WINDOW_ID") + if (node.name == "WINDOW_ID" || node.name == "TUMBLE" || node.name == "HOP") { if (const auto * t = node.arguments->children[0]->as(); t && t->name == "now") @@ -938,10 +939,12 @@ StorageWindowView::StorageWindowView( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for {}", getName()); - ASTSelectQuery & select_query = typeid_cast(*query.select->list_of_selects->children.at(0)); + ASTSelectQuery & select_query_ = typeid_cast(*query.select->list_of_selects->children.at(0)); String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; - extractDependentTable(getContext(), select_query, select_database_name, select_table_name); + extractDependentTable(getContext(), select_query_, select_database_name, select_table_name); + + select_query = select_query_.clone(); /// If the table is not specified - use the table `system.one` if (select_table_name.empty()) @@ -953,7 +956,7 @@ StorageWindowView::StorageWindowView( DatabaseCatalog::instance().addDependency(select_table_id, table_id_); /// Extract all info from query; substitute Function_TUMPLE and Function_HOP with Function_WINDOW_ID. - auto inner_query = innerQueryParser(select_query); + auto inner_query = innerQueryParser(select_query_); // Parse mergeable query mergeable_query = inner_query->clone(); @@ -1344,7 +1347,7 @@ Block & StorageWindowView::getHeader() const if (!sample_block) { sample_block = InterpreterSelectQuery( - getFinalQuery(), window_view_context, getParentStorage(), nullptr, + select_query->clone(), window_view_context, getParentStorage(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)).getSampleBlock(); for (size_t i = 0; i < sample_block.columns(); ++i) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 893647add79..e989663c7e5 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -150,6 +150,7 @@ public: private: Poco::Logger * log; + ASTPtr select_query; ASTPtr mergeable_query; ASTPtr final_query; diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.reference b/tests/queries/0_stateless/01050_window_view_parser_tumble.reference index 75cd8e28af5..6375c151906 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.reference +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.reference @@ -4,3 +4,4 @@ ---WITH--- ---WHERE--- ---ORDER_BY--- +---With now--- diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 12f67a68237..6837036263c 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -10,7 +10,7 @@ CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), TUMBLE_S SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(TUMBLE(timestamp, INTERVAL '3' SECOND)) AS w_start, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WithOut w_end---'; 
DROP TABLE IF EXISTS wv NO DELAY; @@ -27,3 +27,7 @@ CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt W SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; + +SELECT '---With now---'; +DROP TABLE IF EXISTS wv NO DELAY; +CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(TUMBLE(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY TUMBLE(now(), INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.reference b/tests/queries/0_stateless/01051_window_view_parser_hop.reference index 75cd8e28af5..6375c151906 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.reference +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.reference @@ -4,3 +4,4 @@ ---WITH--- ---WHERE--- ---ORDER_BY--- +---With now--- diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 3c1e3d16320..df0729108d0 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -27,3 +27,7 @@ CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt WHER SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; + +SELECT '---With now---'; +DROP TABLE IF EXISTS wv NO DELAY; +CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start, HOP_END(HOP(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY HOP(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; From 55d50c0b4d939fb4007b4006a0faba312bc40d45 Mon Sep 17 00:00:00 2001 From: frank chen Date: Sat, 4 Dec 2021 21:56:52 +0800 Subject: [PATCH 078/262] Improve span operation name Signed-off-by: frank chen --- src/Interpreters/executeQuery.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 2ab4167176f..37a0d87a120 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -626,7 +626,13 @@ static std::tuple executeQueryImpl( } { - OpenTelemetrySpanHolder span("IInterpreter::execute()"); + std::unique_ptr span; + if (context->query_trace_context.trace_id != UUID()) + { + auto raw_interpreter_ptr = interpreter.get(); + std::string class_name(abi::__cxa_demangle(typeid(*raw_interpreter_ptr).name(), nullptr, nullptr, nullptr)); + span = std::make_unique(class_name + "::execute()"); + } res = interpreter->execute(); } From 78ceb5c8839de811c57be383a8a6612692bf95d9 Mon Sep 17 00:00:00 2001 From: vxider Date: Sat, 4 Dec 2021 14:18:10 +0000 Subject: [PATCH 079/262] add illegal arg check --- src/Storages/WindowView/StorageWindowView.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index f51f5ddab08..94f88842cbb 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -91,6 +91,10 @@ namespace data.is_hop = t->name == "HOP"; auto temp_node = t->clone(); temp_node->setAlias(""); + if 
(startsWith(t->arguments->children[0]->getColumnName(), "toDateTime")) + throw Exception( + "The first argument of window function should not be a constant value.", + ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); if (!data.window_function) { data.serialized_window_function = serializeAST(*temp_node); From 15e3dbe3f210e5478825de8c997d3513c3ad890f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 30 Nov 2021 14:43:30 +0300 Subject: [PATCH 080/262] Fix skipping columns in Nested while writing protobuf. --- src/Formats/ProtobufSerializer.cpp | 132 +++++++++++------- ..._format_skipped_column_in_nested.reference | 27 ++++ ...rotobuf_format_skipped_column_in_nested.sh | 55 ++++++++ ...obuf_format_skipped_column_in_nested.proto | 29 ++++ 4 files changed, 194 insertions(+), 49 deletions(-) create mode 100644 tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference create mode 100755 tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh create mode 100644 tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index ac89203c6e0..94a385aa067 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -2062,7 +2062,7 @@ namespace }; ProtobufSerializerMessage( - std::vector field_descs_, + std::vector && field_descs_, const FieldDescriptor * parent_field_descriptor_, bool with_length_delimiter_, const ProtobufReaderOrWriter & reader_or_writer_) @@ -2091,8 +2091,10 @@ namespace for (const FieldInfo & info : field_infos) { field_columns.clear(); + field_columns.reserve(info.column_indices.size()); for (size_t column_index : info.column_indices) { + assert(column_index < num_columns_); field_columns.emplace_back(columns_[column_index]); } info.field_serializer->setColumns(field_columns.data(), field_columns.size()); @@ -2103,11 +2105,9 @@ namespace missing_column_indices.resize(num_columns_); for (size_t column_index : collections::range(num_columns_)) missing_column_indices[column_index] = column_index; - for (const FieldInfo & info : field_infos) - { - for (size_t column_index : info.column_indices) + for (const auto & field_info : field_infos) + for (size_t column_index : field_info.column_indices) missing_column_indices[column_index] = static_cast(-1); - } boost::range::remove_erase(missing_column_indices, static_cast(-1)); } } @@ -2195,6 +2195,7 @@ namespace reader->endNestedMessage(); else reader->endMessage(false); + addDefaultsToMissingColumns(row_num); } @@ -2229,9 +2230,9 @@ namespace void addDefaultsToMissingColumns(size_t row_num) { - for (size_t column_idx : missing_column_indices) + for (size_t column_index : missing_column_indices) { - auto & column = columns[column_idx]; + auto & column = columns[column_index]; size_t old_size = column->size(); if (row_num >= old_size) column->assumeMutableRef().insertDefault(); @@ -2241,7 +2242,7 @@ namespace struct FieldInfo { FieldInfo( - std::vector column_indices_, + std::vector && column_indices_, const FieldDescriptor & field_descriptor_, std::unique_ptr field_serializer_) : column_indices(std::move(column_indices_)) @@ -2277,8 +2278,8 @@ namespace class ProtobufSerializerTupleAsNestedMessage : public ProtobufSerializer { public: - explicit ProtobufSerializerTupleAsNestedMessage(std::unique_ptr nested_message_serializer_) - : nested_message_serializer(std::move(nested_message_serializer_)) + explicit 
ProtobufSerializerTupleAsNestedMessage(std::unique_ptr message_serializer_) + : message_serializer(std::move(message_serializer_)) { } @@ -2292,7 +2293,7 @@ namespace element_columns.reserve(tuple_size); for (size_t i : collections::range(tuple_size)) element_columns.emplace_back(column_tuple.getColumnPtr(i)); - nested_message_serializer->setColumns(element_columns.data(), element_columns.size()); + message_serializer->setColumns(element_columns.data(), element_columns.size()); } void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override @@ -2302,12 +2303,12 @@ namespace setColumns(&column0, 1); } - void writeRow(size_t row_num) override { nested_message_serializer->writeRow(row_num); } - void readRow(size_t row_num) override { nested_message_serializer->readRow(row_num); } - void insertDefaults(size_t row_num) override { nested_message_serializer->insertDefaults(row_num); } + void writeRow(size_t row_num) override { message_serializer->writeRow(row_num); } + void readRow(size_t row_num) override { message_serializer->readRow(row_num); } + void insertDefaults(size_t row_num) override { message_serializer->insertDefaults(row_num); } private: - const std::unique_ptr nested_message_serializer; + const std::unique_ptr message_serializer; }; @@ -2317,8 +2318,8 @@ namespace { public: explicit ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages( - std::unique_ptr nested_message_serializer_) - : nested_message_serializer(std::move(nested_message_serializer_)) + std::unique_ptr message_serializer_) + : message_serializer(std::move(message_serializer_)) { } @@ -2340,7 +2341,7 @@ namespace std::sort(offset_columns.begin(), offset_columns.end()); offset_columns.erase(std::unique(offset_columns.begin(), offset_columns.end()), offset_columns.end()); - nested_message_serializer->setColumns(data_columns.data(), data_columns.size()); + message_serializer->setColumns(data_columns.data(), data_columns.size()); } void setColumns(const MutableColumnPtr * columns, size_t num_columns) override @@ -2364,7 +2365,7 @@ namespace throw Exception("Components of FlattenedNested have different sizes", ErrorCodes::PROTOBUF_BAD_CAST); } for (size_t i : collections::range(start_offset, end_offset)) - nested_message_serializer->writeRow(i); + message_serializer->writeRow(i); } void readRow(size_t row_num) override @@ -2377,7 +2378,7 @@ namespace try { - nested_message_serializer->readRow(old_data_size); + message_serializer->readRow(old_data_size); size_t data_size = data_columns[0]->size(); if (data_size != old_data_size + 1) throw Exception("Unexpected number of elements of ColumnArray has been read", ErrorCodes::LOGICAL_ERROR); @@ -2433,7 +2434,7 @@ namespace } private: - const std::unique_ptr nested_message_serializer; + const std::unique_ptr message_serializer; Columns data_columns; Columns offset_columns; }; @@ -2445,7 +2446,7 @@ namespace public: explicit ProtobufSerializerBuilder(const ProtobufReaderOrWriter & reader_or_writer_) : reader_or_writer(reader_or_writer_) {} - std::unique_ptr buildMessageSerializer( + std::unique_ptr buildMessageSerializer( const Strings & column_names, const DataTypes & data_types, std::vector & missing_column_indices, @@ -2453,16 +2454,17 @@ namespace bool with_length_delimiter) { std::vector used_column_indices; - auto serializer = buildMessageSerializerImpl( + auto message_serializer = buildMessageSerializerImpl( /* num_columns = */ column_names.size(), column_names.data(), data_types.data(), - used_column_indices, message_descriptor, 
with_length_delimiter, - /* parent_field_descriptor = */ nullptr); + /* parent_field_descriptor = */ nullptr, + used_column_indices, + /* columns_are_reordered_outside = */ false); - if (!serializer) + if (!message_serializer) { throw Exception( "Not found matches between the names of the columns {" + boost::algorithm::join(column_names, ", ") @@ -2473,10 +2475,12 @@ namespace missing_column_indices.clear(); missing_column_indices.reserve(column_names.size() - used_column_indices.size()); - boost::range::set_difference(collections::range(column_names.size()), used_column_indices, + auto used_column_indices_sorted = std::move(used_column_indices); + std::sort(used_column_indices_sorted.begin(), used_column_indices_sorted.end()); + boost::range::set_difference(collections::range(column_names.size()), used_column_indices_sorted, std::back_inserter(missing_column_indices)); - return serializer; + return message_serializer; } private: @@ -2621,24 +2625,38 @@ namespace } /// Builds a serializer for a protobuf message (root or nested). + /// + /// Some of the passed columns might be skipped, the function sets `used_column_indices` to + /// the list of those columns which match any fields in the protobuf message. + /// + /// Normally `columns_are_reordered_outside` should be false - if it's false it means that + /// the used column indices will be passed to ProtobufSerializerMessage, which will write/read + /// only those columns and set the rest of columns by default. + /// Set `columns_are_reordered_outside` to true if you're going to reorder columns + /// according to `used_column_indices` returned and pass to + /// ProtobufSerializerMessage::setColumns() only the columns which are actually used. template std::unique_ptr buildMessageSerializerImpl( size_t num_columns, const StringOrStringViewT * column_names, const DataTypePtr * data_types, - std::vector & used_column_indices, const MessageDescriptor & message_descriptor, bool with_length_delimiter, - const FieldDescriptor * parent_field_descriptor) + const FieldDescriptor * parent_field_descriptor, + std::vector & used_column_indices, + bool columns_are_reordered_outside) { std::vector field_descs; boost::container::flat_map field_descriptors_in_use; used_column_indices.clear(); used_column_indices.reserve(num_columns); + boost::container::flat_set used_column_indices_sorted; + used_column_indices_sorted.reserve(num_columns); + size_t sequential_column_index = 0; auto add_field_serializer = [&](const std::string_view & column_name_, - std::vector column_indices_, + std::vector && column_indices_, const FieldDescriptor & field_descriptor_, std::unique_ptr field_serializer_) { @@ -2652,12 +2670,17 @@ namespace ErrorCodes::MULTIPLE_COLUMNS_SERIALIZED_TO_SAME_PROTOBUF_FIELD); } - for (size_t column_index : column_indices_) + used_column_indices.insert(used_column_indices.end(), column_indices_.begin(), column_indices_.end()); + used_column_indices_sorted.insert(column_indices_.begin(), column_indices_.end()); + + auto column_indices_to_pass_to_message_serializer = std::move(column_indices_); + if (columns_are_reordered_outside) { - /// Keep `used_column_indices` sorted. 
- used_column_indices.insert(boost::range::upper_bound(used_column_indices, column_index), column_index); + for (auto & index : column_indices_to_pass_to_message_serializer) + index = sequential_column_index++; } - field_descs.push_back({std::move(column_indices_), &field_descriptor_, std::move(field_serializer_)}); + + field_descs.push_back({std::move(column_indices_to_pass_to_message_serializer), &field_descriptor_, std::move(field_serializer_)}); field_descriptors_in_use.emplace(&field_descriptor_, column_name_); }; @@ -2666,7 +2689,7 @@ namespace /// We're going through all the passed columns. for (size_t column_idx : collections::range(num_columns)) { - if (boost::range::binary_search(used_column_indices, column_idx)) + if (used_column_indices_sorted.count(column_idx)) continue; const auto & column_name = column_names[column_idx]; @@ -2702,7 +2725,7 @@ namespace for (size_t j : collections::range(column_idx + 1, num_columns)) { - if (boost::range::binary_search(used_column_indices, j)) + if (used_column_indices_sorted.count(j)) continue; std::string_view other_suffix; if (!columnNameStartsWithFieldName(column_names[j], *field_descriptor, other_suffix)) @@ -2740,10 +2763,15 @@ namespace nested_column_names.size(), nested_column_names.data(), nested_data_types.data(), - used_column_indices_in_nested, *field_descriptor->message_type(), - false, - field_descriptor); + /* with_length_delimiter = */ false, + field_descriptor, + used_column_indices_in_nested, + /* columns_are_reordered_outside = */ true); + + /// `columns_are_reordered_outside` is true because column indices are + /// going to be transformed and then written to the outer message, + /// see add_field_serializer() below. if (nested_message_serializer) { @@ -2774,10 +2802,15 @@ namespace nested_column_names.size(), nested_column_names.data(), nested_data_types.data(), - used_column_indices_in_nested, *field_descriptor->message_type(), - false, - field_descriptor); + /* with_length_delimiter = */ false, + field_descriptor, + used_column_indices_in_nested, + /* columns_are_reordered_outside = */ true); + + /// `columns_are_reordered_outside` is true because column indices are + /// going to be transformed and then written to the outer message, + /// see add_field_serializer() below. if (nested_message_serializer) { @@ -2907,16 +2940,17 @@ namespace { /// Try to serialize as a nested message. std::vector used_column_indices; - auto nested_message_serializer = buildMessageSerializerImpl( + auto message_serializer = buildMessageSerializerImpl( size_of_tuple, tuple_data_type.getElementNames().data(), tuple_data_type.getElements().data(), - used_column_indices, *field_descriptor.message_type(), - false, - &field_descriptor); + /* with_length_delimiter = */ false, + &field_descriptor, + used_column_indices, + /* columns_are_reordered_outside = */ false); - if (!nested_message_serializer) + if (!message_serializer) { throw Exception( "Not found matches between the names of the tuple's elements {" @@ -2926,7 +2960,7 @@ namespace ErrorCodes::NO_COLUMNS_SERIALIZED_TO_PROTOBUF_FIELDS); } - return std::make_unique(std::move(nested_message_serializer)); + return std::make_unique(std::move(message_serializer)); } /// Serialize as a repeated field. 
diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference new file mode 100644 index 00000000000..1a80e6401db --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference @@ -0,0 +1,27 @@ +e4048ead-30a2-45e5-90be-2af1c7137523 dummy [1] [50639] [58114] [[5393]] [[1]] [[3411]] [[17811]] [[(10,20)]] + +Binary representation: +00000000 44 0a 24 65 34 30 34 38 65 61 64 2d 33 30 61 32 |D.$e4048ead-30a2| +00000010 2d 34 35 65 35 2d 39 30 62 65 2d 32 61 66 31 63 |-45e5-90be-2af1c| +00000020 37 31 33 37 35 32 33 62 1c 10 01 18 cf 8b 03 20 |7137523b....... | +00000030 82 c6 03 5a 10 28 01 30 91 2a 40 93 8b 01 52 05 |...Z.(.0.*@...R.| +00000040 4d 00 00 a0 41 |M...A| +00000045 + +MESSAGE #1 AT 0x00000001 +identifier: "e4048ead-30a2-45e5-90be-2af1c7137523" +modules { + module_id: 1 + supply: 50639 + temp: 58114 + nodes { + node_id: 1 + opening_time: 5393 + current: 17811 + coords { + y: 20 + } + } +} + +Binary representation is as expected diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh new file mode 100755 index 00000000000..b413385fb77 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +# https://github.com/ClickHouse/ClickHouse/issues/31160 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +SCHEMADIR=$CURDIR/format_schemas +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery < "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). 
+#echo +#$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH" +#$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825" + +rm "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "DROP TABLE table_skipped_column_in_nested_00825" diff --git a/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto new file mode 100644 index 00000000000..054de349e24 --- /dev/null +++ b/tests/queries/0_stateless/format_schemas/00825_protobuf_format_skipped_column_in_nested.proto @@ -0,0 +1,29 @@ +syntax = "proto3"; + +message UpdateMessage { + string identifier = 1; + //string unused1 = 100; + + message Module { + uint32 module_id = 2; + uint32 supply = 3; + uint32 temp = 4; + + message ModuleNode { + uint32 node_id = 5; + uint32 opening_time = 6; + uint32 closing_time = 7; // The column in the table is named `closing_time_time` + uint32 current = 8; + + message Coords { + //float x = 8; + float y = 9; + } + Coords coords = 10; + } + + repeated ModuleNode nodes = 11; + } + + repeated Module modules = 12; +} From 2e0b4800440e76a9877ce6f8871b904d01c421aa Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 1 Dec 2021 21:19:47 +0300 Subject: [PATCH 081/262] Improve error handling while serializing protobufs. --- src/Formats/ProtobufSerializer.cpp | 494 ++++++++++++++++++++--------- src/Formats/ProtobufSerializer.h | 4 +- 2 files changed, 351 insertions(+), 147 deletions(-) diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 94a385aa067..efe01740cf6 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -28,6 +28,7 @@ # include # include # include +# include # include # include # include @@ -139,6 +140,15 @@ namespace } + WriteBuffer & writeIndent(WriteBuffer & out, size_t size) { return out << String(size * 4, ' '); } + + + [[noreturn]] void wrongNumberOfColumns(size_t number_of_columns, const String & expected) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of columns: expected {}, specified {}", expected, number_of_columns); + } + + struct ProtobufReaderOrWriter { ProtobufReaderOrWriter(ProtobufReader & reader_) : reader(&reader_) {} // NOLINT(google-explicit-constructor) @@ -152,8 +162,12 @@ namespace class ProtobufSerializerSingleValue : public ProtobufSerializer { protected: - ProtobufSerializerSingleValue(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : field_descriptor(field_descriptor_) + ProtobufSerializerSingleValue( + const std::string_view & column_name_, + const FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : column_name(column_name_) + , field_descriptor(field_descriptor_) , field_typeid(field_descriptor_.type()) , field_tag(field_descriptor.number()) , reader(reader_or_writer_.reader) @@ -164,13 +178,15 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; } void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, 
"1"); column = columns[0]->getPtr(); } @@ -259,14 +275,28 @@ namespace return result; } + [[noreturn]] void incompatibleColumnType(const std::string_view & column_type) const + { + throw Exception( + ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD, + "The column {} ({}) cannot be serialized to the field {} ({}) due to their types are not compatible", + quoteString(column_name), + column_type, + quoteString(field_descriptor.full_name()), + field_descriptor.type_name()); + } + [[noreturn]] void cannotConvertValue(const std::string_view & src_value, const std::string_view & src_type_name, const std::string_view & dest_type_name) const { throw Exception( - "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + String{dest_type_name} + - " while " + (reader ? "reading" : "writing") + " field " + field_descriptor.name(), + "Could not convert value '" + String{src_value} + "' from type " + String{src_type_name} + " to type " + + String{dest_type_name} + " while " + (reader ? "reading" : "writing") + " field " + + quoteString(field_descriptor.name()) + " " + (reader ? "for inserting into" : "extracted from") + " column " + + quoteString(column_name), ErrorCodes::PROTOBUF_BAD_CAST); } + const String column_name; const FieldDescriptor & field_descriptor; const FieldTypeId field_typeid; const int field_tag; @@ -289,8 +319,8 @@ namespace public: using ColumnType = ColumnVector; - ProtobufSerializerNumber(const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + ProtobufSerializerNumber(const std::string_view & column_name_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } @@ -319,6 +349,13 @@ namespace column_vector.insertValue(getDefaultNumber()); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerNumber<" << TypeName << ">: column " << quoteString(column_name) + << " -> field " << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() + << ")\n"; + } + private: void setFunctions() { @@ -469,7 +506,7 @@ namespace case FieldTypeId::TYPE_ENUM: { if (std::is_floating_point_v) - failedToSetFunctions(); + incompatibleColumnType(TypeName); write_function = [this](NumberType value) { @@ -484,18 +521,10 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType(TypeName); } } - [[noreturn]] void failedToSetFunctions() const - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(TypeName), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - NumberType getDefaultNumber() { if (!default_number) @@ -529,10 +558,11 @@ namespace using ColumnType = std::conditional_t; ProtobufSerializerString( + const std::string_view & column_name_, const std::shared_ptr & fixed_string_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , fixed_string_data_type(fixed_string_data_type_) , n(fixed_string_data_type->getN()) { @@ 
-542,8 +572,10 @@ namespace } ProtobufSerializerString( - const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + const std::string_view & column_name_, + const google::protobuf::FieldDescriptor & field_descriptor_, + const ProtobufReaderOrWriter & reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { static_assert(!is_fixed_string, "This constructor for String only"); setFunctions(); @@ -649,6 +681,13 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerString<" << (is_fixed_string ? "fixed" : "") << ">: column " + << quoteString(column_name) << " -> field " << quoteString(field_descriptor.full_name()) << " (" + << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -799,18 +838,10 @@ namespace } default: - failedToSetFunctions(); + this->incompatibleColumnType(is_fixed_string ? "FixedString" : "String"); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(is_fixed_string ? "FixedString" : "String"), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - const PaddedPODArray & getDefaultString() { if (!default_string) @@ -890,16 +921,24 @@ namespace using BaseClass = ProtobufSerializerNumber; ProtobufSerializerEnum( + const std::string_view & column_name_, const std::shared_ptr & enum_data_type_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : BaseClass(field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_) + : BaseClass(column_name_, field_descriptor_, reader_or_writer_), enum_data_type(enum_data_type_) { assert(enum_data_type); setFunctions(); prepareEnumMapping(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerEnum<" << TypeName << ">: column " << quoteString(this->column_name) + << " -> field " << quoteString(this->field_descriptor.full_name()) << " (" + << this->field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -964,18 +1003,10 @@ namespace } default: - failedToSetFunctions(); + this->incompatibleColumnType(enum_data_type->getName()); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(this->field_descriptor.full_name()) + " has an incompatible type " + this->field_descriptor.type_name() - + " for serialization of the data type " + quoteString(enum_data_type->getName()), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - void checkEnumDataTypeValue(NumberType value) { enum_data_type->findByValue(value); /// Throws an exception if the value isn't defined in the DataTypeEnum. 
@@ -1089,10 +1120,11 @@ namespace using ColumnType = ColumnDecimal; ProtobufSerializerDecimal( + const std::string_view & column_name_, const DataTypeDecimalBase & decimal_data_type_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , precision(decimal_data_type_.getPrecision()) , scale(decimal_data_type_.getScale()) { @@ -1123,6 +1155,13 @@ namespace column_decimal.insertValue(getDefaultDecimal()); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDecimal<" << TypeName << ">: column " << quoteString(column_name) + << " -> field " << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() + << ")\n"; + } + private: void setFunctions() { @@ -1227,7 +1266,7 @@ namespace case FieldTypeId::TYPE_BOOL: { if (std::is_same_v) - failedToSetFunctions(); + incompatibleColumnType(TypeName); else { write_function = [this](const DecimalType & decimal) @@ -1281,18 +1320,10 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType(TypeName); } } - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(TypeName), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } - DecimalType getDefaultDecimal() { if (!default_decimal) @@ -1349,13 +1380,20 @@ namespace { public: ProtobufSerializerDate( + const std::string_view & column_name_, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerNumber(field_descriptor_, reader_or_writer_) + : ProtobufSerializerNumber(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDate: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { @@ -1395,7 +1433,7 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType("Date"); } } @@ -1412,14 +1450,6 @@ namespace readDateText(date, buf); return date; } - - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type 'Date'", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } }; @@ -1428,15 +1458,22 @@ namespace { public: ProtobufSerializerDateTime( + const std::string_view & column_name_, const DataTypeDateTime & type, const FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerNumber(field_descriptor_, reader_or_writer_), + : ProtobufSerializerNumber(column_name_, field_descriptor_, reader_or_writer_), date_lut(type.getTimeZone()) { setFunctions(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerDateTime: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + protected: const 
DateLUTImpl & date_lut; @@ -1478,7 +1515,7 @@ namespace } default: - failedToSetFunctions(); + incompatibleColumnType("DateTime"); } } @@ -1497,14 +1534,6 @@ namespace tm = 0; return tm; } - - [[noreturn]] void failedToSetFunctions() - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type 'DateTime'", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } }; @@ -1513,9 +1542,10 @@ namespace { public: ProtobufSerializerUUID( + const std::string_view & column_name_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) { setFunctions(); } @@ -1544,16 +1574,17 @@ namespace column_vector.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerUUID: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void setFunctions() { if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type UUID", - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } + incompatibleColumnType("UUID"); write_function = [this](UUID value) { @@ -1591,20 +1622,16 @@ namespace { public: ProtobufSerializerAggregateFunction( + const std::string_view & column_name_, const std::shared_ptr & aggregate_function_data_type_, const google::protobuf::FieldDescriptor & field_descriptor_, const ProtobufReaderOrWriter & reader_or_writer_) - : ProtobufSerializerSingleValue(field_descriptor_, reader_or_writer_) + : ProtobufSerializerSingleValue(column_name_, field_descriptor_, reader_or_writer_) , aggregate_function_data_type(aggregate_function_data_type_) , aggregate_function(aggregate_function_data_type->getFunction()) { if ((field_typeid != FieldTypeId::TYPE_STRING) && (field_typeid != FieldTypeId::TYPE_BYTES)) - { - throw Exception( - "The field " + quoteString(field_descriptor.full_name()) + " has an incompatible type " + field_descriptor.type_name() - + " for serialization of the data type " + quoteString(aggregate_function_data_type->getName()), - ErrorCodes::DATA_TYPE_INCOMPATIBLE_WITH_PROTOBUF_FIELD); - } + incompatibleColumnType(aggregate_function_data_type->getName()); } void writeRow(size_t row_num) override @@ -1642,6 +1669,12 @@ namespace column_af.getData().push_back(data); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerAggregateFunction: column " << quoteString(column_name) << " -> field " + << quoteString(field_descriptor.full_name()) << " (" << field_descriptor.type_name() << ")\n"; + } + private: void dataToString(ConstAggregateDataPtr data, String & str) const { @@ -1684,7 +1717,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_nullable = assert_cast(*column); ColumnPtr nested_column = 
column_nullable.getNestedColumnPtr(); @@ -1693,7 +1727,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1744,6 +1779,12 @@ namespace column_nullable.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerNullable ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; ColumnPtr column; @@ -1761,7 +1802,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); const auto & column_map = assert_cast(*columns[0]); ColumnPtr nested_column = column_map.getNestedColumnPtr(); nested_serializer->setColumns(&nested_column, 1); @@ -1769,7 +1811,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1778,6 +1821,12 @@ namespace void readRow(size_t row_num) override { nested_serializer->readRow(row_num); } void insertDefaults(size_t row_num) override { nested_serializer->insertDefaults(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerMap ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; }; @@ -1794,7 +1843,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_lc = assert_cast(*column); ColumnPtr nested_column = column_lc.getDictionary().getNestedColumn(); @@ -1804,7 +1854,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1862,6 +1913,12 @@ namespace column_lc.insertFromFullColumn(*default_value_column, 0); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerLowCardinality ->\n"; + nested_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr nested_serializer; ColumnPtr column; @@ -1882,7 +1939,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_array = assert_cast(*column); ColumnPtr data_column = column_array.getDataPtr(); @@ -1891,7 +1949,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -1944,6 +2003,12 @@ namespace column_array.insertDefault(); } + void describeTree(WriteBuffer & out, size_t indent) const 
override + { + writeIndent(out, indent) << "ProtobufSerializerArray ->\n"; + element_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr element_serializer; ColumnPtr column; @@ -1955,10 +2020,12 @@ namespace { public: ProtobufSerializerTupleAsArray( + const std::string_view & column_name_, const std::shared_ptr & tuple_data_type_, const FieldDescriptor & field_descriptor_, std::vector> element_serializers_) - : tuple_data_type(tuple_data_type_) + : column_name(column_name_) + , tuple_data_type(tuple_data_type_) , tuple_size(tuple_data_type->getElements().size()) , field_descriptor(field_descriptor_) , element_serializers(std::move(element_serializers_)) @@ -1969,7 +2036,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); column = columns[0]; const auto & column_tuple = assert_cast(*column); for (size_t i : collections::range(tuple_size)) @@ -1982,7 +2050,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -2006,9 +2075,12 @@ namespace if (current_element_index >= tuple_size) { throw Exception( - "Too many (" + std::to_string(current_element_index) + ") elements was read from the field " - + field_descriptor.full_name() + " to fit in the data type " + tuple_data_type->getName(), - ErrorCodes::PROTOBUF_BAD_CAST); + ErrorCodes::PROTOBUF_BAD_CAST, + "Column {}: More than {} elements was read from the field {} to fit in the data type {}", + quoteString(column_name), + tuple_size, + quoteString(field_descriptor.full_name()), + tuple_data_type->getName()); } element_serializers[current_element_index]->readRow(row_num); @@ -2040,7 +2112,17 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerTupleAsArray: column " << quoteString(column_name) << " (" + << tuple_data_type->getName() << ") -> field " << quoteString(field_descriptor.full_name()) << " (" + << field_descriptor.type_name() << ") ->\n"; + for (const auto & element_serializer : element_serializers) + element_serializer->describeTree(out, indent + 1); + } + private: + const String column_name; const std::shared_ptr tuple_data_type; const size_t tuple_size; const FieldDescriptor & field_descriptor; @@ -2085,6 +2167,9 @@ namespace void setColumns(const ColumnPtr * columns_, size_t num_columns_) override { + if (!num_columns_) + wrongNumberOfColumns(num_columns_, ">0"); + columns.assign(columns_, columns_ + num_columns_); std::vector field_columns; @@ -2094,7 +2179,8 @@ namespace field_columns.reserve(info.column_indices.size()); for (size_t column_index : info.column_indices) { - assert(column_index < num_columns_); + if (column_index >= num_columns_) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong column index {}, expected column indices <{}", column_index, num_columns_); field_columns.emplace_back(columns_[column_index]); } info.field_serializer->setColumns(field_columns.data(), field_columns.size()); @@ -2206,6 +2292,32 @@ namespace addDefaultsToMissingColumns(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + size_t num_columns = 0; + for (const auto & field_info : field_infos) + num_columns += 
field_info.column_indices.size(); + + writeIndent(out, indent) << "ProtobufSerializerMessage: " << num_columns << " columns ->"; + if (parent_field_descriptor) + out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ")"; + + for (size_t i = 0; i != field_infos.size(); ++i) + { + out << "\n"; + const auto & field_info = field_infos[i]; + writeIndent(out, indent + 1) << "Columns #"; + for (size_t j = 0; j != field_info.column_indices.size(); ++j) + { + if (j) + out << ", "; + out << field_info.column_indices[j]; + } + out << " ->\n"; + field_info.field_serializer->describeTree(out, indent + 2); + } + } + private: size_t findFieldIndexByFieldTag(int field_tag) { @@ -2285,7 +2397,8 @@ namespace void setColumns(const ColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); const auto & column_tuple = assert_cast(*columns[0]); size_t tuple_size = column_tuple.tupleSize(); assert(tuple_size); @@ -2298,7 +2411,8 @@ namespace void setColumns(const MutableColumnPtr * columns, [[maybe_unused]] size_t num_columns) override { - assert(num_columns == 1); + if (num_columns != 1) + wrongNumberOfColumns(num_columns, "1"); ColumnPtr column0 = columns[0]->getPtr(); setColumns(&column0, 1); } @@ -2307,6 +2421,12 @@ namespace void readRow(size_t row_num) override { message_serializer->readRow(row_num); } void insertDefaults(size_t row_num) override { message_serializer->insertDefaults(row_num); } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerTupleAsNestedMessage ->\n"; + message_serializer->describeTree(out, indent + 1); + } + private: const std::unique_ptr message_serializer; }; @@ -2318,14 +2438,23 @@ namespace { public: explicit ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages( - std::unique_ptr message_serializer_) - : message_serializer(std::move(message_serializer_)) + const std::vector & column_names_, + const FieldDescriptor * parent_field_descriptor_, + std::unique_ptr message_serializer_, + const std::function & get_root_desc_function_) + : parent_field_descriptor(parent_field_descriptor_) + , message_serializer(std::move(message_serializer_)) + , get_root_desc_function(get_root_desc_function_) { + column_names.reserve(column_names_.size()); + for (const auto & column_name : column_names_) + column_names.emplace_back(column_name); } void setColumns(const ColumnPtr * columns, size_t num_columns) override { - assert(num_columns); + if (!num_columns) + wrongNumberOfColumns(num_columns, ">0"); data_columns.clear(); data_columns.reserve(num_columns); offset_columns.clear(); @@ -2335,11 +2464,26 @@ namespace { const auto & column_array = assert_cast(*columns[i]); data_columns.emplace_back(column_array.getDataPtr()); - offset_columns.emplace_back(column_array.getOffsetsPtr()); - } - std::sort(offset_columns.begin(), offset_columns.end()); - offset_columns.erase(std::unique(offset_columns.begin(), offset_columns.end()), offset_columns.end()); + auto offset_column = column_array.getOffsetsPtr(); + if (std::binary_search(offset_columns.begin(), offset_columns.end(), offset_column)) + continue; + + /// Keep `offset_columns` sorted. + offset_columns.insert(std::upper_bound(offset_columns.begin(), offset_columns.end(), offset_column), offset_column); + + /// All the columns listed in `offset_columns` should have equal offsets. 
+ if (i >= 1) + { + const auto & column_array0 = assert_cast(*columns[0]); + if (!column_array0.hasEqualOffsets(column_array)) + { + throw Exception(ErrorCodes::PROTOBUF_BAD_CAST, + "Column #{} {} and column #{} {} are supposed to have equal offsets according to the following serialization tree:\n{}", + 0, quoteString(column_names[0]), i, quoteString(column_names[i]), get_root_desc_function(0)); + } + } + } message_serializer->setColumns(data_columns.data(), data_columns.size()); } @@ -2358,12 +2502,6 @@ namespace const auto & offset_column0 = assert_cast(*offset_columns[0]); size_t start_offset = offset_column0.getElement(row_num - 1); size_t end_offset = offset_column0.getElement(row_num); - for (size_t i : collections::range(1, offset_columns.size())) - { - const auto & offset_column = assert_cast(*offset_columns[i]); - if (offset_column.getElement(row_num) != end_offset) - throw Exception("Components of FlattenedNested have different sizes", ErrorCodes::PROTOBUF_BAD_CAST); - } for (size_t i : collections::range(start_offset, end_offset)) message_serializer->writeRow(i); } @@ -2433,8 +2571,26 @@ namespace } } + void describeTree(WriteBuffer & out, size_t indent) const override + { + writeIndent(out, indent) << "ProtobufSerializerFlattenedNestedAsArrayOfNestedMessages: columns "; + for (size_t i = 0; i != column_names.size(); ++i) + { + if (i) + out << ", "; + out << "#" << i << " " << quoteString(column_names[i]); + } + out << " ->"; + if (parent_field_descriptor) + out << " field " << quoteString(parent_field_descriptor->full_name()) << " (" << parent_field_descriptor->type_name() << ") ->\n"; + message_serializer->describeTree(out, indent + 1); + } + private: + Strings column_names; + const FieldDescriptor * parent_field_descriptor; const std::unique_ptr message_serializer; + const std::function get_root_desc_function; Columns data_columns; Columns offset_columns; }; @@ -2453,6 +2609,14 @@ namespace const MessageDescriptor & message_descriptor, bool with_length_delimiter) { + root_serializer_ptr = std::make_shared(); + get_root_desc_function = [root_serializer_ptr = root_serializer_ptr](size_t indent) -> String + { + WriteBufferFromOwnString buf; + (*root_serializer_ptr)->describeTree(buf, indent); + return buf.str(); + }; + std::vector used_column_indices; auto message_serializer = buildMessageSerializerImpl( /* num_columns = */ column_names.size(), @@ -2480,6 +2644,12 @@ namespace boost::range::set_difference(collections::range(column_names.size()), used_column_indices_sorted, std::back_inserter(missing_column_indices)); + *root_serializer_ptr = message_serializer.get(); + +#if 0 + LOG_INFO(&Poco::Logger::get("ProtobufSerializer"), "Serialization tree:\n{}", get_root_desc_function(0)); +#endif + return message_serializer; } @@ -2635,10 +2805,35 @@ namespace /// Set `columns_are_reordered_outside` to true if you're going to reorder columns /// according to `used_column_indices` returned and pass to /// ProtobufSerializerMessage::setColumns() only the columns which are actually used. 
- template std::unique_ptr buildMessageSerializerImpl( size_t num_columns, - const StringOrStringViewT * column_names, + const String * column_names, + const DataTypePtr * data_types, + const MessageDescriptor & message_descriptor, + bool with_length_delimiter, + const FieldDescriptor * parent_field_descriptor, + std::vector & used_column_indices, + bool columns_are_reordered_outside) + { + std::vector column_names_sv; + column_names_sv.reserve(num_columns); + for (size_t i = 0; i != num_columns; ++i) + column_names_sv.emplace_back(column_names[i]); + + return buildMessageSerializerImpl( + num_columns, + column_names_sv.data(), + data_types, + message_descriptor, + with_length_delimiter, + parent_field_descriptor, + used_column_indices, + columns_are_reordered_outside); + } + + std::unique_ptr buildMessageSerializerImpl( + size_t num_columns, + const std::string_view * column_names, const DataTypePtr * data_types, const MessageDescriptor & message_descriptor, bool with_length_delimiter, @@ -2814,7 +3009,11 @@ namespace if (nested_message_serializer) { - auto field_serializer = std::make_unique(std::move(nested_message_serializer)); + std::vector column_names_used; + for (size_t i : used_column_indices_in_nested) + column_names_used.emplace_back(nested_column_names[i]); + auto field_serializer = std::make_unique( + std::move(column_names_used), field_descriptor, std::move(nested_message_serializer), get_root_desc_function); transformColumnIndices(used_column_indices_in_nested, nested_column_indices); add_field_serializer(column_name, std::move(used_column_indices_in_nested), *field_descriptor, std::move(field_serializer)); break; @@ -2856,34 +3055,34 @@ namespace auto data_type_id = data_type->getTypeId(); switch (data_type_id) { - case TypeIndex::UInt8: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt16: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt128: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::UInt256: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int8: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int16: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int128: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Int256: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Float32: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Float64: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::Date: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::DateTime: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::DateTime64: return std::make_unique(assert_cast(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::String: return std::make_unique>(field_descriptor, reader_or_writer); - case TypeIndex::FixedString: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Enum8: return 
std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Enum16: return std::make_unique>(typeid_cast>(data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal32: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal64: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal128: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::Decimal256: return std::make_unique>(assert_cast &>(*data_type), field_descriptor, reader_or_writer); - case TypeIndex::UUID: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::Interval: return std::make_unique(field_descriptor, reader_or_writer); - case TypeIndex::AggregateFunction: return std::make_unique(typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::UInt8: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt16: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt128: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::UInt256: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int8: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int16: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int128: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Int256: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Float32: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Float64: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Date: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::DateTime: return std::make_unique(column_name, assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::DateTime64: return std::make_unique(column_name, assert_cast(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::String: return std::make_unique>(column_name, field_descriptor, reader_or_writer); + case TypeIndex::FixedString: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Enum8: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Enum16: return std::make_unique>(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal32: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal64: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal128: return std::make_unique>(column_name, 
assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::Decimal256: return std::make_unique>(column_name, assert_cast &>(*data_type), field_descriptor, reader_or_writer); + case TypeIndex::UUID: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::Interval: return std::make_unique(column_name, field_descriptor, reader_or_writer); + case TypeIndex::AggregateFunction: return std::make_unique(column_name, typeid_cast>(data_type), field_descriptor, reader_or_writer); case TypeIndex::Nullable: { @@ -2981,6 +3180,7 @@ namespace return nullptr; return std::make_unique( + column_name, typeid_cast>(data_type), field_descriptor, std::move(nested_serializers)); @@ -3007,6 +3207,8 @@ namespace } const ProtobufReaderOrWriter reader_or_writer; + std::function get_root_desc_function; + std::shared_ptr root_serializer_ptr; }; } diff --git a/src/Formats/ProtobufSerializer.h b/src/Formats/ProtobufSerializer.h index 315a138f9cf..3eaca6a18d6 100644 --- a/src/Formats/ProtobufSerializer.h +++ b/src/Formats/ProtobufSerializer.h @@ -15,7 +15,7 @@ class ProtobufWriter; class IDataType; using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; - +class WriteBuffer; /// Utility class, does all the work for serialization in the Protobuf format. class ProtobufSerializer @@ -30,6 +30,8 @@ public: virtual void readRow(size_t row_num) = 0; virtual void insertDefaults(size_t row_num) = 0; + virtual void describeTree(WriteBuffer & out, size_t indent) const = 0; + static std::unique_ptr create( const Strings & column_names, const DataTypes & data_types, From 82e0e9d27c7ad640059c8b33072ea4777bf3196a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 22:29:33 +0300 Subject: [PATCH 082/262] Fix minmax partition index during merge in case of empty part in future parts It is possible to get empty part in future parts, during merging, that can be created in case of there is information about this part in zookeeper, but no replicas has it (StorageReplicatedMergeTree::createEmptyPartInsteadOfLost()) And this may incorrectly initialize min value, which will be updated after one more merge (since during that merge there will be no more information about empty part) or OPTIMIZE. --- src/Storages/MergeTree/MergeTask.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index a3e549ecda3..881086c024b 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -501,7 +501,14 @@ bool MergeTask::VerticalMergeStage::finalizeVerticalMergeForAllColumns() const bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() const { for (const auto & part : global_ctx->future_part->parts) - global_ctx->new_data_part->minmax_idx->merge(*part->minmax_idx); + { + /// Skip empty parts, + /// (that can be created in StorageReplicatedMergeTree::createEmptyPartInsteadOfLost()) + /// since they can incorrectly set min, + /// that will be changed after one more merge/OPTIMIZE. + if (!part->isEmpty()) + global_ctx->new_data_part->minmax_idx->merge(*part->minmax_idx); + } /// Print overall profiling info. NOTE: it may duplicates previous messages { From 030ea3dc3f058a391d6cd49d10a842900e500147 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Sun, 5 Dec 2021 02:08:42 +0300 Subject: [PATCH 083/262] .. 
--- docs/en/sql-reference/functions/string-search-functions.md | 4 +++- docs/ru/sql-reference/functions/string-search-functions.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index c62603a50b9..7d46f676a42 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -373,7 +373,9 @@ The same as `multiMatchAny`, but returns the array of all indicies that match th ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. The performance depends on the edit distance value and patterns used, but it's always more expensive compared to a non-fuzzy variants. +The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). + This function relies on the experimental feature of [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. + The performance depends on the edit distance value and patterns used, but it's always more expensive compared to a non-fuzzy variants. ## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 5bbd760bfb6..6967b0e82d6 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -357,7 +357,9 @@ Result: ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -То же, что и `multiMatchAny`, но возвращает 1 если любой pattern соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция также находится в экспериментальном режиме и может быть очень медленной. За подробностями обращайтесь к [документации hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching). +То же, что и `multiMatchAny`, но возвращает 1 если любой pattern соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). + Эта функция также находится в экспериментальном режиме и может быть очень медленной. + За подробностями обращайтесь к [документации hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching). 
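For readers skimming the patch, a concrete call makes the `multiFuzzyMatchAny` description above easier to follow. This is a hedged illustration only, not part of the diff; the haystack and pattern are made up, and the expectation relies on `'abcdeg'` being a single substitution away from the substring `'abcdef'`.

``` sql
-- multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, ...])
-- 'abcdeg' differs from 'abcdef' by one substitution, so with distance = 1
-- the call should return 1; exact matching via multiMatchAny('abcdef', ['abcdeg']) would return 0.
SELECT multiFuzzyMatchAny('abcdef', 1, ['abcdeg']) AS matched;
```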
## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} From f4055f1957655111780073cccb40e08c0940f1d9 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Sun, 5 Dec 2021 19:36:12 +0300 Subject: [PATCH 084/262] Update multiFuzzyMatchAny --- docs/en/sql-reference/functions/string-search-functions.md | 4 +--- docs/ru/sql-reference/functions/string-search-functions.md | 5 ++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 7d46f676a42..c62603a50b9 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -373,9 +373,7 @@ The same as `multiMatchAny`, but returns the array of all indicies that match th ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). - This function relies on the experimental feature of [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. - The performance depends on the edit distance value and patterns used, but it's always more expensive compared to a non-fuzzy variants. +The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. The performance depends on the edit distance value and patterns used, but it's always more expensive compared to a non-fuzzy variants. ## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 6967b0e82d6..5b4a02b9e5b 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -357,9 +357,8 @@ Result: ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -То же, что и `multiMatchAny`, но возвращает 1 если любой pattern соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). - Эта функция также находится в экспериментальном режиме и может быть очень медленной. - За подробностями обращайтесь к [документации hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching). +То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). +Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. 
Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с нечеткими вариантами. ## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} From 3efb4038ca8abd24324732160754810b3b969a65 Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Sun, 5 Dec 2021 19:46:24 +0300 Subject: [PATCH 085/262] sparkbar description --- .../aggregate-functions/reference/sparkbar.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 docs/en/sql-reference/aggregate-functions/reference/sparkbar.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md new file mode 100644 index 00000000000..9fcf752d36e --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -0,0 +1,56 @@ +--- +toc_priority: 311 +toc_title: sparkbar +--- + +# sparkbar {#sparkbar} + +The function plots a frequency histogram for values `x` and the repetition rate of these `y` values over the interval `[min_x, max_x]`. + +If no interval is specified, then the minimum `x` will be used as the interval start, and the maximum `x` will use as the interval end. + +**Syntax** + +``` sql +sparkbar(width, min_x, max_x)(x, y) +``` + +**Arguments** + +- `width` — The number of segments. Must be [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — The interval start. Optional value. +- `max_x` — The interval end. Optional value. +- `x` — The range of values. +- `y` — The frequency of values. + +**Returned value** + +- The frequency histogram. + +**Example** + +Query: + +``` sql +CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192; + +INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); + +SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data; + +SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data; +``` + +Result: + +``` text + +┌─sparkbar(9)(event_date, cnt)─┐ +│ ▁▅▄▃██▅ ▁ │ +└──────────────────────────────┘ + +┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ +│ ▁▄▄▂▅▇█▁ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + From f386ce3495807734efc0c958bd58cdf3f744f941 Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Sun, 5 Dec 2021 20:29:10 +0300 Subject: [PATCH 086/262] fix example --- .../aggregate-functions/reference/sparkbar.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 9fcf752d36e..34052aac86f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -46,11 +46,15 @@ Result: ``` text ┌─sparkbar(9)(event_date, cnt)─┐ -│ ▁▅▄▃██▅ ▁ │ +│ │ +│ ▁▅▄▃██▅ ▁ │ +│ │ └──────────────────────────────┘ ┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ -│ ▁▄▄▂▅▇█▁ │ +│ │ +│▁▄▄▂▅▇█▁ │ +│ │ └──────────────────────────────────────────────────────────────────────────┘ ``` From 715a25d4f484dbf178a181a29e1f61e5604b8a46 Mon Sep 17 00:00:00 2001 From: 
romanzhukov Date: Sun, 5 Dec 2021 20:12:42 +0300 Subject: [PATCH 087/262] Update query_log settings Update query_views_log.md Update query_views_log Minor fixes --- docs/en/operations/settings/settings.md | 11 ++- .../system-tables/query_views_log.md | 4 +- .../settings.md | 8 +- docs/ru/operations/settings/settings.md | 13 ++- docs/ru/operations/system-tables/query_log.md | 1 + .../system-tables/query_thread_log.md | 4 +- .../system-tables/query_views_log.md | 87 ++++++++++++++++++- .../functions/string-search-functions.md | 3 +- 8 files changed, 115 insertions(+), 16 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index bb1a807c766..c4c13a48560 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -992,9 +992,16 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. -Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. +Query threads log into `system.query_thread_log` table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. -Example: +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: `1`. + +**Example** ``` text log_query_threads=1 diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index 25d7f6522f5..6a6bbef45e2 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -4,8 +4,8 @@ Contains information about the dependent views executed when running a query, fo To start logging: -1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. -2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. +1. Configure parameters in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) section. +2. Set [log_query_views](../../operations/settings/settings.md#settings-log-query-views) to 1. The flushing period of data is set in `flush_interval_milliseconds` parameter of the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server settings section. To force flushing, use the [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs) query. diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index c194c70ebbc..a19380e36f4 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -999,14 +999,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part Настройки логирования информации о зависимых представлениях (materialized, live и т.п.) 
в запросах принятых с настройкой [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views). -Запросы сохраняются в таблицу system.query_views_log. Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются в таблице [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). При настройке логирования используются следующие параметры: - `database` – имя базы данных. -- `table` – имя таблицы куда будут записываться использованные представления. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `table` – имя системной таблицы, где будут логироваться запросы. +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если указывается параметр `engine`. +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если задан параметр `partition_by`. - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 1cbdfc913b5..d6830a30321 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -912,11 +912,18 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' ## log_query_threads {#settings-log-query-threads} -Установка логирования информации о потоках выполнения запроса. +Управляет логированием информации о потоках выполнения запросов. -Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). +Информация о потоках выполнения запросов сохраняется в системной таблице `system.query_thread_log`. Работает только в том случае, если включена настройка [log_queries](#settings-log-queries). Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). -Пример: +Возможные значения: + +- 0 — отключено. +- 1 — включено. + +Значение по умолчанию: `1`. 
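As the English half of this change states, `log_query_threads` only takes effect when `log_queries` is enabled. A minimal sketch of that interaction follows; it assumes `system.query_thread_log` exposes `query_id`, `thread_name` and `event_time` columns, which are not shown in this diff.

``` sql
-- log_query_threads is ignored unless log_queries is also on
SET log_queries = 1;
SET log_query_threads = 1;

SELECT count() FROM numbers(1000);

SYSTEM FLUSH LOGS;

-- one row per thread that participated in the query above (assumed columns)
SELECT query_id, thread_name
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```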
+ +**Пример** ``` text log_query_threads=1 diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md index 644cee853cc..aa4d01a4d47 100644 --- a/docs/ru/operations/system-tables/query_log.md +++ b/docs/ru/operations/system-tables/query_log.md @@ -55,6 +55,7 @@ ClickHouse не удаляет данные из таблица автомати - `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — тип запроса. - `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена баз данных, присутствующих в запросе. - `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена таблиц, присутствующих в запросе. +- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена представлений (материализованные или live), которые представленны в запросе. - `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — имена столбцов, присутствующих в запросе. - `projections` ([String](../../sql-reference/data-types/string.md)) — имена проекций, использованных при выполнении запроса. - `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения. diff --git a/docs/ru/operations/system-tables/query_thread_log.md b/docs/ru/operations/system-tables/query_thread_log.md index 00538c9c9ae..c23d2828520 100644 --- a/docs/ru/operations/system-tables/query_thread_log.md +++ b/docs/ru/operations/system-tables/query_thread_log.md @@ -112,5 +112,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr **Смотрите также** -- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. - +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. +- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — описание системной таблицы `query_views_log`, которая содержит информацию о всех представлениях, участвующих в выполненных запросах. diff --git a/docs/ru/operations/system-tables/query_views_log.md b/docs/ru/operations/system-tables/query_views_log.md index f606e4108ca..7b67d7b0b82 120000 --- a/docs/ru/operations/system-tables/query_views_log.md +++ b/docs/ru/operations/system-tables/query_views_log.md @@ -1 +1,86 @@ -../../../en/operations/system-tables/query_views_log.md \ No newline at end of file +# system.query_views_log {#system_tables-query_views_log} + +Содержит информацию о зависимых представлениях, выполняемых при выполнении запроса, например, тип представления или время выполнения. + +Чтобы начать ведение журнала: + +1. Настройте параметры в разделе [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log). +2. Включите настройку [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views). 
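A short end-to-end sketch of the two steps above may help; it assumes the server-side `query_views_log` section is already configured and only uses statements and columns described later in this document (`SYSTEM FLUSH LOGS`, `system.query_views_log`, `view_name`, `view_type`, `view_duration_ms`, `status`).

``` sql
-- step 2 from the list above, per session
SET log_query_views = 1;

-- run any query that goes through a materialized or live view, then flush the in-memory buffer
SYSTEM FLUSH LOGS;

SELECT view_name, view_type, view_duration_ms, status
FROM system.query_views_log
ORDER BY event_time DESC
LIMIT 5;
```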
+ + + +Период сброса данных из буфера в памяти задается в параметре `flush_interval_milliseconds` в разделе настроек сервера [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log ). Для принудительного сброса используйте запрос [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs). + +ClickHouse не удаляет данные из таблицы автоматически. Подробнее смотрите раздел [Системные таблицы](../../operations/system-tables/index.md#system-tables-introduction). + +Вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability), чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`. + +Столбцы: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло последнее событие с представлением. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления с точностью до микросекунд. +- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — продолжительность выполнения представления (сумма его этапов) в миллисекундах. +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор начального запроса (при распределённом выполнении запроса). +- `view_name` ([String](../../sql-reference/data-types/string.md)) — имя представления. +- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID представления. +- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип представления. Возможные значения: + - `'Default' = 1` — [обычные представления](../../sql-reference/statements/create/view.md#normal). Не должно появляться в этом журнале. + - `'Materialized' = 2` — [материализованные представления](../../sql-reference/statements/create/view.md#materialized). + - `'Live' = 3` — [live представления](../../sql-reference/statements/create/view.md#live-view). +- `view_query` ([String](../../sql-reference/data-types/string.md)) — запрос, выполняемый представлением. +- `view_target` ([String](../../sql-reference/data-types/string.md)) — имя целевой таблицы представления. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байт. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных байт. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между объемом выделенной и освобожденной памяти в контексте этого представления. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — События профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — статус представления. Возможные значения: + - `'QueryStart' = 1` — успешное начало выполнения представления. Не должно отображаться. + - `'QueryFinish' = 2` — успешное завершение выполнения представления. 
+ - `'ExceptionBeforeStart' = 3` — исключение до начала выполнения представления. + - `'ExceptionWhileProcessing' = 4` — исключение во время выполнения представления. +- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения. +- `exception` ([String](../../sql-reference/data-types/string.md)) — сообщение исключения. +- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [трассировка стека](https://ru.wikipedia.org/wiki/Трассировка_стека). Пустая строка, если запрос был успешно выполнен. + +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.query_views_log LIMIT 1 \G; +``` + +Результат: + +``` text +Row 1: +────── +event_date: 2021-06-22 +event_time: 2021-06-22 13:23:07 +event_time_microseconds: 2021-06-22 13:23:07.738221 +view_duration_ms: 0 +initial_query_id: c3a1ac02-9cad-479b-af54-9e9c0a7afd70 +view_name: default.matview_inner +view_uuid: 00000000-0000-0000-0000-000000000000 +view_type: Materialized +view_query: SELECT * FROM default.table_b +view_target: default.`.inner.matview_inner` +read_rows: 4 +read_bytes: 64 +written_rows: 2 +written_bytes: 32 +peak_memory_usage: 4196188 +ProfileEvents: {'FileOpen':2,'WriteBufferFromFileDescriptorWrite':2,'WriteBufferFromFileDescriptorWriteBytes':187,'IOBufferAllocs':3,'IOBufferAllocBytes':3145773,'FunctionExecute':3,'DiskWriteElapsedMicroseconds':13,'InsertedRows':2,'InsertedBytes':16,'SelectedRows':4,'SelectedBytes':48,'ContextLock':16,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':698,'SoftPageFaults':4,'OSReadChars':463} +status: QueryFinish +exception_code: 0 +exception: +stack_trace: +``` + +**См. также** + +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — описание системной таблицы `query_thread_log`, которая содержит информацию о каждом потоке выполнения запроса. diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index 5b4a02b9e5b..f0c8f51225e 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -357,8 +357,7 @@ Result: ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). -Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с нечеткими вариантами. +То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. 
Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с нечеткими вариантами. ## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} From 428e34d81131ad5eb123a156b7fe33f4478d3224 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 5 Dec 2021 11:34:10 +0300 Subject: [PATCH 088/262] Fix versioning of aggregate functions (fixes performance tests) --- src/Columns/ColumnAggregateFunction.cpp | 17 ++++++++++++----- src/DataTypes/DataTypeAggregateFunction.cpp | 2 +- .../01881_aggregate_functions_versioning.sql | 12 ++++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index aa19aa2d8b0..0bd6b54a25a 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -28,13 +28,20 @@ namespace ErrorCodes } -static String getTypeString(const AggregateFunctionPtr & func) +static String getTypeString(const AggregateFunctionPtr & func, std::optional version = std::nullopt) { WriteBufferFromOwnString stream; - stream << "AggregateFunction(" << func->getName(); + + stream << "AggregateFunction("; + + /// If aggregate function does not support versioning its version is 0 and is not printed. + if (version && *version) + stream << *version << ", "; + + stream << func->getName(); + const auto & parameters = func->getParameters(); const auto & argument_types = func->getArgumentTypes(); - if (!parameters.empty()) { stream << '('; @@ -56,7 +63,7 @@ static String getTypeString(const AggregateFunctionPtr & func) ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & func_, std::optional version_) - : func(func_), type_string(getTypeString(func)), version(version_) + : func(func_), type_string(getTypeString(func, version_)), version(version_) { } @@ -403,7 +410,7 @@ void ColumnAggregateFunction::protect() MutableColumnPtr ColumnAggregateFunction::cloneEmpty() const { - return create(func); + return create(func, version); } Field ColumnAggregateFunction::operator[](size_t n) const diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index a5bf047f092..d572da1ecd0 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -85,7 +85,7 @@ String DataTypeAggregateFunction::getNameImpl(bool with_version) const MutableColumnPtr DataTypeAggregateFunction::createColumn() const { - return ColumnAggregateFunction::create(function, version); + return ColumnAggregateFunction::create(function, getVersion()); } diff --git a/tests/queries/0_stateless/01881_aggregate_functions_versioning.sql b/tests/queries/0_stateless/01881_aggregate_functions_versioning.sql index 2f6a60409d5..f5007f8ef9b 100644 --- a/tests/queries/0_stateless/01881_aggregate_functions_versioning.sql +++ b/tests/queries/0_stateless/01881_aggregate_functions_versioning.sql @@ -8,3 +8,15 @@ CREATE TABLE test_table ENGINE = AggregatingMergeTree() ORDER BY (col1, col2); SHOW CREATE TABLE test_table; + +-- regression from performance tests comparison script +DROP TABLE IF EXISTS test; +CREATE TABLE test +ENGINE = Null AS +WITH ( + SELECT arrayReduce('sumMapState', [(['foo'], arrayMap(x -> -0., ['foo']))]) + ) AS all_metrics +SELECT + (finalizeAggregation(arrayReduce('sumMapMergeState', [all_metrics])) AS metrics_tuple).1 AS metric_names, + 
metrics_tuple.2 AS metric_values +FROM system.one; From 2b1789adc5dc240ce4f85eb5af4cd05c3548af01 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 5 Dec 2021 20:25:14 +0300 Subject: [PATCH 089/262] Pass through version for aggregate function from SerializationAggregateFunction too Suggested-by: @kssenii --- src/Columns/ColumnAggregateFunction.cpp | 5 +++-- src/Columns/ColumnAggregateFunction.h | 2 +- .../Serializations/SerializationAggregateFunction.cpp | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 0bd6b54a25a..d675d166f5f 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -73,10 +73,11 @@ ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & fu } -void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_) +void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, size_t version_) { func = func_; - type_string = getTypeString(func); + version = version_; + type_string = getTypeString(func, version); } diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 548ad238f0d..b5efff928bb 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -103,7 +103,7 @@ private: public: ~ColumnAggregateFunction() override; - void set(const AggregateFunctionPtr & func_); + void set(const AggregateFunctionPtr & func_, size_t version_); AggregateFunctionPtr getAggregateFunction() { return func; } AggregateFunctionPtr getAggregateFunction() const { return func; } diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 47b4bed9b31..442df47a773 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -79,7 +79,7 @@ void SerializationAggregateFunction::deserializeBinaryBulk(IColumn & column, Rea ColumnAggregateFunction::Container & vec = real_column.getData(); Arena & arena = real_column.createOrGetArena(); - real_column.set(function); + real_column.set(function, version); vec.reserve(vec.size() + limit); size_t size_of_state = function->sizeOfData(); From 9222e6ed9a2438d6f962d69dbfa6fcab370b447a Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Sun, 5 Dec 2021 23:32:27 +0300 Subject: [PATCH 090/262] Update query_views_log.md Delete query_views_log.md Create query_views_log.md --- docs/ru/operations/system-tables/query_views_log.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) mode change 120000 => 100644 docs/ru/operations/system-tables/query_views_log.md diff --git a/docs/ru/operations/system-tables/query_views_log.md b/docs/ru/operations/system-tables/query_views_log.md deleted file mode 120000 index 7b67d7b0b82..00000000000 --- a/docs/ru/operations/system-tables/query_views_log.md +++ /dev/null @@ -1,86 +0,0 @@ -# system.query_views_log {#system_tables-query_views_log} - -Содержит информацию о зависимых представлениях, выполняемых при выполнении запроса, например, тип представления или время выполнения. - -Чтобы начать ведение журнала: - -1. Настройте параметры в разделе [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log). -2. Включите настройку [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views). 
- - - -Период сброса данных из буфера в памяти задается в параметре `flush_interval_milliseconds` в разделе настроек сервера [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log ). Для принудительного сброса используйте запрос [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs). - -ClickHouse не удаляет данные из таблицы автоматически. Подробнее смотрите раздел [Системные таблицы](../../operations/system-tables/index.md#system-tables-introduction). - -Вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability), чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`. - -Столбцы: - -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло последнее событие с представлением. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления. -- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления с точностью до микросекунд. -- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — продолжительность выполнения представления (сумма его этапов) в миллисекундах. -- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор начального запроса (при распределённом выполнении запроса). -- `view_name` ([String](../../sql-reference/data-types/string.md)) — имя представления. -- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID представления. -- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип представления. Возможные значения: - - `'Default' = 1` — [обычные представления](../../sql-reference/statements/create/view.md#normal). Не должно появляться в этом журнале. - - `'Materialized' = 2` — [материализованные представления](../../sql-reference/statements/create/view.md#materialized). - - `'Live' = 3` — [live представления](../../sql-reference/statements/create/view.md#live-view). -- `view_query` ([String](../../sql-reference/data-types/string.md)) — запрос, выполняемый представлением. -- `view_target` ([String](../../sql-reference/data-types/string.md)) — имя целевой таблицы представления. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байт. -- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных байт. -- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между объемом выделенной и освобожденной памяти в контексте этого представления. -- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — События профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events). -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — статус представления. Возможные значения: - - `'QueryStart' = 1` — успешное начало выполнения представления. Не должно отображаться. - - `'QueryFinish' = 2` — успешное завершение выполнения представления. 
- - `'ExceptionBeforeStart' = 3` — исключение до начала выполнения представления. - - `'ExceptionWhileProcessing' = 4` — исключение во время выполнения представления. -- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения. -- `exception` ([String](../../sql-reference/data-types/string.md)) — сообщение исключения. -- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [трассировка стека](https://ru.wikipedia.org/wiki/Трассировка_стека). Пустая строка, если запрос был успешно выполнен. - -**Пример** - -Запрос: - -``` sql -SELECT * FROM system.query_views_log LIMIT 1 \G; -``` - -Результат: - -``` text -Row 1: -────── -event_date: 2021-06-22 -event_time: 2021-06-22 13:23:07 -event_time_microseconds: 2021-06-22 13:23:07.738221 -view_duration_ms: 0 -initial_query_id: c3a1ac02-9cad-479b-af54-9e9c0a7afd70 -view_name: default.matview_inner -view_uuid: 00000000-0000-0000-0000-000000000000 -view_type: Materialized -view_query: SELECT * FROM default.table_b -view_target: default.`.inner.matview_inner` -read_rows: 4 -read_bytes: 64 -written_rows: 2 -written_bytes: 32 -peak_memory_usage: 4196188 -ProfileEvents: {'FileOpen':2,'WriteBufferFromFileDescriptorWrite':2,'WriteBufferFromFileDescriptorWriteBytes':187,'IOBufferAllocs':3,'IOBufferAllocBytes':3145773,'FunctionExecute':3,'DiskWriteElapsedMicroseconds':13,'InsertedRows':2,'InsertedBytes':16,'SelectedRows':4,'SelectedBytes':48,'ContextLock':16,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':698,'SoftPageFaults':4,'OSReadChars':463} -status: QueryFinish -exception_code: 0 -exception: -stack_trace: -``` - -**См. также** - -- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. -- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — описание системной таблицы `query_thread_log`, которая содержит информацию о каждом потоке выполнения запроса. diff --git a/docs/ru/operations/system-tables/query_views_log.md b/docs/ru/operations/system-tables/query_views_log.md new file mode 100644 index 00000000000..9715d739af1 --- /dev/null +++ b/docs/ru/operations/system-tables/query_views_log.md @@ -0,0 +1,84 @@ +# system.query_views_log {#system_tables-query_views_log} + +Содержит информацию о зависимых представлениях, выполняемых при выполнении запроса, например, тип представления или время выполнения. + +Чтобы начать ведение журнала: + +1. Настройте параметры в разделе [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log). +2. Включите настройку [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views). + +Период сброса данных из буфера в памяти задается в параметре `flush_interval_milliseconds` в разделе настроек сервера [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log ). Для принудительного сброса используйте запрос [SYSTEM FLUSH LOGS](../../sql-reference/statements/system.md#query_language-system-flush_logs). + +ClickHouse не удаляет данные из таблицы автоматически. Подробнее смотрите раздел [Системные таблицы](../../operations/system-tables/index.md#system-tables-introduction). 
+ +Вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability), чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`. + +Столбцы: + +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло последнее событие с представлением. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — дата и время завершения выполнения представления с точностью до микросекунд. +- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — продолжительность выполнения представления (сумма его этапов) в миллисекундах. +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор начального запроса (при распределённом выполнении запроса). +- `view_name` ([String](../../sql-reference/data-types/string.md)) — имя представления. +- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID представления. +- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип представления. Возможные значения: + - `'Default' = 1` — [обычные представления](../../sql-reference/statements/create/view.md#normal). Не должно появляться в этом журнале. + - `'Materialized' = 2` — [материализованные представления](../../sql-reference/statements/create/view.md#materialized). + - `'Live' = 3` — [live представления](../../sql-reference/statements/create/view.md#live-view). +- `view_query` ([String](../../sql-reference/data-types/string.md)) — запрос, выполняемый представлением. +- `view_target` ([String](../../sql-reference/data-types/string.md)) — имя целевой таблицы представления. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных строк. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество прочитанных байт. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных байт. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между объемом выделенной и освобожденной памяти в контексте этого представления. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — События профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — статус представления. Возможные значения: + - `'QueryStart' = 1` — успешное начало выполнения представления. Не должно отображаться. + - `'QueryFinish' = 2` — успешное завершение выполнения представления. + - `'ExceptionBeforeStart' = 3` — исключение до начала выполнения представления. + - `'ExceptionWhileProcessing' = 4` — исключение во время выполнения представления. +- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — код исключения. +- `exception` ([String](../../sql-reference/data-types/string.md)) — сообщение исключения. +- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [трассировка стека](https://ru.wikipedia.org/wiki/Трассировка_стека). Пустая строка, если запрос был успешно выполнен. 
+ +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.query_views_log LIMIT 1 \G; +``` + +Результат: + +``` text +Row 1: +────── +event_date: 2021-06-22 +event_time: 2021-06-22 13:23:07 +event_time_microseconds: 2021-06-22 13:23:07.738221 +view_duration_ms: 0 +initial_query_id: c3a1ac02-9cad-479b-af54-9e9c0a7afd70 +view_name: default.matview_inner +view_uuid: 00000000-0000-0000-0000-000000000000 +view_type: Materialized +view_query: SELECT * FROM default.table_b +view_target: default.`.inner.matview_inner` +read_rows: 4 +read_bytes: 64 +written_rows: 2 +written_bytes: 32 +peak_memory_usage: 4196188 +ProfileEvents: {'FileOpen':2,'WriteBufferFromFileDescriptorWrite':2,'WriteBufferFromFileDescriptorWriteBytes':187,'IOBufferAllocs':3,'IOBufferAllocBytes':3145773,'FunctionExecute':3,'DiskWriteElapsedMicroseconds':13,'InsertedRows':2,'InsertedBytes':16,'SelectedRows':4,'SelectedBytes':48,'ContextLock':16,'RWLockAcquiredReadLocks':1,'RealTimeMicroseconds':698,'SoftPageFaults':4,'OSReadChars':463} +status: QueryFinish +exception_code: 0 +exception: +stack_trace: +``` + +**См. также** + +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — описание системной таблицы `query_log`, которая содержит общую информацию о выполненных запросах. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — описание системной таблицы `query_thread_log`, которая содержит информацию о каждом потоке выполнения запроса. From 9d1e63be0fe0233e49982d1b3bfd59afa2a92f71 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 6 Dec 2021 11:54:45 +0800 Subject: [PATCH 091/262] 1. support default expression for storage hdfs; 2. optimize storage hdfs when format is column oriented --- src/Functions/FunctionMapMapped.h | 240 ++++++++++++++++++++ src/Storages/HDFS/StorageHDFS.cpp | 126 +++++++--- src/Storages/HDFS/StorageHDFS.h | 7 + tests/integration/test_storage_hdfs/test.py | 12 + 4 files changed, 349 insertions(+), 36 deletions(-) create mode 100644 src/Functions/FunctionMapMapped.h diff --git a/src/Functions/FunctionMapMapped.h b/src/Functions/FunctionMapMapped.h new file mode 100644 index 00000000000..ff746b58f71 --- /dev/null +++ b/src/Functions/FunctionMapMapped.h @@ -0,0 +1,240 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +// TODO 只要map没有实现去重,那么这个函数就不能用了。先等着吧 +/** Higher-order functions for maps. + * These functions optionally apply a map (transform) to map(or multiple map of identical size) by lambda function, + * and return some result based on that transformation. + * + * Examples: + * arrayMap(x1,...,xn -> expression, array1,...,arrayn) - apply the expression to each element of the array (or set of parallel arrays). + * arrayFilter(x -> predicate, array) - leave in the array only the elements for which the expression is true. + * + * For some functions arrayCount, arrayExists, arrayAll, an overload of the form f(array) is available, + * which works in the same way as f(x -> x, array). 
+ * + * See the example of Impl template parameter in arrayMap.cpp + */ +template +class FunctionArrayMapped : public IFunction +{ +public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + /// Called if at least one function argument is a lambda expression. + /// For argument-lambda expressions, it defines the types of arguments of these expressions. + void getLambdaArgumentTypes(DataTypes & arguments) const override + { + if (arguments.empty()) + throw Exception("Function " + getName() + " needs at least one argument; passed " + + toString(arguments.size()) + ".", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (arguments.size() == 1) + throw Exception("Function " + getName() + " needs at least one array argument.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + DataTypes nested_types(arguments.size() - 1); + for (size_t i = 0; i < nested_types.size(); ++i) + { + const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); + if (!array_type) + throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. Found " + + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + } + + const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); + if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) + throw Exception("First argument for this overload of " + getName() + " must be a function with " + + toString(nested_types.size()) + " arguments. Found " + + arguments[0]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arguments[0] = std::make_shared(nested_types); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + size_t min_args = Impl::needExpression() ? 2 : 1; + if (arguments.size() < min_args) + throw Exception("Function " + getName() + " needs at least " + + toString(min_args) + " argument; passed " + + toString(arguments.size()) + ".", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (arguments.size() == 1) + { + const auto * array_type = checkAndGetDataType(arguments[0].type.get()); + + if (!array_type) + throw Exception("The only argument for function " + getName() + " must be array. Found " + + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + DataTypePtr nested_type = array_type->getNestedType(); + + if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8()) + throw Exception("The only argument for function " + getName() + " must be array of UInt8. 
Found " + + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return Impl::getReturnType(nested_type, nested_type); + } + else + { + if (arguments.size() > 2 && Impl::needOneArray()) + throw Exception("Function " + getName() + " needs one array argument.", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); + + if (!data_type_function) + throw Exception("First argument for function " + getName() + " must be a function.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. + + DataTypePtr return_type = removeLowCardinality(data_type_function->getReturnType()); + if (Impl::needBoolean() && !WhichDataType(return_type).isUInt8()) + throw Exception("Expression for function " + getName() + " must return UInt8, found " + + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto * first_array_type = checkAndGetDataType(arguments[1].type.get()); + + return Impl::getReturnType(return_type, first_array_type->getNestedType()); + } + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override + { + if (arguments.size() == 1) + { + ColumnPtr column_array_ptr = arguments[0].column; + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + + if (!column_array) + { + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + if (!column_const_array) + throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); + column_array_ptr = column_const_array->convertToFullColumn(); + column_array = assert_cast(column_array_ptr.get()); + } + + return Impl::execute(*column_array, column_array->getDataPtr()); + } + else + { + const auto & column_with_type_and_name = arguments[0]; + + if (!column_with_type_and_name.column) + throw Exception("First argument for function " + getName() + " must be a function.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto * column_function = typeid_cast(column_with_type_and_name.column.get()); + + if (!column_function) + throw Exception("First argument for function " + getName() + " must be a function.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + ColumnPtr offsets_column; + + ColumnPtr column_first_array_ptr; + const ColumnArray * column_first_array = nullptr; + + ColumnsWithTypeAndName arrays; + arrays.reserve(arguments.size() - 1); + + for (size_t i = 1; i < arguments.size(); ++i) + { + const auto & array_with_type_and_name = arguments[i]; + + ColumnPtr column_array_ptr = array_with_type_and_name.column; + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + + const DataTypePtr & array_type_ptr = array_with_type_and_name.type; + const auto * array_type = checkAndGetDataType(array_type_ptr.get()); + + if (!column_array) + { + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + if (!column_const_array) + throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); + column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); + column_array = checkAndGetColumn(column_array_ptr.get()); + } + + if (!array_type) + throw Exception("Expected array type, found " + array_type_ptr->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!offsets_column) + { + 
offsets_column = column_array->getOffsetsPtr(); + } + else + { + /// The first condition is optimization: do not compare data if the pointers are equal. + if (column_array->getOffsetsPtr() != offsets_column + && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) + throw Exception("Arrays passed to " + getName() + " must have equal size", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + } + + if (i == 1) + { + column_first_array_ptr = column_array_ptr; + column_first_array = column_array; + } + + arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), + recursiveRemoveLowCardinality(array_type->getNestedType()), + array_with_type_and_name.name)); + } + + /// Put all the necessary columns multiplied by the sizes of arrays into the columns. + auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(column_first_array->getOffsets())); + auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); + replicated_column_function->appendArguments(arrays); + + auto lambda_result = replicated_column_function->reduce().column; + if (lambda_result->lowCardinality()) + lambda_result = lambda_result->convertToFullColumnIfLowCardinality(); + + return Impl::execute(*column_first_array, lambda_result); + } + } +}; + +} diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 90e63aef46d..cc974ccdce9 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,11 @@ StorageHDFS::StorageHDFS( ContextPtr context_, const String & compression_method_ = "", ASTPtr partition_by_) - : IStorage(table_id_), WithContext(context_), uri(uri_), format_name(format_name_), compression_method(compression_method_) + : IStorage(table_id_) + , WithContext(context_) + , uri(uri_) + , format_name(format_name_) + , compression_method(compression_method_) , partition_by(partition_by_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); @@ -76,8 +81,7 @@ StorageHDFS::StorageHDFS( setInMemoryMetadata(storage_metadata); } -namespace -{ +using StorageHDFSPtr = std::shared_ptr; class HDFSSource : public SourceWithProgress, WithContext { @@ -93,8 +97,12 @@ public: using SourcesInfoPtr = std::shared_ptr; - static Block getHeader(Block header, bool need_path_column, bool need_file_column) + static Block getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) { + auto header = metadata_snapshot->getSampleBlock(); + + /// Note: AddingDefaultsBlockInputStream doesn't change header. 
+ if (need_path_column) header.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); if (need_file_column) @@ -103,22 +111,35 @@ public: return header; } + static Block getBlockForSource( + const StorageHDFSPtr & storage, + const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, + const SourcesInfoPtr & files_info) + { + if (storage->isColumnOriented()) + return metadata_snapshot->getSampleBlockForColumns( + columns_description.getNamesOfPhysical(), storage->getVirtuals(), storage->getStorageID()); + else + return getHeader(metadata_snapshot, files_info->need_path_column, files_info->need_file_column); + } + HDFSSource( - SourcesInfoPtr source_info_, - String uri_, - String format_, - String compression_method_, - Block sample_block_, + StorageHDFSPtr storage_, + const StorageMetadataPtr & metadata_snapshot_, ContextPtr context_, - UInt64 max_block_size_) - : SourceWithProgress(getHeader(sample_block_, source_info_->need_path_column, source_info_->need_file_column)) + UInt64 max_block_size_, + SourcesInfoPtr source_info_, + String uri_without_path_, + ColumnsDescription columns_description_) + : SourceWithProgress(getBlockForSource(storage_, metadata_snapshot_, columns_description_, source_info_)) , WithContext(context_) + , storage(std::move(storage_)) + , metadata_snapshot(metadata_snapshot_) , source_info(std::move(source_info_)) - , uri(std::move(uri_)) - , format(std::move(format_)) - , compression_method(compression_method_) + , uri_without_path(std::move(uri_without_path_)) , max_block_size(max_block_size_) - , sample_block(std::move(sample_block_)) + , columns_description(std::move(columns_description_)) { } @@ -138,14 +159,30 @@ public: return {}; auto path = source_info->uris[pos]; - current_path = uri + path; + current_path = uri_without_path + path; - auto compression = chooseCompressionMethod(path, compression_method); - read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri, path, getContext()->getGlobalContext()->getConfigRef()), compression); - auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size); - pipeline = QueryPipeline(std::move(input_format)); + auto compression = chooseCompressionMethod(path, storage->compression_method); + read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(uri_without_path, path, getContext()->getGlobalContext()->getConfigRef()), compression); - reader = std::make_unique(pipeline); + auto get_block_for_format = [&]() -> Block + { + if (storage->isColumnOriented()) + return metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + return metadata_snapshot->getSampleBlock(); + }; + auto input_format = getContext()->getInputFormat(storage->format_name, *read_buf, get_block_for_format(), max_block_size); + + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + if (columns_description.hasDefaults()) + { + builder.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *input_format, getContext()); + }); + } + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); } Block res; @@ -180,17 +217,17 @@ public: } private: - std::unique_ptr read_buf; - QueryPipeline pipeline; - std::unique_ptr reader; + StorageHDFSPtr storage; + StorageMetadataPtr metadata_snapshot; SourcesInfoPtr source_info; - String uri; - String format; - String compression_method; - String current_path; 
- + String uri_without_path; UInt64 max_block_size; - Block sample_block; + ColumnsDescription columns_description; + + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + String current_path; }; class HDFSSink : public SinkToStorage @@ -235,7 +272,6 @@ private: OutputFormatPtr writer; }; - class PartitionedHDFSSink : public PartitionedSink { public: @@ -314,13 +350,14 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c } } } - return result; } +bool StorageHDFS::isColumnOriented() const +{ + return format_name != "Distributed" && FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); } - Pipe StorageHDFS::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -355,11 +392,27 @@ Pipe StorageHDFS::read( num_streams = sources_info->uris.size(); Pipes pipes; - + auto this_ptr = std::static_pointer_cast(shared_from_this()); for (size_t i = 0; i < num_streams; ++i) - pipes.emplace_back(std::make_shared( - sources_info, uri_without_path, format_name, compression_method, metadata_snapshot->getSampleBlock(), context_, max_block_size)); + { + const auto get_columns_for_format = [&]() -> ColumnsDescription + { + if (isColumnOriented()) + return ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + else + return metadata_snapshot->getColumns(); + }; + pipes.emplace_back(std::make_shared( + this_ptr, + metadata_snapshot, + context_, + max_block_size, + sources_info, + uri_without_path, + get_columns_for_format())); + } return Pipe::unitePipes(std::move(pipes)); } @@ -450,6 +503,7 @@ NamesAndTypesList StorageHDFS::getVirtuals() const {"_file", std::make_shared()} }; } + } #endif diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index 945f0b9f0f1..db6b078265d 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -38,7 +38,14 @@ public: bool supportsPartitionBy() const override { return true; } + /// Check if the format is column-oriented. + /// Is is useful because column oriented formats could effectively skip unknown columns + /// So we can create a header of only required columns in read method and ask + /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
+ bool isColumnOriented() const; + protected: + friend class HDFSSource; StorageHDFS( const String & uri_, const StorageID & table_id_, diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index fda4d89805a..ede1dafefb1 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -310,6 +310,18 @@ def test_seekable_formats(started_cluster): result = node1.query(f"SELECT count() FROM {table_function}") assert(int(result) == 5000000) +def test_read_table_with_default(started_cluster): + hdfs_api = started_cluster.hdfs_api + + data = "n\n100\n" + hdfs_api.write_data("/simple_table_function", data) + assert hdfs_api.read_data("/simple_table_function") == data + + output = "n\tm\n100\t200\n" + assert node1.query( + "select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSVWithNames', 'n UInt32, m UInt32 DEFAULT n * 2') FORMAT TSVWithNames") == output + + if __name__ == '__main__': cluster.start() From a98f740bf21440a5f521add95867902d11ccbc99 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 6 Dec 2021 12:01:39 +0800 Subject: [PATCH 092/262] remove unused file --- src/Functions/FunctionMapMapped.h | 240 ------------------------------ 1 file changed, 240 deletions(-) delete mode 100644 src/Functions/FunctionMapMapped.h diff --git a/src/Functions/FunctionMapMapped.h b/src/Functions/FunctionMapMapped.h deleted file mode 100644 index ff746b58f71..00000000000 --- a/src/Functions/FunctionMapMapped.h +++ /dev/null @@ -1,240 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} - - -// TODO 只要map没有实现去重,那么这个函数就不能用了。先等着吧 -/** Higher-order functions for maps. - * These functions optionally apply a map (transform) to map(or multiple map of identical size) by lambda function, - * and return some result based on that transformation. - * - * Examples: - * arrayMap(x1,...,xn -> expression, array1,...,arrayn) - apply the expression to each element of the array (or set of parallel arrays). - * arrayFilter(x -> predicate, array) - leave in the array only the elements for which the expression is true. - * - * For some functions arrayCount, arrayExists, arrayAll, an overload of the form f(array) is available, - * which works in the same way as f(x -> x, array). - * - * See the example of Impl template parameter in arrayMap.cpp - */ -template -class FunctionArrayMapped : public IFunction -{ -public: - static constexpr auto name = Name::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - /// Called if at least one function argument is a lambda expression. - /// For argument-lambda expressions, it defines the types of arguments of these expressions. 
- void getLambdaArgumentTypes(DataTypes & arguments) const override - { - if (arguments.empty()) - throw Exception("Function " + getName() + " needs at least one argument; passed " - + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (arguments.size() == 1) - throw Exception("Function " + getName() + " needs at least one array argument.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - DataTypes nested_types(arguments.size() - 1); - for (size_t i = 0; i < nested_types.size(); ++i) - { - const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); - if (!array_type) - throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. Found " - + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); - } - - const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); - if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) - throw Exception("First argument for this overload of " + getName() + " must be a function with " - + toString(nested_types.size()) + " arguments. Found " - + arguments[0]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - arguments[0] = std::make_shared(nested_types); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - size_t min_args = Impl::needExpression() ? 2 : 1; - if (arguments.size() < min_args) - throw Exception("Function " + getName() + " needs at least " - + toString(min_args) + " argument; passed " - + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - if (arguments.size() == 1) - { - const auto * array_type = checkAndGetDataType(arguments[0].type.get()); - - if (!array_type) - throw Exception("The only argument for function " + getName() + " must be array. Found " - + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - DataTypePtr nested_type = array_type->getNestedType(); - - if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8()) - throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found " - + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return Impl::getReturnType(nested_type, nested_type); - } - else - { - if (arguments.size() > 2 && Impl::needOneArray()) - throw Exception("Function " + getName() + " needs one array argument.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); - - if (!data_type_function) - throw Exception("First argument for function " + getName() + " must be a function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. 
- - DataTypePtr return_type = removeLowCardinality(data_type_function->getReturnType()); - if (Impl::needBoolean() && !WhichDataType(return_type).isUInt8()) - throw Exception("Expression for function " + getName() + " must return UInt8, found " - + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto * first_array_type = checkAndGetDataType(arguments[1].type.get()); - - return Impl::getReturnType(return_type, first_array_type->getNestedType()); - } - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override - { - if (arguments.size() == 1) - { - ColumnPtr column_array_ptr = arguments[0].column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); - - if (!column_array) - { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); - if (!column_const_array) - throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); - column_array_ptr = column_const_array->convertToFullColumn(); - column_array = assert_cast(column_array_ptr.get()); - } - - return Impl::execute(*column_array, column_array->getDataPtr()); - } - else - { - const auto & column_with_type_and_name = arguments[0]; - - if (!column_with_type_and_name.column) - throw Exception("First argument for function " + getName() + " must be a function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - const auto * column_function = typeid_cast(column_with_type_and_name.column.get()); - - if (!column_function) - throw Exception("First argument for function " + getName() + " must be a function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - ColumnPtr offsets_column; - - ColumnPtr column_first_array_ptr; - const ColumnArray * column_first_array = nullptr; - - ColumnsWithTypeAndName arrays; - arrays.reserve(arguments.size() - 1); - - for (size_t i = 1; i < arguments.size(); ++i) - { - const auto & array_with_type_and_name = arguments[i]; - - ColumnPtr column_array_ptr = array_with_type_and_name.column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); - - const DataTypePtr & array_type_ptr = array_with_type_and_name.type; - const auto * array_type = checkAndGetDataType(array_type_ptr.get()); - - if (!column_array) - { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); - if (!column_const_array) - throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); - column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); - column_array = checkAndGetColumn(column_array_ptr.get()); - } - - if (!array_type) - throw Exception("Expected array type, found " + array_type_ptr->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!offsets_column) - { - offsets_column = column_array->getOffsetsPtr(); - } - else - { - /// The first condition is optimization: do not compare data if the pointers are equal. 
- if (column_array->getOffsetsPtr() != offsets_column - && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) - throw Exception("Arrays passed to " + getName() + " must have equal size", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); - } - - if (i == 1) - { - column_first_array_ptr = column_array_ptr; - column_first_array = column_array; - } - - arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), - array_with_type_and_name.name)); - } - - /// Put all the necessary columns multiplied by the sizes of arrays into the columns. - auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(column_first_array->getOffsets())); - auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); - replicated_column_function->appendArguments(arrays); - - auto lambda_result = replicated_column_function->reduce().column; - if (lambda_result->lowCardinality()) - lambda_result = lambda_result->convertToFullColumnIfLowCardinality(); - - return Impl::execute(*column_first_array, lambda_result); - } - } -}; - -} From 85169c03baa5388e1dd8604db2501d56cd2a9702 Mon Sep 17 00:00:00 2001 From: Alexey Date: Mon, 6 Dec 2021 04:28:40 +0000 Subject: [PATCH 093/262] example marked with ```bash not ```text --- docs/en/interfaces/grpc.md | 2 +- docs/ru/interfaces/grpc.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index 21c899ee2fe..b30715082ec 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -81,7 +81,7 @@ In a batch mode query data can be passed via `stdin`. In the following example a table is created and loaded with data from a CSV file. Then the content of the table is queried. -``` text +``` bash ./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" diff --git a/docs/ru/interfaces/grpc.md b/docs/ru/interfaces/grpc.md index 924b9ea11db..89032c9372c 100644 --- a/docs/ru/interfaces/grpc.md +++ b/docs/ru/interfaces/grpc.md @@ -81,7 +81,7 @@ ClickHouse поддерживает интерфейс [gRPC](https://grpc.io/). В примере создается таблица, и в нее загружаются данные из CSV файла. Затем выводится содержимое таблицы. -``` text +``` bash ./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;" echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV" From 445b0983eab355de77d376653618f18514f45e73 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 01:32:06 +0300 Subject: [PATCH 094/262] stress: ignore server memory limits for hung check In [1] hung check failed because the server was under memory pressure. 
[1]: https://s3.amazonaws.com/clickhouse-test-reports/32019/b6290ae00223fc91d514b82de25e195ab34f8bca/stress_test__undefined__actions_.html --- docker/test/stress/stress | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index acb45b05636..3bb518131c8 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -188,7 +188,13 @@ if __name__ == "__main__": if args.hung_check: have_long_running_queries = prepare_for_hung_check(args.drop_databases) logging.info("Checking if some queries hung") - cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1") + cmd = ' '.join([args.test_cmd, + # Do not track memory allocations up to 100MiB, + # this will allow to ignore server memory limit (max_server_memory_usage) for this query. + "--client-option", "max_untracked_memory=100Mi", + "--hung-check", + "00001_select_1" + ]) res = call(cmd, shell=True, stderr=STDOUT) hung_check_status = "No queries hung\tOK\n" if res != 0 and have_long_running_queries: From 169941c5d07f643df00d23262ff1e79be0e57845 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 01:32:06 +0300 Subject: [PATCH 095/262] stress: allow 100MiB of memory to overcommit for SHOW/DROP DATABASE CI: https://s3.amazonaws.com/clickhouse-test-reports/32019/b6290ae00223fc91d514b82de25e195ab34f8bca/stress_test__thread__actions_.html --- docker/test/stress/stress | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 3bb518131c8..b5aba692a93 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -117,11 +117,14 @@ def prepare_for_hung_check(drop_databases): try: # Here we try to drop all databases in async mode. If some queries really hung, than drop will hung too. # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds). - databases = check_output('clickhouse client -q "SHOW DATABASES"', shell=True, timeout=30).decode('utf-8').strip().split() + # + # Also specify max_untracked_memory to allow 100MiB of memory to overcommit. + databases = check_output('clickhouse client -q "SHOW DATABASES" --max_untracked_memory=100Mi', + shell=True, timeout=30).decode('utf-8').strip().split() for db in databases: if db == "system": continue - command = f'clickhouse client -q "DROP DATABASE {db}"' + command = f'clickhouse client -q "DROP DATABASE {db}" --max_untracked_memory=100Mi' # we don't wait for drop Popen(command, shell=True) break From d3a7aed0054d1ef5d6615317302a6f1717ba7d19 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 21:27:40 +0300 Subject: [PATCH 096/262] stress: tune memory_profiler_step too --- docker/test/stress/stress | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index b5aba692a93..f9259891b57 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -118,13 +118,13 @@ def prepare_for_hung_check(drop_databases): # Here we try to drop all databases in async mode. If some queries really hung, than drop will hung too. # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds). # - # Also specify max_untracked_memory to allow 100MiB of memory to overcommit. 
- databases = check_output('clickhouse client -q "SHOW DATABASES" --max_untracked_memory=100Mi', + # Also specify max_untracked_memory to allow 1GiB of memory to overcommit. + databases = check_output('clickhouse client -q "SHOW DATABASES" --max_untracked_memory=1Gi --memory_profiler_step=1Gi', shell=True, timeout=30).decode('utf-8').strip().split() for db in databases: if db == "system": continue - command = f'clickhouse client -q "DROP DATABASE {db}" --max_untracked_memory=100Mi' + command = f'clickhouse client -q "DROP DATABASE {db}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi' # we don't wait for drop Popen(command, shell=True) break @@ -192,9 +192,19 @@ if __name__ == "__main__": have_long_running_queries = prepare_for_hung_check(args.drop_databases) logging.info("Checking if some queries hung") cmd = ' '.join([args.test_cmd, - # Do not track memory allocations up to 100MiB, + # Do not track memory allocations up to 1Gi, # this will allow to ignore server memory limit (max_server_memory_usage) for this query. - "--client-option", "max_untracked_memory=100Mi", + # + # NOTE: memory_profiler_step should be also adjusted, because: + # + # untracked_memory_limit = min(settings.max_untracked_memory, settings.memory_profiler_step) + # + # NOTE: that if there will be queries with GROUP BY, this trick + # will not work due to CurrentMemoryTracker::check() from + # Aggregator code. + # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. + "--client-option", "max_untracked_memory=1Gi", + "--client-option", "memory_profiler_step=1Gi", "--hung-check", "00001_select_1" ]) From adec69016653b96029afabdc4085367540af19ee Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 21:31:58 +0300 Subject: [PATCH 097/262] stress: SYSTEM DROP MARK CACHE --- docker/test/stress/stress | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index f9259891b57..85374d5efed 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -102,6 +102,7 @@ def prepare_for_hung_check(drop_databases): call_with_retry("clickhouse client -q 'SYSTEM START FETCHES'") call_with_retry("clickhouse client -q 'SYSTEM START REPLICATED SENDS'") call_with_retry("clickhouse client -q 'SYSTEM START REPLICATION QUEUES'") + call_with_retry("clickhouse client -q 'SYSTEM DROP MARK CACHE'") # Issue #21004, live views are experimental, so let's just suppress it call_with_retry("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """) From 7063ea3ee2308e51367033048ee5d0b3436fe1de Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 21:36:22 +0300 Subject: [PATCH 098/262] stress: add make_query_command() helper --- docker/test/stress/stress | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 85374d5efed..ddb309c7aee 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -75,6 +75,9 @@ def call_with_retry(query, timeout=30, retry_count=5): else: break +def make_query_command(query): + return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi""" + def prepare_for_hung_check(drop_databases): # FIXME this function should not exist, but... @@ -95,23 +98,23 @@ def prepare_for_hung_check(drop_databases): # Some tests execute SYSTEM STOP MERGES or similar queries. # It may cause some ALTERs to hang. 
# Possibly we should fix tests and forbid to use such queries without specifying table. - call_with_retry("clickhouse client -q 'SYSTEM START MERGES'") - call_with_retry("clickhouse client -q 'SYSTEM START DISTRIBUTED SENDS'") - call_with_retry("clickhouse client -q 'SYSTEM START TTL MERGES'") - call_with_retry("clickhouse client -q 'SYSTEM START MOVES'") - call_with_retry("clickhouse client -q 'SYSTEM START FETCHES'") - call_with_retry("clickhouse client -q 'SYSTEM START REPLICATED SENDS'") - call_with_retry("clickhouse client -q 'SYSTEM START REPLICATION QUEUES'") - call_with_retry("clickhouse client -q 'SYSTEM DROP MARK CACHE'") + call_with_retry(make_query_command('SYSTEM START MERGES')) + call_with_retry(make_query_command('SYSTEM START DISTRIBUTED SENDS')) + call_with_retry(make_query_command('SYSTEM START TTL MERGES')) + call_with_retry(make_query_command('SYSTEM START MOVES')) + call_with_retry(make_query_command('SYSTEM START FETCHES')) + call_with_retry(make_query_command('SYSTEM START REPLICATED SENDS')) + call_with_retry(make_query_command('SYSTEM START REPLICATION QUEUES')) + call_with_retry(make_query_command('SYSTEM DROP MARK CACHE')) # Issue #21004, live views are experimental, so let's just suppress it - call_with_retry("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """) + call_with_retry(make_query_command("KILL QUERY WHERE upper(query) LIKE 'WATCH %'")) # Kill other queries which known to be slow # It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds - call_with_retry("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """) + call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'insert into tableB select %'")) # Long query from 00084_external_agregation - call_with_retry("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """) + call_with_retry(make_query_command("KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'")) if drop_databases: for i in range(5): @@ -120,12 +123,11 @@ def prepare_for_hung_check(drop_databases): # Otherwise we will get rid of queries which wait for background pool. It can take a long time on slow builds (more than 900 seconds). # # Also specify max_untracked_memory to allow 1GiB of memory to overcommit. 
- databases = check_output('clickhouse client -q "SHOW DATABASES" --max_untracked_memory=1Gi --memory_profiler_step=1Gi', - shell=True, timeout=30).decode('utf-8').strip().split() + databases = check_output(make_query_command('SHOW DATABASES'), shell=True, timeout=30).decode('utf-8').strip().split() for db in databases: if db == "system": continue - command = f'clickhouse client -q "DROP DATABASE {db}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi' + command = make_query_command(f'DROP DATABASE {db}') # we don't wait for drop Popen(command, shell=True) break From 116731396137cad5988236ad8cc36faf05b74d51 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 Dec 2021 23:07:16 +0300 Subject: [PATCH 099/262] stress: allow memory overcommit for one more query --- docker/test/stress/stress | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index ddb309c7aee..d9b0d9a0dc7 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -139,9 +139,15 @@ def prepare_for_hung_check(drop_databases): # Wait for last queries to finish if any, not longer than 300 seconds - call("""clickhouse client -q "select sleepEachRow(( - select maxOrDefault(300 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 300 - ) / 300) from numbers(300) format Null" """, shell=True, stderr=STDOUT, timeout=330) + call(make_query_command(""" + select sleepEachRow(( + select maxOrDefault(300 - elapsed) + 1 + from system.processes + where query not like '%from system.processes%' and elapsed < 300 + ) / 300) + from numbers(300) + format Null + """), shell=True, stderr=STDOUT, timeout=330) # Even if all clickhouse-test processes are finished, there are probably some sh scripts, # which still run some new queries. Let's ignore them. From f9a95fce131ba7a5330843163f33a00f8e86ffed Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 4 Dec 2021 00:14:00 +0300 Subject: [PATCH 100/262] stress: allow memory overcommit for SELECT 1 query --- docker/test/stress/stress | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker/test/stress/stress b/docker/test/stress/stress index d9b0d9a0dc7..62a42cdf548 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -91,9 +91,7 @@ def prepare_for_hung_check(drop_databases): logging.info("Will terminate gdb (if any)") call_with_retry("kill -TERM $(pidof gdb)") - # Some tests set too low memory limit for default user and forget to reset in back. - # It may cause SYSTEM queries to fail, let's disable memory limit. - call_with_retry("clickhouse client --max_memory_usage_for_user=0 -q 'SELECT 1 FORMAT Null'") + call_with_retry(make_query_command('SELECT 1 FORMAT Null')) # Some tests execute SYSTEM STOP MERGES or similar queries. # It may cause some ALTERs to hang. 
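
For reference, here is a minimal, self-contained sketch of the client-option pattern that the stress-script patches above (094–100) converge on. The helper name and the `1Gi` option values mirror `make_query_command()` as it stands after these diffs, and the `min()` relation is quoted from the NOTE added in patch 096; the standalone `__main__` usage at the bottom is only illustrative and is not part of the patches.

``` python
# Sketch: hung-check service queries are issued with relaxed memory accounting,
# so that tight per-query / per-user memory limits left behind by earlier tests
# do not make these maintenance queries fail.

def make_query_command(query: str) -> str:
    # Untracked allocations below the threshold bypass the memory limit check:
    #   untracked_memory_limit = min(max_untracked_memory, memory_profiler_step)
    # which is why both settings are raised together here.
    return (f'clickhouse client -q "{query}" '
            '--max_untracked_memory=1Gi --memory_profiler_step=1Gi')

if __name__ == "__main__":
    # Illustrative only: commands of the kind used before the hung check.
    print(make_query_command("SHOW DATABASES"))
    print(make_query_command("SELECT 1 FORMAT Null"))
```
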
From d68d01988ec3d156f77ccd67470c27a69d7fc215 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 4 Dec 2021 21:56:15 +0300 Subject: [PATCH 101/262] clickhouse-test: apply --client-option in get_stacktraces_from_clickhouse() --- tests/clickhouse-test | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 01d632a1f50..8a87227519f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -205,26 +205,31 @@ def get_stacktraces_from_gdb(server_pid): # collect server stacktraces from system.stack_trace table # it does not work in Sandbox -def get_stacktraces_from_clickhouse(client, replicated_database=False): +def get_stacktraces_from_clickhouse(args): + settings_str = ' '.join([ + get_additional_client_options(args), + '--allow_introspection_functions=1', + '--skip_unavailable_shards=1', + ]) replicated_msg = \ - "{} --allow_introspection_functions=1 --skip_unavailable_shards=1 --query \ + "{} {} --query \ \"SELECT materialize((hostName(), tcpPort())) as host, thread_id, \ arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), \ arrayMap(x -> addressToLine(x), trace), \ arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace \ FROM clusterAllReplicas('test_cluster_database_replicated', 'system.stack_trace') \ - ORDER BY host, thread_id FORMAT Vertical\"".format(client) + ORDER BY host, thread_id FORMAT Vertical\"".format(args.client, settings_str) msg = \ - "{} --allow_introspection_functions=1 --query \ + "{} {} --query \ \"SELECT arrayStringConcat(arrayMap(x, y -> concat(x, ': ', y), \ arrayMap(x -> addressToLine(x), trace), \ arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace \ - FROM system.stack_trace FORMAT Vertical\"".format(client) + FROM system.stack_trace FORMAT Vertical\"".format(args.client, settings_str) try: return subprocess.check_output( - replicated_msg if replicated_database else msg, + replicated_msg if args.replicated_database else msg, shell=True, stderr=subprocess.STDOUT).decode('utf-8') except Exception as e: print(f"Error occurred while receiving stack traces from client: {e}") @@ -250,8 +255,7 @@ def print_stacktraces() -> None: if bt is None: print("\nCollecting stacktraces from system.stacktraces table:") - bt = get_stacktraces_from_clickhouse( - args.client, args.replicated_database) + bt = get_stacktraces_from_clickhouse(args) if bt is not None: print(bt) From f8bf3b19932752e985e59f03fe2bdf52b280e0ae Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 6 Dec 2021 09:05:34 +0300 Subject: [PATCH 102/262] stress: use max_memory_usage_for_user as a soft limit to avoid memory limit exceeded By using max_memory_usage_for_user as a soft limit, and after max_server_memory_usage as a hard, we can allow normal overcommit, using max_memory_usage_for_user=0 instead of relying on max_untracked_memory. --- docker/test/stress/run.sh | 36 ++++++++++++++++++++++++++++++++++-- docker/test/stress/stress | 3 ++- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 673e4c11570..6d720d02cdc 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -55,9 +55,41 @@ function configure() echo "1" \ > /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml + local total_mem + total_mem=$(awk '/MemTotal/ { print $(NF-1) }' /proc/meminfo) # KiB + total_mem=$(( total_mem*1024 )) # bytes # Set maximum memory usage as half of total memory (less chance of OOM). 
- echo "0.5" \ - > /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml + # + # But not via max_server_memory_usage but via max_memory_usage_for_user, + # so that we can override this setting and execute service queries, like: + # - hung check + # - show/drop database + # - ... + # + # So max_memory_usage_for_user will be a soft limit, and + # max_server_memory_usage will be hard limit, and queries that should be + # executed regardless memory limits will use max_memory_usage_for_user=0, + # instead of relying on max_untracked_memory + local max_server_mem + max_server_mem=$((total_mem*75/100)) # 75% + echo "Setting max_server_memory_usage=$max_server_mem" + cat > /etc/clickhouse-server/config.d/max_server_memory_usage.xml < + ${max_server_mem} + +EOL + local max_users_mem + max_users_mem=$((total_mem*50/100)) # 50% + echo "Setting max_memory_usage_for_user=$max_users_mem" + cat > /etc/clickhouse-server/users.d/max_memory_usage_for_user.xml < + + + ${max_users_mem} + + + +EOL } function stop() diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 62a42cdf548..c89c5ff5e27 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -76,7 +76,7 @@ def call_with_retry(query, timeout=30, retry_count=5): break def make_query_command(query): - return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi""" + return f"""clickhouse client -q "{query}" --max_untracked_memory=1Gi --memory_profiler_step=1Gi --max_memory_usage_for_user=0""" def prepare_for_hung_check(drop_databases): @@ -211,6 +211,7 @@ if __name__ == "__main__": # Aggregator code. # But right now it should work, since neither hung check, nor 00001_select_1 has GROUP BY. "--client-option", "max_untracked_memory=1Gi", + "--client-option", "max_memory_usage_for_user=0", "--client-option", "memory_profiler_step=1Gi", "--hung-check", "00001_select_1" From ef77f45f599713ca662ebae4279829522043c8a7 Mon Sep 17 00:00:00 2001 From: sunlisheng Date: Wed, 1 Dec 2021 20:11:07 +0800 Subject: [PATCH 103/262] Make HDFS replication configurable in WriteBufferFromHDFSImpl#WriteBufferFromHDFSImpl Signed-off-by: sunlisheng --- src/Disks/HDFS/DiskHDFS.cpp | 5 +++-- src/Disks/HDFS/DiskHDFS.h | 7 +++++-- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/HDFS/WriteBufferFromHDFS.cpp | 6 ++++-- src/Storages/HDFS/WriteBufferFromHDFS.h | 1 + 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 5264e6413e7..bae2a57bc67 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -97,7 +97,7 @@ std::unique_ptr DiskHDFS::writeFile(const String & path /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, - config, buf_size, + config, settings->replication, buf_size, mode == WriteMode::Rewrite ? 
O_WRONLY : O_WRONLY | O_APPEND); return std::make_unique>(std::move(hdfs_buffer), @@ -147,7 +147,8 @@ std::unique_ptr getSettings(const Poco::Util::AbstractConfigur return std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), + config.getInt(config_prefix + ".dfs.replication", 3)); } } diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 881d6e2937c..47150f1cfd8 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -14,14 +14,17 @@ struct DiskHDFSSettings size_t min_bytes_for_seek; int thread_pool_size; int objects_chunk_size_to_delete; + int replication; DiskHDFSSettings( int min_bytes_for_seek_, int thread_pool_size_, - int objects_chunk_size_to_delete_) + int objects_chunk_size_to_delete_, + int replication_) : min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + , replication(replication_) {} }; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 90e63aef46d..54351deffd8 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -203,7 +203,7 @@ public: const CompressionMethod compression_method) : SinkToStorage(sample_block) { - write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef()), compression_method, 3); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), 0), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index d6a10452f97..9f5e3c1f7d2 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -29,6 +29,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl explicit WriteBufferFromHDFSImpl( const std::string & hdfs_uri_, const Poco::Util::AbstractConfiguration & config_, + int replication_, int flags) : hdfs_uri(hdfs_uri_) , builder(createHDFSBuilder(hdfs_uri, config_)) @@ -43,7 +44,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl if (!hdfsExists(fs.get(), path.c_str())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} already exists", path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, 0, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here if (fout == nullptr) { @@ -82,10 +83,11 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl WriteBufferFromHDFS::WriteBufferFromHDFS( const std::string & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, + int replication_, size_t buf_size_, int flags_) : BufferWithOwnMemory(buf_size_) - , impl(std::make_unique(hdfs_name_, config_, flags_)) + , impl(std::make_unique(hdfs_name_, config_, replication_, flags_)) { } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index 503371f6118..fe9af7dfba4 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ 
b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -23,6 +23,7 @@ public: WriteBufferFromHDFS( const String & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, + int replication_, size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags = O_WRONLY); From 0f045fa20ad60bd19942f2e9dd8ff6798847630d Mon Sep 17 00:00:00 2001 From: sunlisheng Date: Wed, 1 Dec 2021 20:30:59 +0800 Subject: [PATCH 104/262] rename replication Signed-off-by: sunlisheng --- src/Disks/HDFS/DiskHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index bae2a57bc67..5194983100b 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -148,7 +148,7 @@ std::unique_ptr getSettings(const Poco::Util::AbstractConfigur config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), - config.getInt(config_prefix + ".dfs.replication", 3)); + config.getInt(config_prefix + ".replication", 3)); } } From 5be58a88398612587882542864aafeec191acee1 Mon Sep 17 00:00:00 2001 From: leosunli Date: Thu, 2 Dec 2021 11:40:22 +0800 Subject: [PATCH 105/262] Make HDFS replication configurable in WriteBufferFromHDFSImpl#WriteBufferFromHDFSImpl Signed-off-by: leosunli --- src/Core/Settings.h | 1 + src/Disks/HDFS/DiskHDFS.cpp | 11 ++++++----- src/Disks/HDFS/DiskHDFS.h | 11 +++++------ src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/HDFS/WriteBufferFromHDFS.cpp | 8 ++++---- src/Storages/HDFS/WriteBufferFromHDFS.h | 3 ++- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bcbe45dd002..4d014930195 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -75,6 +75,7 @@ class IColumn; M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ + M(UInt64, hdfs_replication, 3, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. 
They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 5194983100b..e92bcf2ec16 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -60,9 +60,11 @@ DiskHDFS::DiskHDFS( const String & hdfs_root_path_, SettingsPtr settings_, DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_) + const Poco::Util::AbstractConfiguration & config_, + const Settings & contextSettings_) : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, "DiskHDFS", settings_->thread_pool_size) , config(config_) + , contextSettings(contextSettings_) , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) @@ -97,7 +99,7 @@ std::unique_ptr DiskHDFS::writeFile(const String & path /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, - config, settings->replication, buf_size, + config, contextSettings, buf_size, mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); return std::make_unique>(std::move(hdfs_buffer), @@ -147,8 +149,7 @@ std::unique_ptr getSettings(const Poco::Util::AbstractConfigur return std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), - config.getInt(config_prefix + ".replication", 3)); + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); } } @@ -175,7 +176,7 @@ void registerDiskHDFS(DiskFactory & factory) return std::make_shared( name, uri, getSettings(config, config_prefix), - metadata_disk, config); + metadata_disk, config, context.getSettingsRef()); }; factory.registerDiskType("hdfs", creator); diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 47150f1cfd8..0c67983722a 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -14,17 +14,14 @@ struct DiskHDFSSettings size_t min_bytes_for_seek; int thread_pool_size; int objects_chunk_size_to_delete; - int replication; DiskHDFSSettings( int min_bytes_for_seek_, int thread_pool_size_, - int objects_chunk_size_to_delete_, - int replication_) + int objects_chunk_size_to_delete_) : min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) - , replication(replication_) {} + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} }; @@ -43,7 +40,8 @@ public: const String & hdfs_root_path_, SettingsPtr settings_, DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_); + const Poco::Util::AbstractConfiguration & config_, + const Settings & contextSettings_); DiskType getType() const override { return DiskType::HDFS; } bool isRemote() const override { return true; } @@ -70,6 +68,7 @@ private: String getRandomName() { return toString(UUIDHelpers::generateV4()); } const Poco::Util::AbstractConfiguration & config; + const Settings & contextSettings; HDFSBuilderWrapper hdfs_builder; HDFSFSPtr hdfs_fs; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 54351deffd8..5d1b98892e9 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -203,7 +203,7 @@ public: const CompressionMethod compression_method) : SinkToStorage(sample_block) { - 
write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), 0), compression_method, 3); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), context.getSettingsRef()), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index 9f5e3c1f7d2..a84849359f8 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -29,7 +29,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl explicit WriteBufferFromHDFSImpl( const std::string & hdfs_uri_, const Poco::Util::AbstractConfiguration & config_, - int replication_, + const Settings & settings_, int flags) : hdfs_uri(hdfs_uri_) , builder(createHDFSBuilder(hdfs_uri, config_)) @@ -44,7 +44,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl if (!hdfsExists(fs.get(), path.c_str())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} already exists", path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, settings_.hdfs_replication, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here if (fout == nullptr) { @@ -83,11 +83,11 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl WriteBufferFromHDFS::WriteBufferFromHDFS( const std::string & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, - int replication_, + const Settings & settings_, size_t buf_size_, int flags_) : BufferWithOwnMemory(buf_size_) - , impl(std::make_unique(hdfs_name_, config_, replication_, flags_)) + , impl(std::make_unique(hdfs_name_, config_, settings_, flags_)) { } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index fe9af7dfba4..752ea6659ef 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,7 @@ public: WriteBufferFromHDFS( const String & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, - int replication_, + const Settings & settings_, size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags = O_WRONLY); From e81a1dbe6a1fcb9684d3eed7cbd680467c4b91d8 Mon Sep 17 00:00:00 2001 From: leosunli Date: Thu, 2 Dec 2021 15:28:25 +0800 Subject: [PATCH 106/262] Make HDFS replication configurable in WriteBufferFromHDFSImpl#WriteBufferFromHDFSImpl Signed-off-by: leosunli --- src/Disks/HDFS/DiskHDFS.cpp | 11 +++++------ src/Disks/HDFS/DiskHDFS.h | 11 ++++++----- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/HDFS/WriteBufferFromHDFS.cpp | 8 ++++---- src/Storages/HDFS/WriteBufferFromHDFS.h | 2 +- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index e92bcf2ec16..befd4ed0f5f 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -60,11 +60,9 @@ DiskHDFS::DiskHDFS( const String & hdfs_root_path_, SettingsPtr settings_, DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_, - const Settings & contextSettings_) + const Poco::Util::AbstractConfiguration & config_) : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, "DiskHDFS", 
settings_->thread_pool_size) , config(config_) - , contextSettings(contextSettings_) , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) , settings(std::move(settings_)) @@ -99,7 +97,7 @@ std::unique_ptr DiskHDFS::writeFile(const String & path /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, - config, contextSettings, buf_size, + config, settings->replication, buf_size, mode == WriteMode::Rewrite ? O_WRONLY : O_WRONLY | O_APPEND); return std::make_unique>(std::move(hdfs_buffer), @@ -144,12 +142,13 @@ bool DiskHDFS::checkUniqueId(const String & hdfs_uri) const namespace { -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & setttings) { return std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16), - config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000)); + config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), + setttings.hdfs_replication); } } diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 0c67983722a..47150f1cfd8 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -14,14 +14,17 @@ struct DiskHDFSSettings size_t min_bytes_for_seek; int thread_pool_size; int objects_chunk_size_to_delete; + int replication; DiskHDFSSettings( int min_bytes_for_seek_, int thread_pool_size_, - int objects_chunk_size_to_delete_) + int objects_chunk_size_to_delete_, + int replication_) : min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) - , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) {} + , objects_chunk_size_to_delete(objects_chunk_size_to_delete_) + , replication(replication_) {} }; @@ -40,8 +43,7 @@ public: const String & hdfs_root_path_, SettingsPtr settings_, DiskPtr metadata_disk_, - const Poco::Util::AbstractConfiguration & config_, - const Settings & contextSettings_); + const Poco::Util::AbstractConfiguration & config_); DiskType getType() const override { return DiskType::HDFS; } bool isRemote() const override { return true; } @@ -68,7 +70,6 @@ private: String getRandomName() { return toString(UUIDHelpers::generateV4()); } const Poco::Util::AbstractConfiguration & config; - const Settings & contextSettings; HDFSBuilderWrapper hdfs_builder; HDFSFSPtr hdfs_fs; diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 5d1b98892e9..6b03a2490ae 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -203,7 +203,7 @@ public: const CompressionMethod compression_method) : SinkToStorage(sample_block) { - write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), context.getSettingsRef()), compression_method, 3); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), context.getSettingsRef().hdfs_replication), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index a84849359f8..9f5e3c1f7d2 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ 
b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -29,7 +29,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl explicit WriteBufferFromHDFSImpl( const std::string & hdfs_uri_, const Poco::Util::AbstractConfiguration & config_, - const Settings & settings_, + int replication_, int flags) : hdfs_uri(hdfs_uri_) , builder(createHDFSBuilder(hdfs_uri, config_)) @@ -44,7 +44,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl if (!hdfsExists(fs.get(), path.c_str())) throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} already exists", path); - fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, settings_.hdfs_replication, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here + fout = hdfsOpenFile(fs.get(), path.c_str(), flags, 0, replication_, 0); /// O_WRONLY meaning create or overwrite i.e., implies O_TRUNCAT here if (fout == nullptr) { @@ -83,11 +83,11 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl WriteBufferFromHDFS::WriteBufferFromHDFS( const std::string & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, - const Settings & settings_, + int replication_, size_t buf_size_, int flags_) : BufferWithOwnMemory(buf_size_) - , impl(std::make_unique(hdfs_name_, config_, settings_, flags_)) + , impl(std::make_unique(hdfs_name_, config_, replication_, flags_)) { } diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index 752ea6659ef..32425ee3394 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -24,7 +24,7 @@ public: WriteBufferFromHDFS( const String & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, - const Settings & settings_, + int replication_, size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags = O_WRONLY); From 8d044958405ab1d9f6da011b9a233eb36df10263 Mon Sep 17 00:00:00 2001 From: leosunli Date: Thu, 2 Dec 2021 16:31:47 +0800 Subject: [PATCH 107/262] set hdfs_replication default value 0 Signed-off-by: leosunli --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4d014930195..4e0e50cc521 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -75,7 +75,7 @@ class IColumn; M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ - M(UInt64, hdfs_replication, 3, "The actual number of replications can be specified when the hdfs file is created.", 0) \ + M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \ M(UInt64, hsts_max_age, 0, "Expired time for hsts. 0 means disable HSTS.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. 
They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ From 4017bb8f78b5e664ea1b33184e3181ac1cb0c5a7 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 2 Dec 2021 23:01:32 +0300 Subject: [PATCH 108/262] Update WriteBufferFromHDFS.h --- src/Storages/HDFS/WriteBufferFromHDFS.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index 32425ee3394..fe9af7dfba4 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include From 6f611eec91d38cbe1d45cdc52d5a3fb651419db2 Mon Sep 17 00:00:00 2001 From: leosunli Date: Fri, 3 Dec 2021 16:01:14 +0800 Subject: [PATCH 109/262] Fix bug Signed-off-by: leosunli --- src/Disks/HDFS/DiskHDFS.cpp | 8 ++++---- src/Storages/HDFS/StorageHDFS.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index befd4ed0f5f..21c8c965361 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -142,13 +142,13 @@ bool DiskHDFS::checkUniqueId(const String & hdfs_uri) const namespace { -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & setttings) +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const Settings & settings) { return std::make_unique( config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16), config.getInt(config_prefix + ".objects_chunk_size_to_delete", 1000), - setttings.hdfs_replication); + settings.hdfs_replication); } } @@ -174,8 +174,8 @@ void registerDiskHDFS(DiskFactory & factory) return std::make_shared( name, uri, - getSettings(config, config_prefix), - metadata_disk, config, context.getSettingsRef()); + getSettings(config, config_prefix, context_.getSettingsRef()), + metadata_disk, config); }; factory.registerDiskType("hdfs", creator); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index 6b03a2490ae..f8b29f5499d 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -203,7 +203,7 @@ public: const CompressionMethod compression_method) : SinkToStorage(sample_block) { - write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), context.getSettingsRef().hdfs_replication), compression_method, 3); + write_buf = wrapWriteBufferWithCompressionMethod(std::make_unique(uri, context->getGlobalContext()->getConfigRef(), context->getSettingsRef().hdfs_replication), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context); } From 3946b156a1e758081bdb609691eb0c3d2764b3f9 Mon Sep 17 00:00:00 2001 From: leosunli Date: Mon, 6 Dec 2021 14:31:55 +0800 Subject: [PATCH 110/262] Fix bug Signed-off-by: leosunli --- src/Disks/HDFS/DiskHDFS.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 21c8c965361..c116a62a977 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -174,7 +174,7 @@ void registerDiskHDFS(DiskFactory & factory) return std::make_shared( 
name, uri, - getSettings(config, config_prefix, context_.getSettingsRef()), + getSettings(config, config_prefix, context_->getSettingsRef()), metadata_disk, config); }; From b68136d1e4e734fc9b449c17fb68811f7764e13a Mon Sep 17 00:00:00 2001 From: vxider Date: Mon, 6 Dec 2021 07:12:21 +0000 Subject: [PATCH 111/262] small code style update --- src/Storages/WindowView/StorageWindowView.cpp | 23 ++++++++++--------- src/Storages/WindowView/StorageWindowView.h | 2 +- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 94f88842cbb..915e775ff14 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -307,10 +307,12 @@ namespace } } -static void extractDependentTable(ContextPtr context, ASTSelectQuery & query, String & select_database_name, String & select_table_name) +static void extractDependentTable(ContextPtr context, ASTPtr & query, String & select_database_name, String & select_table_name) { - auto db_and_table = getDatabaseAndTable(query, 0); - ASTPtr subquery = extractTableExpression(query, 0); + ASTSelectQuery & select_query = typeid_cast(*query); + + auto db_and_table = getDatabaseAndTable(select_query, 0); + ASTPtr subquery = extractTableExpression(select_query, 0); if (!db_and_table && !subquery) return; @@ -323,7 +325,7 @@ static void extractDependentTable(ContextPtr context, ASTSelectQuery & query, St { db_and_table->database = select_database_name; AddDefaultDatabaseVisitor visitor(context, select_database_name); - visitor.visit(query); + visitor.visit(select_query); } else select_database_name = db_and_table->database; @@ -335,7 +337,7 @@ static void extractDependentTable(ContextPtr context, ASTSelectQuery & query, St auto & inner_select_query = ast_select->list_of_selects->children.at(0); - extractDependentTable(context, inner_select_query->as(), select_database_name, select_table_name); + extractDependentTable(context, inner_select_query, select_database_name, select_table_name); } else throw Exception( @@ -943,12 +945,11 @@ StorageWindowView::StorageWindowView( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for {}", getName()); - ASTSelectQuery & select_query_ = typeid_cast(*query.select->list_of_selects->children.at(0)); + select_query = query.select->list_of_selects->children.at(0)->clone(); String select_database_name = getContext()->getCurrentDatabase(); String select_table_name; - extractDependentTable(getContext(), select_query_, select_database_name, select_table_name); - - select_query = select_query_.clone(); + auto select_query_tmp = select_query->clone(); + extractDependentTable(getContext(), select_query_tmp, select_database_name, select_table_name); /// If the table is not specified - use the table `system.one` if (select_table_name.empty()) @@ -960,7 +961,7 @@ StorageWindowView::StorageWindowView( DatabaseCatalog::instance().addDependency(select_table_id, table_id_); /// Extract all info from query; substitute Function_TUMPLE and Function_HOP with Function_WINDOW_ID. 
- auto inner_query = innerQueryParser(select_query_); + auto inner_query = innerQueryParser(select_query->as()); // Parse mergeable query mergeable_query = inner_query->clone(); @@ -1029,7 +1030,7 @@ StorageWindowView::StorageWindowView( } -ASTPtr StorageWindowView::innerQueryParser(ASTSelectQuery & query) +ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) { if (!query.groupBy()) throw Exception(ErrorCodes::INCORRECT_QUERY, "GROUP BY query is required for {}", getName()); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index e989663c7e5..9ebca224a7c 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -209,7 +209,7 @@ private: String function_now_timezone; - ASTPtr innerQueryParser(ASTSelectQuery & inner_query); + ASTPtr innerQueryParser(const ASTSelectQuery & query); void eventTimeParser(const ASTCreateQuery & query); std::shared_ptr getInnerTableCreateQuery( From ff01b97fb5e6ab4f0f46a3eb1b8c13efa0c78a25 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 6 Dec 2021 15:32:12 +0800 Subject: [PATCH 112/262] feat: modify need_reset_counters simple method --- src/Access/EnabledQuota.cpp | 8 +++++- src/Core/Settings.h | 8 ++---- .../Access/InterpreterShowPrivilegesQuery.cpp | 2 -- .../Access/InterpreterShowPrivilegesQuery.h | 4 +-- .../InterpreterShowProcesslistQuery.cpp | 1 - .../InterpreterShowProcesslistQuery.h | 4 +-- .../InterpreterShowTablesQuery.cpp | 1 - src/Interpreters/InterpreterShowTablesQuery.h | 4 +-- src/Interpreters/executeQuery.cpp | 2 +- tests/integration/test_quota/test.py | 25 ++++--------------- 10 files changed, 21 insertions(+), 38 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 5f8251ce3c9..359a1642840 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -67,7 +67,13 @@ struct EnabledQuota::Impl { /// We reset counters only if the interval's end has been calculated before. /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. - need_reset_counters = (end_of_interval.load().count() != 0); + if (!interval.end_of_interval.load().count()) + { + /// We need to calculate end of the interval if it hasn't been calculated before. + bool dummy; + getEndOfInterval(interval, current_time, dummy); + } + need_reset_counters = true; break; } end = std::chrono::system_clock::time_point{end_loaded}; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d94e2f8d1c1..b9fe43ff7e4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -641,17 +641,13 @@ class IColumn; // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
-// settings work in query runtime -#define RUNTIME_QUERY_SETTINGS(M) \ - M(Bool, is_reinterpreted_execution, false, "Queries such as show tables will be reinterpreted to select query.", 0) -// End of RUNTIME_QUERY_SETTINGS + #define LIST_OF_SETTINGS(M) \ COMMON_SETTINGS(M) \ OBSOLETE_SETTINGS(M) \ - FORMAT_FACTORY_SETTINGS(M) \ - RUNTIME_QUERY_SETTINGS(M) + FORMAT_FACTORY_SETTINGS(M) DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) diff --git a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp index 436e2962ed3..cc15bf191ee 100644 --- a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB { @@ -12,7 +11,6 @@ InterpreterShowPrivilegesQuery::InterpreterShowPrivilegesQuery(const ASTPtr & qu BlockIO InterpreterShowPrivilegesQuery::execute() { - context->applySettingChange({"is_reinterpreted_execution", true}); return executeQuery("SELECT * FROM system.privileges", context, true); } diff --git a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h index d8a9d3206e5..852d5173eb1 100644 --- a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.h @@ -15,8 +15,8 @@ public: BlockIO execute() override; - bool ignoreQuota() const override { return false; } - bool ignoreLimits() const override { return false; } + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } private: ASTPtr query_ptr; diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.cpp b/src/Interpreters/InterpreterShowProcesslistQuery.cpp index 5c11f3a53fc..780ba688a89 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.cpp +++ b/src/Interpreters/InterpreterShowProcesslistQuery.cpp @@ -12,7 +12,6 @@ namespace DB BlockIO InterpreterShowProcesslistQuery::execute() { - getContext()->applySettingChange({"is_reinterpreted_execution", true}); return executeQuery("SELECT * FROM system.processes", getContext(), true); } diff --git a/src/Interpreters/InterpreterShowProcesslistQuery.h b/src/Interpreters/InterpreterShowProcesslistQuery.h index 05124eb2b41..31454882a89 100644 --- a/src/Interpreters/InterpreterShowProcesslistQuery.h +++ b/src/Interpreters/InterpreterShowProcesslistQuery.h @@ -19,8 +19,8 @@ public: /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then /// the SELECT query will checks the quota and limits. 
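/// Returning true below is therefore safe: only the rewritten SELECT on the system table is accounted against the quota and limits.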
- bool ignoreQuota() const override { return false; } - bool ignoreLimits() const override { return false; } + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } private: ASTPtr query_ptr; diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index b5c79cdb415..609df1404ca 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -142,7 +142,6 @@ String InterpreterShowTablesQuery::getRewrittenQuery() BlockIO InterpreterShowTablesQuery::execute() { - getContext()->applySettingChange({"is_reinterpreted_execution", true}); return executeQuery(getRewrittenQuery(), getContext(), true); } diff --git a/src/Interpreters/InterpreterShowTablesQuery.h b/src/Interpreters/InterpreterShowTablesQuery.h index fe1bd861177..16fc9ef2cf4 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.h +++ b/src/Interpreters/InterpreterShowTablesQuery.h @@ -22,8 +22,8 @@ public: /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then /// the SELECT query will checks the quota and limits. - bool ignoreQuota() const override { return false; } - bool ignoreLimits() const override { return false; } + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } private: ASTPtr query_ptr; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 0404530ff9b..f401f708ab1 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -600,7 +600,7 @@ static std::tuple executeQueryImpl( auto interpreter = InterpreterFactory::get(ast, context, SelectQueryOptions(stage).setInternal(internal)); std::shared_ptr quota; - if (!interpreter->ignoreQuota() && !context->getSettingsRef().is_reinterpreted_execution) + if (!interpreter->ignoreQuota()) { quota = context->getQuota(); if (quota) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 57080459513..a2edfdb1624 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -393,46 +393,31 @@ def test_query_inserts(): def test_consumption_of_show_tables(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert instance.query("SHOW TABLES") == "test_table\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_databases(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert instance.query("SHOW DATABASES") == "INFORMATION_SCHEMA\ndefault\ninformation_schema\nsystem\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_clusters(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert len(instance.query("SHOW CLUSTERS")) > 0 assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", 
instance.query("SHOW QUOTA")) def test_consumption_of_show_processlist(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) instance.query("SHOW PROCESSLIST") assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_privileges(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert len(instance.query("SHOW PRIVILEGES")) > 0 assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t.*\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) From 251a4963f0ae6a7fdfca754c10a5b4d363fde82d Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 6 Dec 2021 15:38:23 +0800 Subject: [PATCH 113/262] feat: modify test --- src/Core/Settings.h | 3 --- src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp | 1 + tests/integration/test_quota/test.py | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index b9fe43ff7e4..fb8d72f51c4 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -641,9 +641,6 @@ class IColumn; // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. - - - #define LIST_OF_SETTINGS(M) \ COMMON_SETTINGS(M) \ OBSOLETE_SETTINGS(M) \ diff --git a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp index cc15bf191ee..05aa74d7dc4 100644 --- a/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowPrivilegesQuery.cpp @@ -1,6 +1,7 @@ #include #include + namespace DB { InterpreterShowPrivilegesQuery::InterpreterShowPrivilegesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index a2edfdb1624..4149987996b 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -401,7 +401,7 @@ def test_consumption_of_show_tables(): def test_consumption_of_show_databases(): assert instance.query("SHOW DATABASES") == "INFORMATION_SCHEMA\ndefault\ninformation_schema\nsystem\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t4\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_clusters(): @@ -413,7 +413,7 @@ def test_consumption_of_show_clusters(): def test_consumption_of_show_processlist(): instance.query("SHOW PROCESSLIST") assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_privileges(): From 31648d95e2f70e0af6cb1286a6674d83ee07b732 Mon Sep 17 00:00:00 2001 From: Dmitriy Dorofeev Date: Mon, 6 Dec 2021 10:49:14 +0300 Subject: [PATCH 114/262] use application/x-ndjson for streaming JSON (#32223) --- src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h 
b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index 147169b5e91..ac03c2991bf 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -27,7 +27,7 @@ public: /// Content-Type to set when sending HTTP response. String getContentType() const override { - return settings.json.array_of_rows ? "application/json; charset=UTF-8" : IRowOutputFormat::getContentType(); + return settings.json.array_of_rows ? "application/json; charset=UTF-8" : "application/x-ndjson; charset=UTF-8" ; } protected: From 2220d1784c1023f11bac5583ba02e21048b825a5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 6 Dec 2021 10:49:44 +0300 Subject: [PATCH 115/262] Disable --- tests/queries/0_stateless/01050_window_view_parser_tumble.sql | 2 ++ tests/queries/0_stateless/01051_window_view_parser_hop.sql | 2 ++ .../0_stateless/01052_window_view_proc_tumble_to_now.sql | 2 ++ tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql | 2 ++ tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql | 2 ++ tests/queries/0_stateless/01055_window_view_proc_hop_to.sql | 2 ++ .../01057_window_view_event_tumble_to_strict_asc.sql | 2 ++ .../0_stateless/01058_window_view_event_hop_to_strict_asc.sql | 2 ++ .../0_stateless/01060_window_view_event_tumble_to_asc.sql | 2 ++ .../queries/0_stateless/01061_window_view_event_hop_to_asc.sql | 2 ++ .../0_stateless/01063_window_view_event_tumble_to_bounded.sql | 2 ++ .../0_stateless/01064_window_view_event_hop_to_bounded.sql | 2 ++ .../01066_window_view_event_tumble_to_strict_asc_lateness.sql | 2 ++ .../01067_window_view_event_tumble_to_asc_lateness.sql | 2 ++ .../01068_window_view_event_tumble_to_bounded_lateness.sql | 2 ++ 15 files changed, 30 insertions(+) diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 12f67a68237..f11c4507a98 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 3c1e3d16320..6d79cd9a5f5 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql index 787811ad942..f7b4080f2d6 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql index 4413e52025d..fb905041a1e 100644 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql 
b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql index 8ecd93fbf87..6417c81fe40 100644 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql index 8e28577f645..589892b31e8 100644 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql index 3f1200667ee..47b641dbe97 100644 --- a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql +++ b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql index adf55eb6f2c..9056d91da1e 100644 --- a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql +++ b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql index 2576636f622..4dacf7b8554 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql index 892168226c5..5b086e382be 100644 --- a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql +++ b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql index 290ae600845..9168d966c1c 100644 --- a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql +++ b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql index 25041cdf244..5735f14c569 100644 --- a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql +++ b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql 
b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql index b8a72c1f0bb..c5859b7dd9f 100644 --- a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql +++ b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql index 92cb366d28d..afc1c54005d 100644 --- a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql +++ b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql index 81c3ef227d8..6ccc335d9a5 100644 --- a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql +++ b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql @@ -1,3 +1,5 @@ +-- Tags: disabled + SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; From bf3d3ea25c8a67fadb88f076103d9b75f9f47d64 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 6 Dec 2021 16:20:10 +0800 Subject: [PATCH 116/262] Fix style --- src/IO/WriteBufferFromHTTP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 352e38f3529..102cfd69197 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -10,7 +10,7 @@ WriteBufferFromHTTP::WriteBufferFromHTTP( const Poco::URI & uri, const std::string & method, const std::string & content_type, - const ConnectionTimeouts & timeouts, + const ConnectionTimeouts & timeouts, size_t buffer_size_) : WriteBufferFromOStream(buffer_size_) , session{makeHTTPSession(uri, timeouts)} From 55c6e16f375c4f59fb3ca8f65ad73ca89bb9291e Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 11:25:18 +0300 Subject: [PATCH 117/262] Update docs/en/sql-reference/aggregate-functions/reference/sparkbar.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/aggregate-functions/reference/sparkbar.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 34052aac86f..63a53ce2f03 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -7,7 +7,7 @@ toc_title: sparkbar The function plots a frequency histogram for values `x` and the repetition rate of these `y` values over the interval `[min_x, max_x]`. -If no interval is specified, then the minimum `x` will be used as the interval start, and the maximum `x` will use as the interval end. +If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end. 
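As an illustration of the two call forms described above (an editorial sketch, not part of this patch: the table `spark_data`, its columns and the literal dates are made up):

```sql
-- Hypothetical table: one row per day with an event count.
CREATE TABLE spark_data (event_date Date, cnt UInt64) ENGINE = MergeTree ORDER BY event_date;
INSERT INTO spark_data VALUES ('2021-12-01', 3), ('2021-12-02', 7), ('2021-12-04', 2);

-- Width only: the interval defaults to [min(event_date), max(event_date)].
SELECT sparkbar(9)(event_date, cnt) FROM spark_data;

-- Width plus an explicit interval through the optional min_x / max_x parameters.
SELECT sparkbar(9, toDate('2021-12-01'), toDate('2021-12-07'))(event_date, cnt) FROM spark_data;
```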
**Syntax** From e6d00819640ca4125df7b11f1472942cc563b07b Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 11:25:32 +0300 Subject: [PATCH 118/262] Update docs/en/sql-reference/aggregate-functions/reference/sparkbar.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../aggregate-functions/reference/sparkbar.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 63a53ce2f03..adbe1d551ca 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -15,13 +15,16 @@ If no interval is specified, then the minimum `x` is used as the interval start, sparkbar(width, min_x, max_x)(x, y) ``` +**Parameters** + +- `width` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — The interval start. Optional parameter. +- `max_x` — The interval end. Optional parameter. + **Arguments** -- `width` — The number of segments. Must be [Integer](../../../sql-reference/data-types/int-uint.md). -- `min_x` — The interval start. Optional value. -- `max_x` — The interval end. Optional value. -- `x` — The range of values. -- `y` — The frequency of values. +- `x` — The field with values. +- `y` — The field with the frequency of values. **Returned value** From 59a701ee6ec3a2d49e978fe658fc46ab55a9f0dc Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 6 Dec 2021 16:31:47 +0800 Subject: [PATCH 119/262] Fix style --- src/IO/WriteBufferFromHTTP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/WriteBufferFromHTTP.cpp b/src/IO/WriteBufferFromHTTP.cpp index 102cfd69197..5ddc28d2db1 100644 --- a/src/IO/WriteBufferFromHTTP.cpp +++ b/src/IO/WriteBufferFromHTTP.cpp @@ -7,8 +7,8 @@ namespace DB { WriteBufferFromHTTP::WriteBufferFromHTTP( - const Poco::URI & uri, - const std::string & method, + const Poco::URI & uri, + const std::string & method, const std::string & content_type, const ConnectionTimeouts & timeouts, size_t buffer_size_) From e7a431276852019a9491d99c9877646595c6cf3e Mon Sep 17 00:00:00 2001 From: vxider Date: Mon, 6 Dec 2021 08:41:56 +0000 Subject: [PATCH 120/262] add comments --- src/Storages/WindowView/StorageWindowView.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 9ebca224a7c..08f24816d72 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -150,8 +150,11 @@ public: private: Poco::Logger * log; + /// Stored query, e.g. SELECT * FROM * GROUP BY TUMBLE(now(), *) ASTPtr select_query; + /// Used to generate the mergeable state of select_query, e.g. SELECT * FROM * GROUP BY WINDOW_ID(____timestamp, *) ASTPtr mergeable_query; + /// Used to fetch the mergeable state and generate the final result. e.g. 
SELECT * FROM * GROUP BY TUMBLE(____timestamp, *) ASTPtr final_query; ContextMutablePtr window_view_context; From 5bab484422a09bcbcb537f99675bd87b44bae183 Mon Sep 17 00:00:00 2001 From: vxider Date: Mon, 6 Dec 2021 08:50:10 +0000 Subject: [PATCH 121/262] increase sleep time in tests --- .../0_stateless/01052_window_view_proc_tumble_to_now.sql | 2 +- tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql | 2 +- .../01057_window_view_event_tumble_to_strict_asc.sql | 2 +- .../0_stateless/01058_window_view_event_hop_to_strict_asc.sql | 2 +- .../0_stateless/01060_window_view_event_tumble_to_asc.sql | 2 +- .../queries/0_stateless/01061_window_view_event_hop_to_asc.sql | 2 +- .../0_stateless/01063_window_view_event_tumble_to_bounded.sql | 2 +- .../0_stateless/01064_window_view_event_hop_to_bounded.sql | 2 +- .../01066_window_view_event_tumble_to_strict_asc_lateness.sql | 2 +- .../01067_window_view_event_tumble_to_asc_lateness.sql | 2 +- .../01068_window_view_event_tumble_to_bounded_lateness.sql | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql index 787811ad942..169a66e7bc6 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql @@ -9,7 +9,7 @@ CREATE TABLE mt(a Int32) ENGINE=MergeTree ORDER BY tuple(); CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY TUMBLE(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1); -SELECT sleep(2); +SELECT sleep(3); SELECT count from dst; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql index 4413e52025d..c39bab21cb1 100644 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql @@ -9,7 +9,7 @@ CREATE TABLE mt(a Int32) ENGINE=MergeTree ORDER BY tuple(); CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY HOP(now('US/Samoa'), INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1); -SELECT sleep(2); +SELECT sleep(3); SELECT count from dst; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql index 3f1200667ee..4883e006e85 100644 --- a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql +++ b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql @@ -17,7 +17,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql index adf55eb6f2c..944fd9939b4 100644 --- a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql +++ b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql @@ -17,7 +17,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); -SELECT sleep(1); +SELECT sleep(3); 
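-- The longer sleep gives the window view's background thread time to fire the closed windows
-- and flush their rows into dst before the SELECT below reads the result.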
SELECT * from dst order by w_end; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql index 2576636f622..18b17fd3d2b 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql @@ -18,7 +18,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql index 892168226c5..2cf98d6b08f 100644 --- a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql +++ b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql @@ -18,7 +18,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql index 290ae600845..37757fd77b3 100644 --- a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql +++ b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql @@ -18,7 +18,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql index 25041cdf244..5f148900905 100644 --- a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql +++ b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql @@ -17,7 +17,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end; DROP TABLE wv NO DELAY; diff --git a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql index b8a72c1f0bb..1c55b70f3aa 100644 --- a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql +++ b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql @@ -21,7 +21,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:07'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end, count; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql index 92cb366d28d..11409203d4c 100644 --- a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql +++ b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql @@ -21,7 +21,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:07'); INSERT INTO mt VALUES (1, 
'1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end, count; DROP TABLE wv; diff --git a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql index 81c3ef227d8..74a095c632f 100644 --- a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql +++ b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql @@ -22,7 +22,7 @@ INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:12'); -SELECT sleep(1); +SELECT sleep(3); SELECT * from dst order by w_end, count; DROP TABLE wv; From e0fa4731fb7e0dd02d0b0bfee268d28f4591754d Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 6 Dec 2021 12:57:09 +0300 Subject: [PATCH 122/262] Fix typo in tupleToNameValuePairs doc --- docs/en/sql-reference/functions/tuple-functions.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b72b75d6de6..8d06e8ea1cc 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -175,6 +175,7 @@ in which the `Strings` represents the named fields of the tuple and `T` are the ``` sql tupleToNameValuePairs(tuple) +``` **Arguments** @@ -196,7 +197,7 @@ CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64) ENGINE = INSERT INTO tupletest VALUES (tuple( 100, 2502)), (tuple(1,100)); SELECT tupleToNameValuePairs(col) FROM tupletest; -``` +``` Result: From 1ec9039b1de05b08d039d60f7b2f943fffb7abef Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 6 Dec 2021 14:01:56 +0300 Subject: [PATCH 123/262] ExternalDictionariesLoader fix getCurrentDatabase multiple times --- src/Interpreters/ExternalDictionariesLoader.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ExternalDictionariesLoader.cpp b/src/Interpreters/ExternalDictionariesLoader.cpp index 74bff33c914..f615aa24a91 100644 --- a/src/Interpreters/ExternalDictionariesLoader.cpp +++ b/src/Interpreters/ExternalDictionariesLoader.cpp @@ -98,12 +98,12 @@ QualifiedTableName ExternalDictionariesLoader::qualifyDictionaryNameWithDatabase /// If dictionary was not qualified with database name, try to resolve dictionary as xml dictionary. if (qualified_name->database.empty() && !has(qualified_name->table)) { - auto current_database_name = query_context->getCurrentDatabase(); + std::string current_database_name = query_context->getCurrentDatabase(); std::string resolved_name = resolveDictionaryNameFromDatabaseCatalog(dictionary_name, current_database_name); /// If after qualify dictionary_name with default_database_name we find it, add default_database to qualified name. 
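/// Reuse the database name fetched above instead of calling getCurrentDatabase() a second time.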
if (has(resolved_name)) - qualified_name->database = query_context->getCurrentDatabase(); + qualified_name->database = std::move(current_database_name); } return *qualified_name; From ee2655b704a628ee0bb1501ec07003a9f8fdc475 Mon Sep 17 00:00:00 2001 From: Peignon Melvyn Date: Mon, 6 Dec 2021 12:46:19 +0100 Subject: [PATCH 124/262] Update mergetree.md The link did not had the correct anchor --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index d08de080e6b..4b7473f76ad 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -262,7 +262,7 @@ In the example below, the index can’t be used. SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' ``` -To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md). +To check whether ClickHouse can use the index when running a query, use the settings [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) and [force_primary_key](../../../operations/settings/settings.md#force-primary-key). The key for partitioning by month allows reading only those data blocks which contain dates from the proper range. In this case, the data block may contain data for many dates (up to an entire month). Within a block, data is sorted by primary key, which might not contain the date as the first column. Because of this, using a query with only a date condition that does not specify the primary key prefix will cause more data to be read than for a single date. From 32b48752735094435a65722a59123352fa79a0aa Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Dec 2021 14:53:39 +0300 Subject: [PATCH 125/262] Fix possible Pipeline stuck in case of StrictResize processor. --- src/Processors/ResizeProcessor.cpp | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index d652a342150..f5ee1cb487c 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -403,12 +403,22 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in /// Close all other waiting for data outputs (there is no corresponding input for them). 
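/// Once every output is finished there is nothing left to produce: the inputs are closed and the processor returns Finished instead of leaving the pipeline stuck.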
while (!waiting_outputs.empty()) { - auto & output = output_ports[waiting_outputs.front()]; - waiting_outputs.pop(); + auto & output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); - output.status = OutputStatus::Finished; - output.port->finish(); - ++num_finished_outputs; + if (output.status != OutputStatus::Finished) + ++num_finished_outputs; + + output.status = OutputStatus::Finished; + output.port->finish(); + } + + if (num_finished_outputs == outputs.size()) + { + for (auto & input : inputs) + input.close(); + + return Status::Finished; } if (disabled_input_ports.empty()) @@ -418,4 +428,3 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in } } - From f064f2cdaaae09bfcad5d5f7b161dd36dfe040cd Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Dec 2021 15:21:39 +0300 Subject: [PATCH 126/262] Use seq_cst semantic for MergeTreeBackgroundExecutor mertic. (#32125) --- .../MergeTree/MergeTreeBackgroundExecutor.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h index c59e53fb20e..f4635812e08 100644 --- a/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h +++ b/src/Storages/MergeTree/MergeTreeBackgroundExecutor.h @@ -34,11 +34,22 @@ struct TaskRuntimeData { TaskRuntimeData(ExecutableTaskPtr && task_, CurrentMetrics::Metric metric_) : task(std::move(task_)) - , increment(std::move(metric_)) - {} + , metric(metric_) + { + /// Increment and decrement a metric with sequentially consistent memory order + /// This is needed, because in unit test this metric is read from another thread + /// and some invariant is checked. With relaxed memory order we could read stale value + /// for this metric, that's why test can be failed. + CurrentMetrics::values[metric].fetch_add(1); + } + + ~TaskRuntimeData() + { + CurrentMetrics::values[metric].fetch_sub(1); + } ExecutableTaskPtr task; - CurrentMetrics::Increment increment; + CurrentMetrics::Metric metric; std::atomic_bool is_currently_deleting{false}; /// Actually autoreset=false is needed only for unit test /// where multiple threads could remove tasks corresponding to the same storage From 39554a6843749fa0e4a857af5064f7c862f7e120 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Mon, 6 Dec 2021 15:29:42 +0300 Subject: [PATCH 127/262] Update docs/ru/operations/server-configuration-parameters/settings.md Co-authored-by: gyuton <40863448+gyuton@users.noreply.github.com> --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index a19380e36f4..1750ffc340b 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1005,7 +1005,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part - `database` – имя базы данных. - `table` – имя системной таблицы, где будут логироваться запросы. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если указывается параметр `engine`. +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). 
Нельзя использовать, если задан параметр `engine`. - `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если задан параметр `partition_by`. - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. From a45b204eb478be80c0fede42e6e33e025d690460 Mon Sep 17 00:00:00 2001 From: Roman Bug Date: Mon, 6 Dec 2021 15:29:47 +0300 Subject: [PATCH 128/262] Update docs/ru/operations/server-configuration-parameters/settings.md Co-authored-by: gyuton <40863448+gyuton@users.noreply.github.com> --- docs/ru/operations/server-configuration-parameters/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 1750ffc340b..1b0c7fc5897 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -1006,7 +1006,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part - `database` – имя базы данных. - `table` – имя системной таблицы, где будут логироваться запросы. - `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если задан параметр `partition_by`. +- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. - `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. From 1d75d8fd45c92a2028a988cc2aba7d664f45c603 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Dec 2021 15:52:08 +0300 Subject: [PATCH 129/262] Fix assert. 
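The one-line change below additionally strips LowCardinality from the window transform's input columns. As a hedged sketch of the kind of query that exercises this path (a hypothetical example, not the actual query behind the assert):

```sql
-- The experimental flag is harmless on versions where window functions are already stable.
SET allow_experimental_window_functions = 1;

SELECT
    lc,
    count() OVER (PARTITION BY lc) AS cnt
FROM
(
    -- toLowCardinality() produces the wrapped column type that the transform now unwraps.
    SELECT toLowCardinality(toString(number % 3)) AS lc
    FROM numbers(10)
);
```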
--- src/Processors/Transforms/WindowTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 57754847a42..e9974e9950f 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -203,7 +203,7 @@ WindowTransform::WindowTransform(const Block & input_header_, auto input_columns = input_header.getColumns(); for (auto & column : input_columns) { - column = std::move(column)->convertToFullColumnIfConst(); + column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); } input_header.setColumns(std::move(input_columns)); From c3a7858a6378811cbf1ea2b427a033c0f35c12c8 Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Mon, 6 Dec 2021 16:22:15 +0300 Subject: [PATCH 130/262] Fix comments --- docs/en/operations/settings/settings.md | 2 +- docs/ru/operations/settings/settings.md | 2 +- docs/ru/operations/system-tables/query_views_log.md | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index c4c13a48560..fa4cc41e8ff 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -992,7 +992,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. -Query threads log into `system.query_thread_log` table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. +Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. Possible values: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index d6830a30321..f9717b0fb27 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -914,7 +914,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Управляет логированием информации о потоках выполнения запросов. -Информация о потоках выполнения запросов сохраняется в системной таблице `system.query_thread_log`. Работает только в том случае, если включена настройка [log_queries](#settings-log-queries). Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). +Информация о потоках выполнения запросов сохраняется в системной таблице [system.query_thread_log](../../operations/system-tables/query_thread_log.md). Работает только в том случае, если включена настройка [log_queries](#settings-log-queries). 
Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query_thread_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). Возможные значения: diff --git a/docs/ru/operations/system-tables/query_views_log.md b/docs/ru/operations/system-tables/query_views_log.md index 9715d739af1..8b1a8d387a6 100644 --- a/docs/ru/operations/system-tables/query_views_log.md +++ b/docs/ru/operations/system-tables/query_views_log.md @@ -11,7 +11,7 @@ ClickHouse не удаляет данные из таблицы автоматически. Подробнее смотрите раздел [Системные таблицы](../../operations/system-tables/index.md#system-tables-introduction). -Вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability), чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`. +Чтобы уменьшить количество запросов, регистрируемых в таблице `query_views_log`, вы можете включить настройку [log_queries_probability](../../operations/settings/settings.md#log-queries-probability). Столбцы: @@ -33,7 +33,7 @@ ClickHouse не удаляет данные из таблицы автомати - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк. - `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных байт. - `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между объемом выделенной и освобожденной памяти в контексте этого представления. -- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — События профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events). +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — события профиля, которые измеряют различные показатели. Их описание можно найти в таблице [system.events](../../operations/system-tables/events.md#system_tables-events). - `status` ([Enum8](../../sql-reference/data-types/enum.md)) — статус представления. Возможные значения: - `'QueryStart' = 1` — успешное начало выполнения представления. Не должно отображаться. - `'QueryFinish' = 2` — успешное завершение выполнения представления. 
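Between the two documentation patches, a minimal sketch of how the thread log described above is typically consulted (assuming the default `query_thread_log` server configuration; the sample query is arbitrary):

```sql
SET log_queries = 1, log_query_threads = 1;
SELECT sum(number) FROM numbers(1000000);

-- Logs are flushed in the background; force a flush for the sake of the example.
SYSTEM FLUSH LOGS;

SELECT query_id, thread_name, thread_id
FROM system.query_thread_log
ORDER BY event_time DESC
LIMIT 5;
```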
From 7797a7290776deca2c20dc14b9730fb8cd7df25e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 6 Dec 2021 16:35:34 +0300 Subject: [PATCH 131/262] fix false-positive ciclyc dependency with XML dict --- src/Databases/DDLDependencyVisitor.cpp | 6 ++++- src/Databases/DDLDependencyVisitor.h | 2 +- src/Databases/DatabaseMemory.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 4 +-- src/Databases/TablesLoader.cpp | 8 ++++-- src/Interpreters/InterpreterCreateQuery.cpp | 5 ++-- .../configs/dictionaries/node.xml | 25 +++++++++++++++++++ .../test_dictionaries_dependency_xml/test.py | 9 ++++++- 8 files changed, 51 insertions(+), 10 deletions(-) create mode 100644 tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index 09d3752b180..532691f7978 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -11,7 +11,7 @@ namespace DB { -TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const ASTPtr & ast) +TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast) { assert(global_context == global_context->getGlobalContext()); TableLoadingDependenciesVisitor::Data data; @@ -20,6 +20,7 @@ TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const data.global_context = global_context; TableLoadingDependenciesVisitor visitor{data}; visitor.visit(ast); + data.dependencies.erase(table); return data.dependencies; } @@ -132,7 +133,10 @@ void DDLDependencyVisitor::extractTableNameFromArgument(const ASTFunction & func } if (qualified_name.database.empty()) + { + /// It can be table/dictionary from default database or XML dictionary, but we cannot distinguish it here. 
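/// Qualify with the default database anyway; TablesLoader later drops dependencies that turn out to be XML dictionaries.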
qualified_name.database = data.default_database; + } data.dependencies.emplace(std::move(qualified_name)); } diff --git a/src/Databases/DDLDependencyVisitor.h b/src/Databases/DDLDependencyVisitor.h index b5ca976f665..ae7f7aa94d9 100644 --- a/src/Databases/DDLDependencyVisitor.h +++ b/src/Databases/DDLDependencyVisitor.h @@ -12,7 +12,7 @@ class ASTStorage; using TableNamesSet = std::unordered_set; -TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const ASTPtr & ast); +TableNamesSet getDependenciesSetFromCreateQuery(ContextPtr global_context, const QualifiedTableName & table, const ASTPtr & ast); /// Visits ASTCreateQuery and extracts names of table (or dictionary) dependencies /// from column default expressions (joinGet, dictGet, etc) diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 50e56885743..3309d25b1c2 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -121,7 +121,7 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl throw Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot alter: There is no metadata of table {}", table_id.getNameForLogs()); applyMetadataChangesToCreateQuery(it->second, metadata); - TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), it->second); + TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), it->second); DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index b7a0aff24d6..b5557d9a08d 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -181,8 +181,8 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables return; } - TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext(), ast); QualifiedTableName qualified_name{database_name, create_query->getTable()}; + TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext(), qualified_name, ast); std::lock_guard lock{metadata.mutex}; metadata.parsed_tables[qualified_name] = ParsedTableMetadata{full_path.string(), ast}; @@ -297,7 +297,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta out.close(); } - TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), ast); + TableNamesSet new_dependencies = getDependenciesSetFromCreateQuery(local_context->getGlobalContext(), table_id.getQualifiedName(), ast); DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); commitAlterTable(table_id, table_metadata_tmp_path, table_metadata_path, statement, local_context); diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index b8c380b7be1..4ce719279f3 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -133,10 +133,14 @@ void TablesLoader::removeUnresolvableDependencies(bool remove_loaded) /// Table exists and it's already loaded if (DatabaseCatalog::instance().isTableExist(StorageID(dependency_name.database, dependency_name.table), global_context)) return remove_loaded; - /// It's XML dictionary. It was loaded before tables and DDL dictionaries. + /// It's XML dictionary. 
if (dependency_name.database == metadata.default_database && global_context->getExternalDictionariesLoader().has(dependency_name.table)) - return remove_loaded; + { + LOG_WARNING(log, "Tables {} depend on XML dictionary {}, but XML dictionaries are loaded independently." + "Consider converting it to DDL dictionary.", fmt::join(info.dependent_database_objects, ", "), dependency_name); + return true; + } /// Some tables depends on table "dependency_name", but there is no such table in DatabaseCatalog and we don't have its metadata. /// We will ignore it and try to load dependent tables without "dependency_name" diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0d9cad34545..8f003e75a07 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -981,9 +981,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) return {}; /// If table has dependencies - add them to the graph - TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), query_ptr); + QualifiedTableName qualified_name{database_name, create.getTable()}; + TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); if (!loading_dependencies.empty()) - DatabaseCatalog::instance().addLoadingDependencies(QualifiedTableName{database_name, create.getTable()}, std::move(loading_dependencies)); + DatabaseCatalog::instance().addLoadingDependencies(std::move(qualified_name), std::move(loading_dependencies)); return fillTableIfNeeded(create); } diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml new file mode 100644 index 00000000000..2cd957a3720 --- /dev/null +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/node.xml @@ -0,0 +1,25 @@ + + + node + + + localhost + 9000 + default + + system + select dummy, toString(dummy) from system.one + + + 0 + + + key + + name + String + + + + + diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py index 1b3ea32d09c..13635c7b969 100644 --- a/tests/integration/test_dictionaries_dependency_xml/test.py +++ b/tests/integration/test_dictionaries_dependency_xml/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry DICTIONARY_FILES = ['configs/dictionaries/dep_x.xml', 'configs/dictionaries/dep_y.xml', - 'configs/dictionaries/dep_z.xml'] + 'configs/dictionaries/dep_z.xml', 'configs/dictionaries/node.xml'] cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', dictionaries=DICTIONARY_FILES, stay_alive=True) @@ -117,3 +117,10 @@ def test_dependent_tables(started_cluster): query("drop table system.join") query("drop database a") query("drop database lazy") + + +def test_xml_dict_same_name(started_cluster): + instance.query("create table default.node ( key UInt64, name String ) Engine=Dictionary(node);") + instance.restart_clickhouse() + assert "node" in instance.query("show tables from default") + instance.query("drop table default.node") From 5b03dabb09362fc099467c6e4a960989aa17f8fc Mon Sep 17 00:00:00 2001 From: romanzhukov Date: Mon, 6 Dec 2021 16:38:19 +0300 Subject: [PATCH 132/262] Update string-search-functions.md --- 
docs/ru/sql-reference/functions/string-search-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index f0c8f51225e..4969bf0f2eb 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -357,7 +357,7 @@ Result: ## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyhaystack-distance-pattern1-pattern2-patternn} -То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с нечеткими вариантами. +То же, что и `multiMatchAny`, но возвращает 1 если любой шаблон соответствует haystack в пределах константного [редакционного расстояния](https://en.wikipedia.org/wiki/Edit_distance). Эта функция основана на экспериментальной библиотеке [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) и может быть медленной для некоторых частных случаев. Производительность зависит от значения редакционного расстояния и используемых шаблонов, но всегда медленнее по сравнению с non-fuzzy вариантами. ## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\]) {#multifuzzymatchanyindexhaystack-distance-pattern1-pattern2-patternn} From 0c2d56fc8847c803bc6832cdbcca66439c9bf898 Mon Sep 17 00:00:00 2001 From: liyang830 Date: Mon, 6 Dec 2021 21:46:24 +0800 Subject: [PATCH 133/262] feat: add fist show quota data --- tests/integration/test_quota/test.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 4149987996b..9311b0bad36 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -393,30 +393,45 @@ def test_query_inserts(): def test_consumption_of_show_tables(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert instance.query("SHOW TABLES") == "test_table\n" assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_databases(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert instance.query("SHOW DATABASES") == "INFORMATION_SCHEMA\ndefault\ninformation_schema\nsystem\n" assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t4\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_clusters(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert len(instance.query("SHOW CLUSTERS")) > 0 assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_processlist(): + assert re.match( + 
"myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) instance.query("SHOW PROCESSLIST") assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_privileges(): + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", + instance.query("SHOW QUOTA")) assert len(instance.query("SHOW PRIVILEGES")) > 0 assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", From ee8a1607909d9867f7661d9bd6d0a605a5ab8820 Mon Sep 17 00:00:00 2001 From: vxider Date: Mon, 6 Dec 2021 14:26:39 +0000 Subject: [PATCH 134/262] enable window view tests --- tests/queries/0_stateless/01050_window_view_parser_tumble.sql | 2 -- tests/queries/0_stateless/01051_window_view_parser_hop.sql | 2 -- .../0_stateless/01052_window_view_proc_tumble_to_now.sql | 2 -- tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql | 2 -- tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql | 2 -- tests/queries/0_stateless/01055_window_view_proc_hop_to.sql | 2 -- .../01057_window_view_event_tumble_to_strict_asc.sql | 2 -- .../0_stateless/01058_window_view_event_hop_to_strict_asc.sql | 2 -- .../0_stateless/01060_window_view_event_tumble_to_asc.sql | 2 -- .../queries/0_stateless/01061_window_view_event_hop_to_asc.sql | 2 -- .../0_stateless/01063_window_view_event_tumble_to_bounded.sql | 2 -- .../0_stateless/01064_window_view_event_hop_to_bounded.sql | 2 -- .../01066_window_view_event_tumble_to_strict_asc_lateness.sql | 2 -- .../01067_window_view_event_tumble_to_asc_lateness.sql | 2 -- .../01068_window_view_event_tumble_to_bounded_lateness.sql | 2 -- 15 files changed, 30 deletions(-) diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 996ff4aa55a..6837036263c 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index 48ffe4c7f21..df0729108d0 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql index d23461570b5..169a66e7bc6 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql index 6ad9852c930..c39bab21cb1 100644 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql 
b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql index 22435dc3309..f229969603b 100644 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql index fc2eccdf4db..b75cc33e741 100644 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql index 799fe9b71cd..4883e006e85 100644 --- a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql +++ b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql index 677f814efe4..944fd9939b4 100644 --- a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql +++ b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql index 2edec8b8f11..18b17fd3d2b 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql index c10f09f2485..2cf98d6b08f 100644 --- a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql +++ b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql index 80e3ef02ea6..37757fd77b3 100644 --- a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql +++ b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql index ce9ae4c007f..5f148900905 100644 --- a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql +++ b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql 
b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql index 4d1c9929727..1c55b70f3aa 100644 --- a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql +++ b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql index 34bccd86769..11409203d4c 100644 --- a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql +++ b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql index f3ea2fc1f1e..74a095c632f 100644 --- a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql +++ b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql @@ -1,5 +1,3 @@ --- Tags: disabled - SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; From 303552f51507178fae443ddf822485031b6f7d9f Mon Sep 17 00:00:00 2001 From: msaf1980 Date: Thu, 25 Nov 2021 22:16:20 +0500 Subject: [PATCH 135/262] graphite: split tagged/plain rollup rules (for merges perfomance) --- base/base/StringRef.h | 5 +- src/CMakeLists.txt | 1 + src/Common/tests/gtest_global_context.cpp | 7 + src/Common/tests/gtest_global_context.h | 6 +- src/Processors/Merges/Algorithms/Graphite.cpp | 493 +++++++++++++++ src/Processors/Merges/Algorithms/Graphite.h | 37 +- .../GraphiteRollupSortedAlgorithm.cpp | 59 +- .../GraphiteRollupSortedAlgorithm.h | 10 - .../Algorithms/tests/gtest_graphite.cpp | 597 ++++++++++++++++++ .../MergeTree/registerStorageMergeTree.cpp | 175 ----- src/Storages/System/StorageSystemGraphite.cpp | 4 + tests/integration/helpers/test_tools.py | 16 + .../test_graphite_merge_tree/test.py | 20 +- .../__init__.py | 0 .../configs/graphite_rollup.xml | 120 ++++ .../configs/users.xml | 8 + .../test_graphite_merge_tree_typed/test.py | 580 +++++++++++++++++ ...ultiple_paths_and_versions.reference.plain | 84 +++ ...ltiple_paths_and_versions.reference.tagged | 84 +++ .../02117_show_create_table_system.reference | 2 +- utils/CMakeLists.txt | 1 + utils/graphite-rollup/CMakeLists.txt | 23 + .../graphite-rollup/graphite-rollup-bench.cpp | 147 +++++ utils/graphite-rollup/metrics.txt | 11 + utils/graphite-rollup/rollup-tag-list.xml | 167 +++++ utils/graphite-rollup/rollup-typed.xml | 167 +++++ utils/graphite-rollup/rollup.xml | 147 +++++ 27 files changed, 2705 insertions(+), 266 deletions(-) create mode 100644 src/Common/tests/gtest_global_context.cpp create mode 100644 src/Processors/Merges/Algorithms/Graphite.cpp create mode 100644 src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp create mode 100644 tests/integration/test_graphite_merge_tree_typed/__init__.py create mode 100644 tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml create mode 100644 tests/integration/test_graphite_merge_tree_typed/configs/users.xml create mode 100644 tests/integration/test_graphite_merge_tree_typed/test.py create mode 100644 
tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain create mode 100644 tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged create mode 100644 utils/graphite-rollup/CMakeLists.txt create mode 100644 utils/graphite-rollup/graphite-rollup-bench.cpp create mode 100644 utils/graphite-rollup/metrics.txt create mode 100644 utils/graphite-rollup/rollup-tag-list.xml create mode 100644 utils/graphite-rollup/rollup-typed.xml create mode 100644 utils/graphite-rollup/rollup.xml diff --git a/base/base/StringRef.h b/base/base/StringRef.h index d0184dbc24c..98c322320a5 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -48,7 +48,10 @@ struct StringRef std::string toString() const { return std::string(data, size); } explicit operator std::string() const { return toString(); } - constexpr explicit operator std::string_view() const { return {data, size}; } + + std::string_view toView() const { return std::string_view(data, size); } + + constexpr explicit operator std::string_view() const { return std::string_view(data, size); } }; /// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f4ebaaa895..1f7a2700e5a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -547,6 +547,7 @@ if (ENABLE_TESTS AND USE_GTEST) clickhouse_parsers clickhouse_storages_system dbms + clickhouse_common_config clickhouse_common_zookeeper string_utils) diff --git a/src/Common/tests/gtest_global_context.cpp b/src/Common/tests/gtest_global_context.cpp new file mode 100644 index 00000000000..19ba3cdc269 --- /dev/null +++ b/src/Common/tests/gtest_global_context.cpp @@ -0,0 +1,7 @@ +#include "gtest_global_context.h" + +const ContextHolder & getContext() +{ + static ContextHolder holder; + return holder; +} diff --git a/src/Common/tests/gtest_global_context.h b/src/Common/tests/gtest_global_context.h index 9bd7c2490d6..7756be7ce9b 100644 --- a/src/Common/tests/gtest_global_context.h +++ b/src/Common/tests/gtest_global_context.h @@ -18,8 +18,4 @@ struct ContextHolder ContextHolder(ContextHolder &&) = default; }; -inline const ContextHolder & getContext() -{ - static ContextHolder holder; - return holder; -} +const ContextHolder & getContext(); diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp new file mode 100644 index 00000000000..38d3fa30b42 --- /dev/null +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -0,0 +1,493 @@ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +using namespace std::literals; + +namespace DB::ErrorCodes +{ + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int NO_ELEMENTS_IN_CONFIG; + } + +namespace DB::Graphite +{ +static std::unordered_map ruleTypeMap = +{ + { RuleTypeAll, "all" }, + { RuleTypePlain, "plain" }, + { RuleTypeTagged, "tagged"}, + { RuleTypeTagList, "tag_list"} +}; + +const String & ruleTypeStr(RuleType rule_type) +{ + try + { + return ruleTypeMap.at(rule_type); + } + catch (...) 
+ { + throw Exception("invalid rule type: " + std::to_string(rule_type), DB::ErrorCodes::BAD_ARGUMENTS); + } +} + +RuleType ruleType(const String & s) +{ + if (s == "all") + return RuleTypeAll; + else if (s == "plain") + return RuleTypePlain; + else if (s == "tagged") + return RuleTypeTagged; + else if (s == "tag_list") + return RuleTypeTagList; + else + throw Exception("invalid rule type: " + s, DB::ErrorCodes::BAD_ARGUMENTS); +} + +static const Graphite::Pattern undef_pattern = +{ /// empty pattern for selectPatternForPath + .rule_type = RuleTypeAll, + .regexp = nullptr, + .regexp_str = "", + .function = nullptr, + .retentions = Graphite::Retentions(), + .type = undef_pattern.TypeUndef, +}; + +inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, const StringRef path) +{ + if (params.patterns_typed) + { + std::string_view path_view = path.toView(); + if (path_view.find("?"sv) == path_view.npos) + return params.patterns_plain; + else + return params.patterns_tagged; + } + else + { + return params.patterns; + } +} + +Graphite::RollupRule selectPatternForPath( + const Graphite::Params & params, + const StringRef path) +{ + const Graphite::Pattern * first_match = &undef_pattern; + + const Patterns & patterns_check = selectPatternsForMetricType(params, path); + + for (const auto & pattern : patterns_check) + { + if (!pattern.regexp) + { + /// Default pattern + if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) + { + /// There is only default pattern for both retention and aggregation + return std::pair(&pattern, &pattern); + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + else + { + if (pattern.regexp->match(path.data, path.size)) + { + /// General pattern with matched path + if (pattern.type == pattern.TypeAll) + { + /// Only for not default patterns with both function and retention parameters + return std::pair(&pattern, &pattern); + } + if (first_match->type == first_match->TypeUndef) + { + first_match = &pattern; + continue; + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + } + } + + return {nullptr, nullptr}; +} + +/** Is used to order Graphite::Retentions by age and precision descending. + * Throws exception if not both age and precision are less or greater then another. 
+ */ +static bool compareRetentions(const Retention & a, const Retention & b) +{ + if (a.age > b.age && a.precision > b.precision) + { + return true; + } + else if (a.age < b.age && a.precision < b.precision) + { + return false; + } + String error_msg = "age and precision should only grow up: " + + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " + + std::to_string(b.age) + ":" + std::to_string(b.precision); + throw Exception( + error_msg, + DB::ErrorCodes::BAD_ARGUMENTS); +} + +bool operator==(const Retention & a, const Retention & b) +{ + return a.age == b.age && a.precision == b.precision; +} + +std::ostream & operator<<(std::ostream & stream, const Retentions & a) +{ + stream << "{ "; + for (size_t i = 0; i < a.size(); i++) + { + if (i > 0) + stream << ","; + stream << " { age = " << a[i].age << ", precision = " << a[i].precision << " }"; + } + stream << " }"; + + return stream; +} + +bool operator==(const Pattern & a, const Pattern & b) +{ + // equal + // Retentions retentions; /// Must be ordered by 'age' descending. + if (a.type != b.type || a.regexp_str != b.regexp_str || a.rule_type != b.rule_type) + return false; + + if (a.function == nullptr) + { + if (b.function != nullptr) + return false; + } + else if (b.function == nullptr) + { + return false; + } + else if (a.function->getName() != b.function->getName()) + { + return false; + } + + return a.retentions == b.retentions; +} + +std::ostream & operator<<(std::ostream & stream, const Pattern & a) +{ + stream << "{ rule_type = " << ruleTypeStr(a.rule_type); + if (!a.regexp_str.empty()) + stream << ", regexp = '" << a.regexp_str << "'"; + if (a.function != nullptr) + stream << ", function = " << a.function->getName(); + if (!a.retentions.empty()) + { + stream << ",\n retentions = {\n"; + for (size_t i = 0; i < a.retentions.size(); i++) + { + stream << " { " << a.retentions[i].age << ", " << a.retentions[i].precision << " }"; + if (i < a.retentions.size() - 1) + stream << ","; + stream << "\n"; + } + stream << " }\n"; + } + else + stream << " "; + + stream << "}"; + return stream; +} + +std::string buildTaggedRegex(std::string regexp_str) +{ + /* + * tags list in format (for name or any value can use regexp, alphabet sorting not needed) + * spaces are not stiped and used as tag and value part + * name must be first (if used) + * + * tag1=value1; tag2=VALUE2_REGEX;tag3=value3 + * or + * name;tag1=value1;tag2=VALUE2_REGEX;tag3=value3 + * or for one tag + * tag1=value1 + * + * Resulting regex against metric like + * name?tag1=value1&tag2=value2 + * + * So, + * + * name + * produce + * name\? + * + * tag2=val2 + * produce + * [\?&]tag2=val2(&.*)?$ + * + * nam.* ; tag1=val1 ; tag2=val2 + * produce + * nam.*\?(.*&)?tag1=val1&(.*&)?tag2=val2(&.*)?$ + */ + + std::vector tags; + + splitInto<';'>(tags, regexp_str); + /* remove empthy elements */ + using namespace std::string_literals; + tags.erase(std::remove(tags.begin(), tags.end(), ""s), tags.end()); + if (tags[0].find('=') == tags[0].npos) + { + if (tags.size() == 1) /* only name */ + return "^" + tags[0] + "\\?"; + /* start with name value */ + regexp_str = "^" + tags[0] + "\\?(.*&)?"; + tags.erase(std::begin(tags)); + } + else + regexp_str = "[\\?&]"; + + std::sort(std::begin(tags), std::end(tags)); /* sorted tag keys */ + regexp_str += fmt::format( + "{}{}", + fmt::join(tags, "&(.*&)?"), + "(&.*)?$" /* close regex */ + ); + + return regexp_str; +} + +/** Read the settings for Graphite rollup from config. 
+ * Example + * + * + * Path + * + * click_cost + * any + * + * 0 + * 3600 + * + * + * 86400 + * 60 + * + * + * + * max + * + * 0 + * 60 + * + * + * 3600 + * 300 + * + * + * 86400 + * 3600 + * + * + * + */ +static const Pattern & +appendGraphitePattern( + const Poco::Util::AbstractConfiguration & config, + const String & config_element, Patterns & patterns, + bool default_rule, + ContextPtr context) +{ + Pattern pattern; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_element, keys); + + for (const auto & key : keys) + { + if (key == "regexp") + { + pattern.regexp_str = config.getString(config_element + ".regexp"); + } + else if (key == "function") + { + String aggregate_function_name_with_params = config.getString(config_element + ".function"); + String aggregate_function_name; + Array params_row; + getAggregateFunctionNameAndParametersArray( + aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); + + /// TODO Not only Float64 + AggregateFunctionProperties properties; + pattern.function = AggregateFunctionFactory::instance().get( + aggregate_function_name, {std::make_shared()}, params_row, properties); + } + else if (key == "rule_type") + { + String rule_type = config.getString(config_element + ".rule_type"); + pattern.rule_type = ruleType(rule_type); + } + else if (startsWith(key, "retention")) + { + pattern.retentions.emplace_back(Graphite::Retention{ + .age = config.getUInt(config_element + "." + key + ".age"), + .precision = config.getUInt(config_element + "." + key + ".precision")}); + } + else + throw Exception("Unknown element in config: " + key, DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + if (!pattern.regexp_str.empty()) + { + if (pattern.rule_type == RuleTypeTagList) + { + // construct tagged regexp + pattern.regexp_str = buildTaggedRegex(pattern.regexp_str); + pattern.rule_type = RuleTypeTagged; + } + pattern.regexp = std::make_shared(pattern.regexp_str); + } + + if (!pattern.function && pattern.retentions.empty()) + throw Exception( + "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", + DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + if (default_rule && pattern.rule_type != RuleTypeAll) + { + throw Exception( + "Default must have rule_type all for rollup patterns in GraphiteMergeTree", + DB::ErrorCodes::BAD_ARGUMENTS); + } + + if (!pattern.function) + { + pattern.type = pattern.TypeRetention; + } + else if (pattern.retentions.empty()) + { + pattern.type = pattern.TypeAggregation; + } + else + { + pattern.type = pattern.TypeAll; + } + + if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll + if (pattern.function->allocatesMemoryInArena()) + throw Exception( + "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", DB::ErrorCodes::NOT_IMPLEMENTED); + + /// retention should be in descending order of age. 
+ if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll + std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); + + patterns.emplace_back(pattern); + return patterns.back(); +} + +void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) +{ + const auto & config = context->getConfigRef(); + + if (!config.has(config_element)) + throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + params.config_name = config_element; + params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); + params.time_column_name = config.getString(config_element + ".time_column_name", "Time"); + params.value_column_name = config.getString(config_element + ".value_column_name", "Value"); + params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp"); + + params.patterns_typed = false; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_element, keys); + + for (const auto & key : keys) + { + if (startsWith(key, "pattern")) + { + if (appendGraphitePattern(config, config_element + "." + key, params.patterns, false, context).rule_type != RuleTypeAll) + params.patterns_typed = true; + } + else if (key == "default") + { + /// See below. + } + else if (key == "path_column_name" || key == "time_column_name" || key == "value_column_name" || key == "version_column_name") + { + /// See above. + } + else + throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + if (config.has(config_element + ".default")) + appendGraphitePattern(config, config_element + "." + ".default", params.patterns, true, context); + + for (const auto & pattern : params.patterns) + { + if (pattern.rule_type == RuleTypeAll) + { + if (params.patterns_typed) + { + params.patterns_plain.push_back(pattern); + params.patterns_tagged.push_back(pattern); + } + } + else if (pattern.rule_type == RuleTypePlain) + { + params.patterns_plain.push_back(pattern); + } + else if (pattern.rule_type == RuleTypeTagged) + { + params.patterns_tagged.push_back(pattern); + } + else + { + throw Exception("Unhandled rule_type in config: " + ruleTypeStr(pattern.rule_type), ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + } +} + +} diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index ecb1aeb9804..dc39cb46386 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,13 +1,8 @@ #pragma once + +#include #include - -namespace DB -{ - -class IAggregateFunction; -using AggregateFunctionPtr = std::shared_ptr; - -} +#include /** Intended for implementation of "rollup" - aggregation (rounding) of older data * for a table with Graphite data (Graphite is the system for time series monitoring). 
@@ -97,16 +92,32 @@ using AggregateFunctionPtr = std::shared_ptr; namespace DB::Graphite { +// sync with rule_types_str +enum RuleType +{ + RuleTypeAll = 0, // default, with regex, compatible with old scheme + RuleTypePlain = 1, // plain metrics, with regex, compatible with old scheme + RuleTypeTagged = 2, // tagged metrics, with regex, compatible with old scheme + RuleTypeTagList = 3 // tagged metrics, with regex (converted to RuleTypeTagged from string like 'retention=10min ; env=(staging|prod)') +}; + +const String & ruleTypeStr(RuleType rule_type); + struct Retention { UInt32 age; UInt32 precision; }; +bool operator==(const Retention & a, const Retention & b); + using Retentions = std::vector; +std::ostream &operator<<(std::ostream & stream, const Retentions & a); + struct Pattern { + RuleType rule_type = RuleTypeAll; std::shared_ptr regexp; std::string regexp_str; AggregateFunctionPtr function; @@ -114,6 +125,9 @@ struct Pattern enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically }; +bool operator==(const Pattern & a, const Pattern & b); +std::ostream &operator<<(std::ostream & stream, const Pattern & a); + using Patterns = std::vector; using RetentionPattern = Pattern; using AggregationPattern = Pattern; @@ -125,9 +139,16 @@ struct Params String time_column_name; String value_column_name; String version_column_name; + bool patterns_typed; Graphite::Patterns patterns; + Graphite::Patterns patterns_plain; + Graphite::Patterns patterns_tagged; }; using RollupRule = std::pair; +Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, const StringRef path); + +void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params); + } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 328c34823a0..c4f60571dd9 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -52,62 +53,6 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( columns_definition = defineColumns(header, params); } -Graphite::RollupRule GraphiteRollupSortedAlgorithm::selectPatternForPath(StringRef path) const -{ - const Graphite::Pattern * first_match = &undef_pattern; - - for (const auto & pattern : params.patterns) - { - if (!pattern.regexp) - { - /// Default pattern - if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) - { - /// There is only default pattern for both retention and aggregation - return std::pair(&pattern, &pattern); - } - if (pattern.type != first_match->type) - { - if (first_match->type == first_match->TypeRetention) - { - return std::pair(first_match, &pattern); - } - if (first_match->type == first_match->TypeAggregation) - { - return std::pair(&pattern, first_match); - } - } - } - else if (pattern.regexp->match(path.data, path.size)) - { - /// General pattern with matched path - if (pattern.type == pattern.TypeAll) - { - /// Only for not default patterns with both function and retention parameters - return std::pair(&pattern, &pattern); - } - if (first_match->type == first_match->TypeUndef) - { - first_match = &pattern; - continue; - } - if (pattern.type != first_match->type) - { - if (first_match->type == first_match->TypeRetention) - { - return std::pair(first_match, 
&pattern); - } - if (first_match->type == first_match->TypeAggregation) - { - return std::pair(&pattern, first_match); - } - } - } - } - - return {nullptr, nullptr}; -} - UInt32 GraphiteRollupSortedAlgorithm::selectPrecision(const Graphite::Retentions & retentions, time_t time) const { static_assert(is_signed_v, "time_t must be signed type"); @@ -188,7 +133,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() Graphite::RollupRule next_rule = merged_data.currentRule(); if (new_path) - next_rule = selectPatternForPath(next_path); + next_rule = selectPatternForPath(this->params, next_path); const Graphite::RetentionPattern * retention_pattern = std::get<0>(next_rule); time_t next_time_rounded; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index 0155b73b238..4968cbfc470 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -102,16 +102,6 @@ private: time_t current_time = 0; time_t current_time_rounded = 0; - const Graphite::Pattern undef_pattern = - { /// temporary empty pattern for selectPatternForPath - .regexp = nullptr, - .regexp_str = "", - .function = nullptr, - .retentions = DB::Graphite::Retentions(), - .type = undef_pattern.TypeUndef, - }; - - Graphite::RollupRule selectPatternForPath(StringRef path) const; UInt32 selectPrecision(const Graphite::Retentions & retentions, time_t time) const; /// Insert the values into the resulting columns, which will not be changed in the future. diff --git a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp new file mode 100644 index 00000000000..1d739bf566a --- /dev/null +++ b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp @@ -0,0 +1,597 @@ +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +using namespace DB; + +static int regAggregateFunctions = 0; + +void tryRegisterAggregateFunctions() +{ + if (!regAggregateFunctions) + { + registerAggregateFunctions(); + regAggregateFunctions = 1; + } +} + +static ConfigProcessor::LoadedConfig loadConfiguration(const std::string & config_path) +{ + ConfigProcessor config_processor(config_path, true, true); + ConfigProcessor::LoadedConfig config = config_processor.loadConfig(false); + return config; +} + +static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s) +{ + char tmp_file[19]; + strcpy(tmp_file, "/tmp/rollup-XXXXXX"); + int fd = mkstemp(tmp_file); + if (fd == -1) + { + throw std::runtime_error(strerror(errno)); + } + try { + if (write(fd, s.c_str(), s.size()) < s.size()) + { + throw std::runtime_error("unable write to temp file"); + } + if (write(fd, "\n", 1) != 1) + { + throw std::runtime_error("unable write to temp file"); + } + close(fd); + auto config_path = std::string(tmp_file) + ".xml"; + if (std::rename(tmp_file, config_path.c_str())) + { + int err = errno; + remove(tmp_file); + throw std::runtime_error(strerror(err)); + } + ConfigProcessor::LoadedConfig config = loadConfiguration(config_path); + remove(tmp_file); + return config; + } + catch (...) 
+ { + remove(tmp_file); + throw; + } +} + +static Graphite::Params setGraphitePatterns(ContextMutablePtr context, ConfigProcessor::LoadedConfig & config) +{ + context->setConfig(config.configuration); + + Graphite::Params params; + setGraphitePatternsFromConfig(context, "graphite_rollup", params); + + return params; +} + +struct PatternForCheck +{ + Graphite::RuleType rule_type; + std::string regexp_str; + String function; + Graphite::Retentions retentions; +}; + + +bool checkRule(const Graphite::Pattern & pattern, const struct PatternForCheck & pattern_check, + const std::string & typ, const std::string & path, std::string & message) +{ + bool rule_type_eq = (pattern.rule_type == pattern_check.rule_type); + bool regexp_eq = (pattern.regexp_str == pattern_check.regexp_str); + bool function_eq = (pattern.function == nullptr && pattern_check.function.empty()) + || (pattern.function != nullptr && pattern.function->getName() == pattern_check.function); + bool retentions_eq = (pattern.retentions == pattern_check.retentions); + + if (rule_type_eq && regexp_eq && function_eq && retentions_eq) + return true; + + message = typ + " rollup rule mismatch for '" + path + "'," + + (rule_type_eq ? "" : "rule_type ") + + (regexp_eq ? "" : "regexp ") + + (function_eq ? "" : "function ") + + (retentions_eq ? "" : "retentions "); + return false; +} + +std::ostream & operator<<(std::ostream & stream, const PatternForCheck & a) +{ + stream << "{ rule_type = " << ruleTypeStr(a.rule_type); + if (!a.regexp_str.empty()) + stream << ", regexp = '" << a.regexp_str << "'"; + if (!a.function.empty()) + stream << ", function = " << a.function; + if (!a.retentions.empty()) + { + stream << ",\n retentions = {\n"; + for (size_t i = 0; i < a.retentions.size(); i++) + { + stream << " { " << a.retentions[i].age << ", " << a.retentions[i].precision << " }"; + if (i < a.retentions.size() - 1) + stream << ","; + stream << "\n"; + } + stream << " }\n"; + } + else + stream << " "; + + stream << "}"; + return stream; +} + +struct PatternsForPath +{ + std::string path; + PatternForCheck retention_want; + PatternForCheck aggregation_want; +}; + +TEST(GraphiteTest, testSelectPattern) +{ + tryRegisterAggregateFunctions(); + + using namespace std::literals; + + std::string + xml(R"END( + + + \.sum$ + sum + + + ^((.*)|.)sum\? + sum + + + \.max$ + max + + + ^((.*)|.)max\? + max + + + \.min$ + min + + + ^((.*)|.)min\? + min + + + \.(count|sum|sum_sq)$ + sum + + + ^((.*)|.)(count|sum|sum_sq)\? + sum + + + ^retention\. + + 0 + 60 + + + 86400 + 3600 + + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + +)END"); + + // Retentions must be ordered by 'age' descending. 
+ std::vector tests + { + { + "test.sum", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(\.sum$)END", "sum", { } } + }, + { + "val.sum?env=test&tag=Fake3", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)sum\?)END", "sum", { } } + }, + { + "test.max", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(\.max$)END", "max", { } }, + }, + { + "val.max?env=test&tag=Fake4", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)max\?)END", "max", { } }, + }, + { + "test.min", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(\.min$)END", "min", { } }, + }, + { + "val.min?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)min\?)END", "min", { } }, + }, + { + "retention.count", + { Graphite::RuleTypeAll, R"END(^retention\.)END", "", { { 86400, 3600 }, { 0, 60 } } }, // ^retention + { Graphite::RuleTypeAll, R"END(\.(count|sum|sum_sq)$)END", "sum", { } }, + }, + { + "val.retention.count?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "test.p95", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.p95?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "default", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.default?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + } + }; + + auto config = loadConfigurationFromString(xml); + ContextMutablePtr context = getContext().context; + Graphite::Params params = setGraphitePatterns(context, config); + + for (const auto & t : tests) + { + auto rule = DB::Graphite::selectPatternForPath(params, t.path); + std:: string message; + if (!checkRule(*rule.first, t.retention_want, "retention", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.first << "\n, want\n" << t.retention_want << "\n"; + if (!checkRule(*rule.second, t.aggregation_want, "aggregation", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.second << "\n, want\n" << t.aggregation_want << "\n"; + } +} + + +namespace DB::Graphite +{ + std::string buildTaggedRegex(std::string regexp_str); +} + +struct RegexCheck +{ + 
std::string regex; + std::string regex_want; + std::string match; + std::string nomatch; +}; + +TEST(GraphiteTest, testBuildTaggedRegex) +{ + std::vector tests + { + { + "cpu\\.loadavg;project=DB.*;env=st.*", + R"END(^cpu\.loadavg\?(.*&)?env=st.*&(.*&)?project=DB.*(&.*)?$)END", + R"END(cpu.loadavg?env=staging&project=DBAAS)END", + R"END(cpu.loadavg?env=staging&project=D)END" + }, + { + R"END(project=DB.*;env=staging;)END", + R"END([\?&]env=staging&(.*&)?project=DB.*(&.*)?$)END", + R"END(cpu.loadavg?env=staging&project=DBPG)END", + R"END(cpu.loadavg?env=stagingN&project=DBAAS)END" + }, + { + "env=staging;", + R"END([\?&]env=staging(&.*)?$)END", + R"END(cpu.loadavg?env=staging&project=DPG)END", + R"END(cpu.loadavg?env=stagingN)END" + }, + { + " env = staging ;", // spaces are allowed, + R"END([\?&] env = staging (&.*)?$)END", + R"END(cpu.loadavg? env = staging &project=DPG)END", + R"END(cpu.loadavg?env=stagingN)END" + }, + { + "name;", + R"END(^name\?)END", + R"END(name?env=staging&project=DPG)END", + R"END(nameN?env=stagingN)END", + }, + { + "name", + R"END(^name\?)END", + R"END(name?env=staging&project=DPG)END", + R"END(nameN?env=stagingN)END", + } + }; + for (const auto & t : tests) + { + auto s = DB::Graphite::buildTaggedRegex(t.regex); + EXPECT_EQ(t.regex_want, s) << "result for '" << t.regex_want << "' mismatch"; + auto regexp = OptimizedRegularExpression(s); + EXPECT_TRUE(regexp.match(t.match.data(), t.match.size())) << t.match << " match for '" << s << "' failed"; + EXPECT_FALSE(regexp.match(t.nomatch.data(), t.nomatch.size())) << t.nomatch << " ! match for '" << s << "' failed"; + } +} + +TEST(GraphiteTest, testSelectPatternTyped) +{ + tryRegisterAggregateFunctions(); + + using namespace std::literals; + + std::string + xml(R"END( + + + plain + \.sum$ + sum + + + tagged + ^((.*)|.)sum\? + sum + + + plain + \.max$ + max + + + tagged + ^((.*)|.)max\? + max + + + plain + \.min$ + min + + + tagged + ^((.*)|.)min\? + min + + + plain + \.(count|sum|sum_sq)$ + sum + + + tagged + ^((.*)|.)(count|sum|sum_sq)\? + sum + + + plain + ^retention\. + + 0 + 60 + + + 86400 + 3600 + + + + tagged + + + 0 + 60 + + + 86400 + 3600 + + + + tag_list + retention=10min;env=staging + + 0 + 600 + + + 86400 + 3600 + + + + tag_list + retention=10min;env=[A-Za-z-]+rod[A-Za-z-]+ + + 0 + 600 + + + 86400 + 3600 + + + + tag_list + cpu\.loadavg + + 0 + 600 + + + 86400 + 3600 + + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + +)END"); + + // Retentions must be ordered by 'age' descending. 
+ std::vector tests + { + { + "test.sum", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypePlain, R"END(\.sum$)END", "sum", { } } + }, + { + "val.sum?env=test&tag=Fake3", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)sum\?)END", "sum", { } } + }, + { + "test.max", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypePlain, R"END(\.max$)END", "max", { } }, + }, + { + "val.max?env=test&tag=Fake4", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)max\?)END", "max", { } }, + }, + { + "test.min", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypePlain, R"END(\.min$)END", "min", { } }, + }, + { + "val.min?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)min\?)END", "min", { } }, + }, + { + "retention.count", + { Graphite::RuleTypePlain, R"END(^retention\.)END", "", { { 86400, 3600 }, { 0, 60 } } }, // ^retention + { Graphite::RuleTypePlain, R"END(\.(count|sum|sum_sq)$)END", "sum", { } }, + }, + { + "val.count?env=test&retention=hour&tag=Fake5", + { Graphite::RuleTypeTagged, R"END([\?&]retention=hour(&.*)?$)END", "", { { 86400, 3600 }, { 0, 60 } } }, // tagged retention=hour + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=test&retention=hour", + { Graphite::RuleTypeTagged, R"END([\?&]retention=hour(&.*)?$)END", "", { { 86400, 3600 }, { 0, 60 } } }, // tagged retention=hour + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=staging&retention=10min", + { Graphite::RuleTypeTagged, R"END([\?&]env=staging&(.*&)?retention=10min(&.*)?$)END", "", { { 86400, 3600 }, { 0, 600 } } }, // retention=10min ; env=staging + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=production&retention=10min", + { Graphite::RuleTypeTagged, R"END([\?&]env=[A-Za-z-]+rod[A-Za-z-]+&(.*&)?retention=10min(&.*)?$)END", "", { { 86400, 3600 }, { 0, 600 } } }, // retention=10min ; env=[A-Za-z-]+rod[A-Za-z-]+ + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "val.count?env=test&tag=Fake5", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, + }, + { + "cpu.loadavg?env=test&tag=FakeNo", + { Graphite::RuleTypeTagged, R"END(^cpu\.loadavg\?)END", "", { { 86400, 3600 }, { 0, 600 } } }, // name=cpu\.loadavg + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, + }, + { + "test.p95", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.p95?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "default", + { Graphite::RuleTypeAll, 
"", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + }, + { + "val.default?env=test&tag=FakeNo", + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default + } + }; + + auto config = loadConfigurationFromString(xml); + ContextMutablePtr context = getContext().context; + Graphite::Params params = setGraphitePatterns(context, config); + + for (const auto & t : tests) + { + auto rule = DB::Graphite::selectPatternForPath(params, t.path); + std:: string message; + if (!checkRule(*rule.first, t.retention_want, "retention", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.first << "\n, want\n" << t.retention_want << "\n"; + if (!checkRule(*rule.second, t.aggregation_want, "aggregation", t.path, message)) + ADD_FAILURE() << message << ", got\n" << *rule.second << "\n, want\n" << t.aggregation_want << "\n"; + } +} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index cb52c8b86c0..ac6f4d8b7a4 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -22,17 +22,13 @@ #include #include -#include namespace DB { namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int NO_ELEMENTS_IN_CONFIG; extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; } @@ -62,171 +58,6 @@ static Names extractColumnNames(const ASTPtr & node) } } -/** Is used to order Graphite::Retentions by age and precision descending. - * Throws exception if not both age and precision are less or greater then another. - */ -static bool compareRetentions(const Graphite::Retention & a, const Graphite::Retention & b) -{ - if (a.age > b.age && a.precision > b.precision) - { - return true; - } - else if (a.age < b.age && a.precision < b.precision) - { - return false; - } - String error_msg = "age and precision should only grow up: " - + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " - + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception( - error_msg, - ErrorCodes::BAD_ARGUMENTS); -} - -/** Read the settings for Graphite rollup from config. 
- * Example - * - * - * Path - * - * click_cost - * any - * - * 0 - * 3600 - * - * - * 86400 - * 60 - * - * - * - * max - * - * 0 - * 60 - * - * - * 3600 - * 300 - * - * - * 86400 - * 3600 - * - * - * - */ -static void appendGraphitePattern( - const Poco::Util::AbstractConfiguration & config, - const String & config_element, - Graphite::Patterns & out_patterns, - ContextPtr context) -{ - Graphite::Pattern pattern; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_element, keys); - - for (const auto & key : keys) - { - if (key == "regexp") - { - pattern.regexp_str = config.getString(config_element + ".regexp"); - pattern.regexp = std::make_shared(pattern.regexp_str); - } - else if (key == "function") - { - String aggregate_function_name_with_params = config.getString(config_element + ".function"); - String aggregate_function_name; - Array params_row; - getAggregateFunctionNameAndParametersArray( - aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); - - /// TODO Not only Float64 - AggregateFunctionProperties properties; - pattern.function = AggregateFunctionFactory::instance().get( - aggregate_function_name, {std::make_shared()}, params_row, properties); - } - else if (startsWith(key, "retention")) - { - pattern.retentions.emplace_back(Graphite::Retention{ - .age = config.getUInt(config_element + "." + key + ".age"), - .precision = config.getUInt(config_element + "." + key + ".precision")}); - } - else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - if (!pattern.function && pattern.retentions.empty()) - throw Exception( - "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - if (!pattern.function) - { - pattern.type = pattern.TypeRetention; - } - else if (pattern.retentions.empty()) - { - pattern.type = pattern.TypeAggregation; - } - else - { - pattern.type = pattern.TypeAll; - } - - if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll - if (pattern.function->allocatesMemoryInArena()) - throw Exception( - "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", ErrorCodes::NOT_IMPLEMENTED); - - /// retention should be in descending order of age. 
- if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll - std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); - - out_patterns.emplace_back(pattern); -} - -static void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) -{ - const auto & config = context->getConfigRef(); - - if (!config.has(config_element)) - throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - params.config_name = config_element; - params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); - params.time_column_name = config.getString(config_element + ".time_column_name", "Time"); - params.value_column_name = config.getString(config_element + ".value_column_name", "Value"); - params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp"); - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_element, keys); - - for (const auto & key : keys) - { - if (startsWith(key, "pattern")) - { - appendGraphitePattern(config, config_element + "." + key, params.patterns, context); - } - else if (key == "default") - { - /// See below. - } - else if (key == "path_column_name" || key == "time_column_name" || key == "value_column_name" || key == "version_column_name") - { - /// See above. - } - else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - if (config.has(config_element + ".default")) - appendGraphitePattern(config, config_element + "." + ".default", params.patterns, context); -} - - static String getMergeTreeVerboseHelp(bool) { using namespace std::string_literals; @@ -542,12 +373,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// to make possible copying metadata files between replicas. 
Macros::MacroExpansionInfo info; info.table_id = args.table_id; - if (is_replicated_database) - { - auto database = DatabaseCatalog::instance().getDatabase(args.table_id.database_name); - info.shard = getReplicatedDatabaseShardName(database); - info.replica = getReplicatedDatabaseReplicaName(database); - } if (!allow_uuid_macro) info.table_id.uuid = UUIDHelpers::Nil; zookeeper_path = args.getContext()->getMacros()->expand(zookeeper_path, info); diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index dd592600d18..8711162385f 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -10,6 +10,7 @@ NamesAndTypesList StorageSystemGraphite::getNamesAndTypes() { return { {"config_name", std::make_shared()}, + {"rule_type", std::make_shared()}, {"regexp", std::make_shared()}, {"function", std::make_shared()}, {"age", std::make_shared()}, @@ -85,6 +86,7 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co bool is_default = pattern.regexp == nullptr; String regexp; String function; + const String & rule_type = ruleTypeStr(pattern.rule_type); if (is_default) { @@ -107,6 +109,7 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co { size_t i = 0; res_columns[i++]->insert(config.first); + res_columns[i++]->insert(rule_type); res_columns[i++]->insert(regexp); res_columns[i++]->insert(function); res_columns[i++]->insert(retention.age); @@ -121,6 +124,7 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co { size_t i = 0; res_columns[i++]->insert(config.first); + res_columns[i++]->insert(rule_type); res_columns[i++]->insert(regexp); res_columns[i++]->insert(function); res_columns[i++]->insertDefault(); diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index 3577553be34..ec3841f79d7 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -100,3 +100,19 @@ def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silen time.sleep(sleep_time) else: raise exception + +def csv_compare(result, expected): + csv_result = TSV(result) + csv_expected = TSV(expected) + mismatch = [] + max_len = len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected) + for i in range(max_len): + if i >= len(csv_result): + mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) + elif i >= len(csv_expected): + mismatch.append("+[%d]=%s" % (i, csv_result.lines[i])) + elif csv_expected.lines[i] != csv_result.lines[i]: + mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) + mismatch.append("+[%d]=%s" % (i, csv_result.lines[i])) + + return "\n".join(mismatch) diff --git a/tests/integration/test_graphite_merge_tree/test.py b/tests/integration/test_graphite_merge_tree/test.py index 7628211551d..9e48f12f007 100644 --- a/tests/integration/test_graphite_merge_tree/test.py +++ b/tests/integration/test_graphite_merge_tree/test.py @@ -6,6 +6,7 @@ import pytest from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV +from helpers.test_tools import csv_compare cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', @@ -234,18 +235,19 @@ SELECT * FROM test.graphite; def test_system_graphite_retentions(graphite_table): expected = ''' -graphite_rollup \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] -graphite_rollup 
\\\\.max$ max 0 0 2 0 ['test'] ['graphite'] -graphite_rollup ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] -graphite_rollup ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] -graphite_rollup ^five_min\\\\. 0 300 3 0 ['test'] ['graphite'] -graphite_rollup ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] -graphite_rollup ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] -graphite_rollup ^one_min avg 0 60 4 0 ['test'] ['graphite'] +graphite_rollup all \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] +graphite_rollup all \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] +graphite_rollup all ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] +graphite_rollup all ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] +graphite_rollup all ^five_min\\\\. 0 300 3 0 ['test'] ['graphite'] +graphite_rollup all ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] +graphite_rollup all ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] +graphite_rollup all ^one_min avg 0 60 4 0 ['test'] ['graphite'] ''' result = q('SELECT * from system.graphite_retentions') - assert TSV(result) == TSV(expected) + mismatch = csv_compare(result, expected) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" q(''' DROP TABLE IF EXISTS test.graphite2; diff --git a/tests/integration/test_graphite_merge_tree_typed/__init__.py b/tests/integration/test_graphite_merge_tree_typed/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml b/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml new file mode 100644 index 00000000000..c716540a61c --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml @@ -0,0 +1,120 @@ + + + + metric + timestamp + value + updated + + plain + \.count$ + sum + + + plain + \.max$ + max + + + plain + ^five_min\. + + 0 + 300 + + + 5184000 + 3600 + + + 31536000 + 14400 + + + + plain + ^one_min + avg + + 0 + 60 + + + 7776000 + 300 + + + 31536000 + 600 + + + + tagged + + avg + + 0 + 60 + + + 7776000 + 300 + + + 31536000 + 600 + + + + tag_list + retention=five_min + avg + + 0 + 300 + + + 5184000 + 3600 + + + 31536000 + 14400 + + + + tagged + ^for_taggged + avg + + 0 + 60 + + + 7776000 + 300 + + + 31536000 + 600 + + + + all + ^ten_min\. 
+ sum + + 0 + 600 + + + 5184000 + 7200 + + + 31536000 + 28800 + + + + diff --git a/tests/integration/test_graphite_merge_tree_typed/configs/users.xml b/tests/integration/test_graphite_merge_tree_typed/configs/users.xml new file mode 100644 index 00000000000..66d0cd7e445 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/configs/users.xml @@ -0,0 +1,8 @@ + + + + + 0 + + + diff --git a/tests/integration/test_graphite_merge_tree_typed/test.py b/tests/integration/test_graphite_merge_tree_typed/test.py new file mode 100644 index 00000000000..e26fd0d2e77 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/test.py @@ -0,0 +1,580 @@ +import datetime +import os.path as p +import time + +import sys +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.test_tools import csv_compare + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance', + main_configs=['configs/graphite_rollup.xml'], + user_configs=["configs/users.xml"]) +q = instance.query + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + q('CREATE DATABASE test') + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture +def graphite_table(started_cluster): + q(''' +DROP TABLE IF EXISTS test.graphite; +CREATE TABLE test.graphite + (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) + ENGINE = GraphiteMergeTree('graphite_rollup') + PARTITION BY toYYYYMM(date) + ORDER BY (metric, timestamp) + SETTINGS index_granularity=8192; +''') + + yield + + q('DROP TABLE test.graphite') + + +def test_rollup_versions_plain(graphite_table): + timestamp = int(time.time()) + rounded_timestamp = timestamp - timestamp % 60 + date = datetime.date.today().isoformat() + + # Insert rows with timestamps relative to the current time so that the + # first retention clause is active. + # Two parts are created. + q(''' +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('one_min.x1', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('one_min.x1', 200, {timestamp}, '{date}', 2); +'''.format(timestamp=timestamp, date=date)) + + expected1 = '''\ +one_min.x1 100 {timestamp} {date} 1 +one_min.x1 200 {timestamp} {date} 2 +'''.format(timestamp=timestamp, date=date) + + assert TSV( + q('SELECT * FROM test.graphite ORDER BY updated') + ) == TSV(expected1) + + q('OPTIMIZE TABLE test.graphite') + + # After rollup only the row with max version is retained. + expected2 = '''\ +one_min.x1 200 {timestamp} {date} 2 +'''.format(timestamp=rounded_timestamp, date=date) + + assert TSV(q('SELECT * FROM test.graphite')) == TSV(expected2) + + +def test_rollup_versions_tagged(graphite_table): + timestamp = int(time.time()) + rounded_timestamp = timestamp - timestamp % 60 + date = datetime.date.today().isoformat() + + # Insert rows with timestamps relative to the current time so that the + # first retention clause is active. + # Two parts are created. 
+ q(''' +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('x1?retention=one_min', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('x1?retention=one_min', 200, {timestamp}, '{date}', 2); +'''.format(timestamp=timestamp, date=date)) + + expected1 = '''\ +x1?retention=one_min 100 {timestamp} {date} 1 +x1?retention=one_min 200 {timestamp} {date} 2 +'''.format(timestamp=timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected1) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected1}\ndiff\n{mismatch}\n" + + q('OPTIMIZE TABLE test.graphite') + + # After rollup only the row with max version is retained. + expected2 = '''\ +x1?retention=one_min 200 {timestamp} {date} 2 +'''.format(timestamp=rounded_timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected2) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected2}\ndiff\n{mismatch}\n" + + +def test_rollup_versions_all(graphite_table): + timestamp = int(time.time()) + rounded_timestamp = timestamp - timestamp % 600 + date = datetime.date.today().isoformat() + + # Insert rows with timestamps relative to the current time so that the + # first retention clause is active. + # Two parts are created. + q(''' +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1', 200, {timestamp}, '{date}', 2); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1?env=staging', 100, {timestamp}, '{date}', 1); +INSERT INTO test.graphite (metric, value, timestamp, date, updated) + VALUES ('ten_min.x1?env=staging', 200, {timestamp}, '{date}', 2); +'''.format(timestamp=timestamp, date=date)) + + expected1 = '''\ +ten_min.x1 100 {timestamp} {date} 1 +ten_min.x1 200 {timestamp} {date} 2 +ten_min.x1?env=staging 100 {timestamp} {date} 1 +ten_min.x1?env=staging 200 {timestamp} {date} 2 +'''.format(timestamp=timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected1) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected1}\ndiff\n{mismatch}\n" + + q('OPTIMIZE TABLE test.graphite') + + # After rollup only the row with max version is retained. + expected2 = '''\ +ten_min.x1 200 {timestamp} {date} 2 +ten_min.x1?env=staging 200 {timestamp} {date} 2 +'''.format(timestamp=rounded_timestamp, date=date) + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected2) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected2}\ndiff\n{mismatch}\n" + + +def test_rollup_aggregation_plain(graphite_table): + # This query essentially emulates what rollup does. 
+ result1 = q(''' +SELECT avg(v), max(upd) +FROM (SELECT timestamp, + argMax(value, (updated, number)) AS v, + max(updated) AS upd + FROM (SELECT 'one_min.x5' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(intDiv(number, 2)) AS updated, + number + FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200 + GROUP BY timestamp) +''') + + expected1 = '''\ +999634.9918367347 499999 +''' + assert TSV(result1) == TSV(expected1) + + # Timestamp 1111111111 is in sufficiently distant past + # so that the last retention clause is active. + result2 = q(''' +INSERT INTO test.graphite + SELECT 'one_min.x' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated + FROM (SELECT * FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200; + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected2 = '''\ +one_min.x 999634.9918367347 1111444200 2017-02-02 499999 +''' + + assert TSV(result2) == TSV(expected2) + + +def test_rollup_aggregation_tagged(graphite_table): + # This query essentially emulates what rollup does. + result1 = q(''' +SELECT avg(v), max(upd) +FROM (SELECT timestamp, + argMax(value, (updated, number)) AS v, + max(updated) AS upd + FROM (SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(intDiv(number, 2)) AS updated, + number + FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200 + GROUP BY timestamp) +''') + + expected1 = '''\ +999634.9918367347 499999 +''' + assert TSV(result1) == TSV(expected1) + + # Timestamp 1111111111 is in sufficiently distant past + # so that the last retention clause is active. 
+ result2 = q(''' +INSERT INTO test.graphite + SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated + FROM (SELECT * FROM system.numbers LIMIT 1000000) + WHERE intDiv(timestamp, 600) * 600 = 1111444200; + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected2 = '''\ +x?retention=one_min 999634.9918367347 1111444200 2017-02-02 499999 +''' + + assert TSV(result2) == TSV(expected2) + + +def test_rollup_aggregation_2_plain(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'one_min.x' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected = '''\ +one_min.x 24 1111110600 2017-02-02 100 +''' + + assert TSV(result) == TSV(expected) + + +def test_rollup_aggregation_2_tagged(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + expected = '''\ +x?retention=one_min 24 1111110600 2017-02-02 100 +''' + + assert TSV(result) == TSV(expected) + + +def test_multiple_paths_and_versions_plain(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'one_min.x' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; + + +INSERT INTO test.graphite + SELECT 'one_min.y' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + number * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + with open(p.join(p.dirname(__file__), + 'test_multiple_paths_and_versions.reference.plain') + ) as reference: + assert TSV(result) == TSV(reference) + + +def test_multiple_paths_and_versions_tagged(graphite_table): + result = q(''' +INSERT INTO test.graphite + SELECT 'x?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; + + +INSERT INTO test.graphite + SELECT 'y?retention=one_min' AS metric, + toFloat64(number) AS value, + toUInt32(1111111111 + number * 600) AS timestamp, + toDate('2017-02-02') AS date, + toUInt32(100 - number) AS updated + FROM (SELECT * FROM system.numbers LIMIT 50); + +OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; + +SELECT * FROM test.graphite; +''') + + with open(p.join(p.dirname(__file__), + 'test_multiple_paths_and_versions.reference.tagged') + ) as reference: + assert 
TSV(result) == TSV(reference) + + +def test_multiple_output_blocks(graphite_table): + MERGED_BLOCK_SIZE = 8192 + + to_insert = '' + expected = '' + for i in range(2 * MERGED_BLOCK_SIZE + 1): + rolled_up_time = 1000000200 + 600 * i + + for j in range(3): + cur_time = rolled_up_time + 100 * j + to_insert += 'one_min.x1 {} {} 2001-09-09 1\n'.format( + 10 * j, cur_time + ) + to_insert += 'one_min.x1 {} {} 2001-09-09 2\n'.format( + 10 * (j + 1), cur_time + ) + + expected += 'one_min.x1 20 {} 2001-09-09 2\n'.format(rolled_up_time) + + q('INSERT INTO test.graphite FORMAT TSV', to_insert) + + result = q(''' +OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; + +SELECT * FROM test.graphite; +''') + + assert TSV(result) == TSV(expected) + + +def test_paths_not_matching_any_pattern(graphite_table): + to_insert = '''\ +one_min.x1 100 1000000000 2001-09-09 1 +zzzzzzzz 100 1000000001 2001-09-09 1 +zzzzzzzz 200 1000000001 2001-09-09 2 +''' + + q('INSERT INTO test.graphite FORMAT TSV', to_insert) + + expected = '''\ +one_min.x1 100 999999600 2001-09-09 1 +zzzzzzzz 200 1000000001 2001-09-09 2 +''' + + result = q(''' +OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; + +SELECT * FROM test.graphite; +''') + + assert TSV(result) == TSV(expected) + + +def test_rules_isolation(graphite_table): + to_insert = '''\ +one_min.x1 100 1000000000 2001-09-09 1 +for_taggged 100 1000000001 2001-09-09 1 +for_taggged 200 1000000001 2001-09-09 2 +one_min?env=staging 100 1000000001 2001-09-09 1 +one_min?env=staging 200 1000000001 2001-09-09 2 +''' + + q('INSERT INTO test.graphite FORMAT TSV', to_insert) + + expected = '''\ +for_taggged 200 1000000001 2001-09-09 2 +one_min.x1 100 999999600 2001-09-09 1 +one_min?env=staging 200 1000000001 2001-09-09 2 +''' + + result = q(''' +OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; + +SELECT * FROM test.graphite; +''') + + result = q('SELECT * FROM test.graphite ORDER BY metric, updated') + mismatch = csv_compare(result, expected) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" + + +def test_system_graphite_retentions(graphite_table): + expected = ''' +graphite_rollup plain \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] +graphite_rollup plain \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] +graphite_rollup plain ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] +graphite_rollup plain ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] +graphite_rollup plain ^five_min\\\\. 
0 300 3 0 ['test'] ['graphite'] +graphite_rollup plain ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] +graphite_rollup plain ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] +graphite_rollup plain ^one_min avg 0 60 4 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 31536000 600 5 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 7776000 300 5 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 0 60 5 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 31536000 14400 6 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 5184000 3600 6 0 ['test'] ['graphite'] +graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 0 300 6 0 ['test'] ['graphite'] +graphite_rollup tagged ^for_taggged avg 31536000 600 7 0 ['test'] ['graphite'] +graphite_rollup tagged ^for_taggged avg 7776000 300 7 0 ['test'] ['graphite'] +graphite_rollup tagged ^for_taggged avg 0 60 7 0 ['test'] ['graphite'] +graphite_rollup all ^ten_min\\\\. sum 31536000 28800 8 0 ['test'] ['graphite'] +graphite_rollup all ^ten_min\\\\. sum 5184000 7200 8 0 ['test'] ['graphite'] +graphite_rollup all ^ten_min\\\\. sum 0 600 8 0 ['test'] ['graphite'] + ''' + result = q('SELECT * from system.graphite_retentions') + + mismatch = csv_compare(result, expected) + assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" + + q(''' +DROP TABLE IF EXISTS test.graphite2; +CREATE TABLE test.graphite2 + (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) + ENGINE = GraphiteMergeTree('graphite_rollup') + PARTITION BY toYYYYMM(date) + ORDER BY (metric, timestamp) + SETTINGS index_granularity=8192; + ''') + expected = ''' +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] +graphite_rollup ['test','test'] ['graphite','graphite2'] + ''' + result = q(''' + SELECT + config_name, + Tables.database, + Tables.table + FROM system.graphite_retentions + ''') + assert csv_compare(result, expected), f"got\n{result}\nwant\n{expected}" + + +def test_path_dangling_pointer(graphite_table): + q(''' +DROP TABLE IF EXISTS test.graphite2; +CREATE TABLE test.graphite2 + (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) + ENGINE = GraphiteMergeTree('graphite_rollup') + PARTITION BY toYYYYMM(date) + ORDER BY (metric, timestamp) + SETTINGS index_granularity=1; + ''') + + path = 'abcd' * 4000000 # 16MB + q('INSERT INTO test.graphite2 FORMAT TSV', + "{}\t0.0\t0\t2018-01-01\t100\n".format(path)) + q('INSERT INTO test.graphite2 FORMAT TSV', + "{}\t0.0\t0\t2018-01-01\t101\n".format(path)) + for version in range(10): + q('INSERT INTO test.graphite2 FORMAT TSV', + "{}\t0.0\t0\t2018-01-01\t{}\n".format(path, version)) + + while True: + q('OPTIMIZE TABLE test.graphite2 PARTITION 201801 FINAL') + parts = int(q("SELECT count() FROM system.parts " + "WHERE active AND database='test' " + "AND table='graphite2'")) + if parts == 1: + break + print(('Parts', parts)) + + assert TSV( + q("SELECT value, timestamp, date, updated FROM test.graphite2") + ) == TSV("0\t0\t2018-01-01\t101\n") + + q('DROP 
TABLE test.graphite2') + + +def test_combined_rules(graphite_table): + # 1487970000 ~ Sat 25 Feb 00:00:00 MSK 2017 + to_insert = 'INSERT INTO test.graphite VALUES ' + expected_unmerged = '' + for i in range(384): + to_insert += "('five_min.count', {v}, {t}, toDate({t}), 1), ".format( + v=1, t=1487970000 + (i * 300) + ) + to_insert += "('five_min.max', {v}, {t}, toDate({t}), 1), ".format( + v=i, t=1487970000 + (i * 300) + ) + expected_unmerged += ("five_min.count\t{v1}\t{t}\n" + "five_min.max\t{v2}\t{t}\n").format( + v1=1, v2=i, + t=1487970000 + (i * 300) + ) + + q(to_insert) + assert TSV(q('SELECT metric, value, timestamp FROM test.graphite' + ' ORDER BY (timestamp, metric)')) == TSV(expected_unmerged) + + q('OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL') + expected_merged = ''' + five_min.count 48 1487970000 2017-02-25 1 + five_min.count 48 1487984400 2017-02-25 1 + five_min.count 48 1487998800 2017-02-25 1 + five_min.count 48 1488013200 2017-02-25 1 + five_min.count 48 1488027600 2017-02-25 1 + five_min.count 48 1488042000 2017-02-25 1 + five_min.count 48 1488056400 2017-02-26 1 + five_min.count 48 1488070800 2017-02-26 1 + five_min.max 47 1487970000 2017-02-25 1 + five_min.max 95 1487984400 2017-02-25 1 + five_min.max 143 1487998800 2017-02-25 1 + five_min.max 191 1488013200 2017-02-25 1 + five_min.max 239 1488027600 2017-02-25 1 + five_min.max 287 1488042000 2017-02-25 1 + five_min.max 335 1488056400 2017-02-26 1 + five_min.max 383 1488070800 2017-02-26 1 + ''' + assert TSV(q('SELECT * FROM test.graphite' + ' ORDER BY (metric, timestamp)')) == TSV(expected_merged) diff --git a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain new file mode 100644 index 00000000000..0f10d11ed05 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain @@ -0,0 +1,84 @@ +one_min.x 0 1111110600 2017-02-02 100 +one_min.x 3 1111111200 2017-02-02 97 +one_min.x 6 1111111800 2017-02-02 94 +one_min.x 9 1111112400 2017-02-02 91 +one_min.x 12 1111113000 2017-02-02 88 +one_min.x 15 1111113600 2017-02-02 85 +one_min.x 18 1111114200 2017-02-02 82 +one_min.x 21 1111114800 2017-02-02 79 +one_min.x 24 1111115400 2017-02-02 76 +one_min.x 27 1111116000 2017-02-02 73 +one_min.x 30 1111116600 2017-02-02 70 +one_min.x 33 1111117200 2017-02-02 67 +one_min.x 36 1111117800 2017-02-02 64 +one_min.x 39 1111118400 2017-02-02 61 +one_min.x 42 1111119000 2017-02-02 58 +one_min.x 45 1111119600 2017-02-02 55 +one_min.x 48 1111120200 2017-02-02 52 +one_min.x 0 1111110600 2017-02-02 100 +one_min.x 3 1111111200 2017-02-02 97 +one_min.x 6 1111111800 2017-02-02 94 +one_min.x 9 1111112400 2017-02-02 91 +one_min.x 12 1111113000 2017-02-02 88 +one_min.x 15 1111113600 2017-02-02 85 +one_min.x 18 1111114200 2017-02-02 82 +one_min.x 21 1111114800 2017-02-02 79 +one_min.x 24 1111115400 2017-02-02 76 +one_min.x 27 1111116000 2017-02-02 73 +one_min.x 30 1111116600 2017-02-02 70 +one_min.x 33 1111117200 2017-02-02 67 +one_min.x 36 1111117800 2017-02-02 64 +one_min.x 39 1111118400 2017-02-02 61 +one_min.x 42 1111119000 2017-02-02 58 +one_min.x 45 1111119600 2017-02-02 55 +one_min.x 48 1111120200 2017-02-02 52 +one_min.y 0 1111110600 2017-02-02 100 +one_min.y 1 1111111200 2017-02-02 99 +one_min.y 2 1111111800 2017-02-02 98 +one_min.y 3 1111112400 2017-02-02 97 +one_min.y 4 1111113000 2017-02-02 96 +one_min.y 5 1111113600 2017-02-02 95 
+one_min.y 6 1111114200 2017-02-02 94 +one_min.y 7 1111114800 2017-02-02 93 +one_min.y 8 1111115400 2017-02-02 92 +one_min.y 9 1111116000 2017-02-02 91 +one_min.y 10 1111116600 2017-02-02 90 +one_min.y 11 1111117200 2017-02-02 89 +one_min.y 12 1111117800 2017-02-02 88 +one_min.y 13 1111118400 2017-02-02 87 +one_min.y 14 1111119000 2017-02-02 86 +one_min.y 15 1111119600 2017-02-02 85 +one_min.y 16 1111120200 2017-02-02 84 +one_min.y 17 1111120800 2017-02-02 83 +one_min.y 18 1111121400 2017-02-02 82 +one_min.y 19 1111122000 2017-02-02 81 +one_min.y 20 1111122600 2017-02-02 80 +one_min.y 21 1111123200 2017-02-02 79 +one_min.y 22 1111123800 2017-02-02 78 +one_min.y 23 1111124400 2017-02-02 77 +one_min.y 24 1111125000 2017-02-02 76 +one_min.y 25 1111125600 2017-02-02 75 +one_min.y 26 1111126200 2017-02-02 74 +one_min.y 27 1111126800 2017-02-02 73 +one_min.y 28 1111127400 2017-02-02 72 +one_min.y 29 1111128000 2017-02-02 71 +one_min.y 30 1111128600 2017-02-02 70 +one_min.y 31 1111129200 2017-02-02 69 +one_min.y 32 1111129800 2017-02-02 68 +one_min.y 33 1111130400 2017-02-02 67 +one_min.y 34 1111131000 2017-02-02 66 +one_min.y 35 1111131600 2017-02-02 65 +one_min.y 36 1111132200 2017-02-02 64 +one_min.y 37 1111132800 2017-02-02 63 +one_min.y 38 1111133400 2017-02-02 62 +one_min.y 39 1111134000 2017-02-02 61 +one_min.y 40 1111134600 2017-02-02 60 +one_min.y 41 1111135200 2017-02-02 59 +one_min.y 42 1111135800 2017-02-02 58 +one_min.y 43 1111136400 2017-02-02 57 +one_min.y 44 1111137000 2017-02-02 56 +one_min.y 45 1111137600 2017-02-02 55 +one_min.y 46 1111138200 2017-02-02 54 +one_min.y 47 1111138800 2017-02-02 53 +one_min.y 48 1111139400 2017-02-02 52 +one_min.y 49 1111140000 2017-02-02 51 diff --git a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged new file mode 100644 index 00000000000..e2c63ab3b22 --- /dev/null +++ b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged @@ -0,0 +1,84 @@ +x?retention=one_min 0 1111110600 2017-02-02 100 +x?retention=one_min 3 1111111200 2017-02-02 97 +x?retention=one_min 6 1111111800 2017-02-02 94 +x?retention=one_min 9 1111112400 2017-02-02 91 +x?retention=one_min 12 1111113000 2017-02-02 88 +x?retention=one_min 15 1111113600 2017-02-02 85 +x?retention=one_min 18 1111114200 2017-02-02 82 +x?retention=one_min 21 1111114800 2017-02-02 79 +x?retention=one_min 24 1111115400 2017-02-02 76 +x?retention=one_min 27 1111116000 2017-02-02 73 +x?retention=one_min 30 1111116600 2017-02-02 70 +x?retention=one_min 33 1111117200 2017-02-02 67 +x?retention=one_min 36 1111117800 2017-02-02 64 +x?retention=one_min 39 1111118400 2017-02-02 61 +x?retention=one_min 42 1111119000 2017-02-02 58 +x?retention=one_min 45 1111119600 2017-02-02 55 +x?retention=one_min 48 1111120200 2017-02-02 52 +x?retention=one_min 0 1111110600 2017-02-02 100 +x?retention=one_min 3 1111111200 2017-02-02 97 +x?retention=one_min 6 1111111800 2017-02-02 94 +x?retention=one_min 9 1111112400 2017-02-02 91 +x?retention=one_min 12 1111113000 2017-02-02 88 +x?retention=one_min 15 1111113600 2017-02-02 85 +x?retention=one_min 18 1111114200 2017-02-02 82 +x?retention=one_min 21 1111114800 2017-02-02 79 +x?retention=one_min 24 1111115400 2017-02-02 76 +x?retention=one_min 27 1111116000 2017-02-02 73 +x?retention=one_min 30 1111116600 2017-02-02 70 +x?retention=one_min 33 1111117200 2017-02-02 67 +x?retention=one_min 36 
1111117800 2017-02-02 64 +x?retention=one_min 39 1111118400 2017-02-02 61 +x?retention=one_min 42 1111119000 2017-02-02 58 +x?retention=one_min 45 1111119600 2017-02-02 55 +x?retention=one_min 48 1111120200 2017-02-02 52 +y?retention=one_min 0 1111110600 2017-02-02 100 +y?retention=one_min 1 1111111200 2017-02-02 99 +y?retention=one_min 2 1111111800 2017-02-02 98 +y?retention=one_min 3 1111112400 2017-02-02 97 +y?retention=one_min 4 1111113000 2017-02-02 96 +y?retention=one_min 5 1111113600 2017-02-02 95 +y?retention=one_min 6 1111114200 2017-02-02 94 +y?retention=one_min 7 1111114800 2017-02-02 93 +y?retention=one_min 8 1111115400 2017-02-02 92 +y?retention=one_min 9 1111116000 2017-02-02 91 +y?retention=one_min 10 1111116600 2017-02-02 90 +y?retention=one_min 11 1111117200 2017-02-02 89 +y?retention=one_min 12 1111117800 2017-02-02 88 +y?retention=one_min 13 1111118400 2017-02-02 87 +y?retention=one_min 14 1111119000 2017-02-02 86 +y?retention=one_min 15 1111119600 2017-02-02 85 +y?retention=one_min 16 1111120200 2017-02-02 84 +y?retention=one_min 17 1111120800 2017-02-02 83 +y?retention=one_min 18 1111121400 2017-02-02 82 +y?retention=one_min 19 1111122000 2017-02-02 81 +y?retention=one_min 20 1111122600 2017-02-02 80 +y?retention=one_min 21 1111123200 2017-02-02 79 +y?retention=one_min 22 1111123800 2017-02-02 78 +y?retention=one_min 23 1111124400 2017-02-02 77 +y?retention=one_min 24 1111125000 2017-02-02 76 +y?retention=one_min 25 1111125600 2017-02-02 75 +y?retention=one_min 26 1111126200 2017-02-02 74 +y?retention=one_min 27 1111126800 2017-02-02 73 +y?retention=one_min 28 1111127400 2017-02-02 72 +y?retention=one_min 29 1111128000 2017-02-02 71 +y?retention=one_min 30 1111128600 2017-02-02 70 +y?retention=one_min 31 1111129200 2017-02-02 69 +y?retention=one_min 32 1111129800 2017-02-02 68 +y?retention=one_min 33 1111130400 2017-02-02 67 +y?retention=one_min 34 1111131000 2017-02-02 66 +y?retention=one_min 35 1111131600 2017-02-02 65 +y?retention=one_min 36 1111132200 2017-02-02 64 +y?retention=one_min 37 1111132800 2017-02-02 63 +y?retention=one_min 38 1111133400 2017-02-02 62 +y?retention=one_min 39 1111134000 2017-02-02 61 +y?retention=one_min 40 1111134600 2017-02-02 60 +y?retention=one_min 41 1111135200 2017-02-02 59 +y?retention=one_min 42 1111135800 2017-02-02 58 +y?retention=one_min 43 1111136400 2017-02-02 57 +y?retention=one_min 44 1111137000 2017-02-02 56 +y?retention=one_min 45 1111137600 2017-02-02 55 +y?retention=one_min 46 1111138200 2017-02-02 54 +y?retention=one_min 47 1111138800 2017-02-02 53 +y?retention=one_min 48 1111139400 2017-02-02 52 +y?retention=one_min 49 1111140000 2017-02-02 51 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 2b391cd292e..a2e56fa0f1d 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -21,7 +21,7 @@ CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `de CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on 
the fly.\' CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'SQLITE\' = -128, \'ODBC\' = -127, \'JDBC\' = -126, \'HDFS\' = -125, \'S3\' = -124, \'SOURCES\' = -123, \'ALL\' = -122, \'NONE\' = -121, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM\' = 116, \'dictGet\' = 117, \'addressToLine\' = 118, \'addressToSymbol\' = 119, \'demangle\' = 120, 
\'INTROSPECTION\' = 121, \'FILE\' = 122, \'URL\' = 123, \'REMOTE\' = 124, \'MONGO\' = 125, \'MYSQL\' = 126, \'POSTGRES\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.merge_tree_settings\n(\n `name` String,\n `value` String,\n `changed` UInt8,\n `description` String,\n `type` String\n)\nENGINE = SystemMergeTreeSettings()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 8309b6bcb53..a930e7db3fc 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -32,6 +32,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (wal-dump) add_subdirectory (check-mysql-binlog) add_subdirectory (keeper-bench) + add_subdirectory (graphite-rollup) if (USE_NURAFT) add_subdirectory (keeper-data-dumper) diff --git a/utils/graphite-rollup/CMakeLists.txt b/utils/graphite-rollup/CMakeLists.txt new file mode 100644 index 00000000000..bd6a078fbd5 --- /dev/null +++ b/utils/graphite-rollup/CMakeLists.txt @@ -0,0 +1,23 @@ +add_executable(graphite-rollup-bench graphite-rollup-bench.cpp) +target_link_libraries( + graphite-rollup-bench + PRIVATE + clickhouse_storages_system + clickhouse_aggregate_functions + clickhouse_common_config + dbms +) +target_include_directories( + graphite-rollup-bench + PRIVATE + ${ClickHouse_SOURCE_DIR}/src ${CMAKE_BINARY_DIR}/src + ${ClickHouse_SOURCE_DIR}/base ${ClickHouse_SOURCE_DIR}/base/pcg-random + ${CMAKE_BINARY_DIR}/src/Core/include + ${POCO_INCLUDE_DIR} + ${ClickHouse_SOURCE_DIR}/contrib/double-conversion ${ClickHouse_SOURCE_DIR}/contrib/dragonbox/include + ${ClickHouse_SOURCE_DIR}/contrib/fmtlib/include + ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include + ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2_st +) + +target_compile_definitions(graphite-rollup-bench PRIVATE RULES_DIR="${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/utils/graphite-rollup/graphite-rollup-bench.cpp b/utils/graphite-rollup/graphite-rollup-bench.cpp new file mode 100644 index 00000000000..dabe0353b0f --- /dev/null +++ b/utils/graphite-rollup/graphite-rollup-bench.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +using namespace DB; + +static SharedContextHolder shared_context = 
Context::createShared(); + +std::vector loadMetrics(const std::string & metrics_file) +{ + std::vector metrics; + + FILE * stream; + char * line = nullptr; + size_t len = 0; + ssize_t nread; + + stream = fopen(metrics_file.c_str(), "r"); + if (stream == nullptr) + { + throw std::runtime_error(strerror(errno)); + } + + while ((nread = getline(&line, &len, stream)) != -1) + { + size_t l = strlen(line); + if (l > 0) + { + if (line[l - 1] == '\n') + { + line[l - 1] = '\0'; + l--; + } + if (l > 0) + { + metrics.push_back(StringRef(strdup(line), l)); + } + } + } + free(line); + if (ferror(stream)) + { + fclose(stream); + throw std::runtime_error(strerror(errno)); + } + + fclose(stream); + + return metrics; +} + +ConfigProcessor::LoadedConfig loadConfiguration(const std::string & config_path) +{ + ConfigProcessor config_processor(config_path, true, true); + ConfigProcessor::LoadedConfig config = config_processor.loadConfig(false); + return config; +} + +void bench(const std::string & config_path, const std::string & metrics_file, size_t n, bool verbose) +{ + auto config = loadConfiguration(config_path); + + auto context = Context::createGlobal(shared_context.get()); + context->setConfig(config.configuration.get()); + + Graphite::Params params; + setGraphitePatternsFromConfig(context, "graphite_rollup", params); + + std::vector metrics = loadMetrics(metrics_file); + + std::vector durations(metrics.size()); + size_t j, i; + for (j = 0; j < n; j++) + { + for (i = 0; i < metrics.size(); i++) + { + auto start = std::chrono::high_resolution_clock::now(); + + auto rule = DB::Graphite::selectPatternForPath(params, metrics[i]); + (void)rule; + + auto end = std::chrono::high_resolution_clock::now(); + double duration = (duration_cast>(end - start)).count() * 1E9; + durations[i] += duration; + + if (j == 0 && verbose) + { + std::cout << metrics[i].data << ": rule with regexp '" << rule.second->regexp_str << "' found\n"; + } + } + } + + for (i = 0; i < metrics.size(); i++) + { + std::cout << metrics[i].data << " " << durations[i] / n << " ns\n"; + free(const_cast(static_cast(metrics[i].data))); + } +} + +int main(int argc, char ** argv) +{ + registerAggregateFunctions(); + + std::string config_file, metrics_file; + + using namespace std::literals; + + std::string config_default = RULES_DIR + "/rollup.xml"s; + std::string metrics_default = RULES_DIR + "/metrics.txt"s; + + namespace po = boost::program_options; + po::variables_map vm; + + po::options_description desc; + desc.add_options()("help,h", "produce help")( + "config,c", po::value()->default_value(config_default), "XML config with rollup rules")( + "metrics,m", po::value()->default_value(metrics_default), "metrcis files (one metric per line) for run benchmark")( + "verbose,V", po::bool_switch()->default_value(false), "verbose output (print found rule)"); + + po::parsed_options parsed = po::command_line_parser(argc, argv).options(desc).run(); + po::store(parsed, vm); + po::notify(vm); + + if (vm.count("help")) + { + std::cout << desc << '\n'; + exit(1); + } + + bench(vm["config"].as(), vm["metrics"].as(), 10000, vm["verbose"].as()); + + return 0; +} diff --git a/utils/graphite-rollup/metrics.txt b/utils/graphite-rollup/metrics.txt new file mode 100644 index 00000000000..199c3791310 --- /dev/null +++ b/utils/graphite-rollup/metrics.txt @@ -0,0 +1,11 @@ +test.sum +sum?env=test&tag=Fake3 +test.max +max?env=test&tag=Fake4 +test.min +min?env=test&tag=Fake5 +fake5?env=test&tag=Fake5 +test.p95 +p95?env=test&tag=FakeNo +default +default?env=test&tag=FakeNo diff 
--git a/utils/graphite-rollup/rollup-tag-list.xml b/utils/graphite-rollup/rollup-tag-list.xml new file mode 100644 index 00000000000..ef28f2089ad --- /dev/null +++ b/utils/graphite-rollup/rollup-tag-list.xml @@ -0,0 +1,167 @@ + + + + plain + \.sum$ + sum + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)sum\? + sum + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.max$ + max + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)max\? + max + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.min$ + min + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)min\? + min + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.fake1\..*\.Fake1\. + sum + + + tag_list + fake1;tag=Fake1 + sum + + + plain + \.fake2\..*\.Fake2\. + sum + + + tag_list + fake2;tag=Fake2 + sum + + + plain + \.fake3\..*\.Fake3\. + sum + + + tag_list + fake3;tag=Fake3 + sum + + + plain + \.fake4\..*\.Fake4\. + sum + + + tag_list + fake4;tag=Fake4 + sum + + + plain + \.fake5\..*\.Fake5\. + sum + + + tag_list + fake5;tag=Fake5 + sum + + + plain + \.fake6\..*\.Fake6\. + sum + + + tag_list + fake6;tag=Fake6 + sum + + + plain + \.fake7\..*\.Fake7\. + sum + + + tag_list + fake7;tag=Fake7 + sum + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + diff --git a/utils/graphite-rollup/rollup-typed.xml b/utils/graphite-rollup/rollup-typed.xml new file mode 100644 index 00000000000..0b27d43ece9 --- /dev/null +++ b/utils/graphite-rollup/rollup-typed.xml @@ -0,0 +1,167 @@ + + + + plain + \.sum$ + sum + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)sum\? + sum + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.max$ + max + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)max\? + max + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.min$ + min + + 0 + 60 + + + 86400 + 3600 + + + + tagged + ^((.*)|.)min\? + min + + 0 + 60 + + + 86400 + 3600 + + + + plain + \.fake1\..*\.Fake1\. + sum + + + tagged + + sum + + + plain + \.fake2\..*\.Fake2\. + sum + + + tagged + + sum + + + plain + \.fake3\..*\.Fake3\. + sum + + + tagged + + sum + + + plain + \.fake4\..*\.Fake4\. + sum + + + tagged + + sum + + + plain + \.fake5\..*\.Fake5\. + sum + + + tagged + + sum + + + plain + \.fake6\..*\.Fake6\. + sum + + + tagged + + sum + + + plain + \.fake7\..*\.Fake7\. + sum + + + tagged + + sum + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + diff --git a/utils/graphite-rollup/rollup.xml b/utils/graphite-rollup/rollup.xml new file mode 100644 index 00000000000..641b0130509 --- /dev/null +++ b/utils/graphite-rollup/rollup.xml @@ -0,0 +1,147 @@ + + + + \.sum$ + sum + + 0 + 60 + + + 86400 + 3600 + + + + ^((.*)|.)sum\? + sum + + 0 + 60 + + + 86400 + 3600 + + + + \.max$ + max + + 0 + 60 + + + 86400 + 3600 + + + + ^((.*)|.)max\? + max + + 0 + 60 + + + 86400 + 3600 + + + + \.min$ + min + + 0 + 60 + + + 86400 + 3600 + + + + ^((.*)|.)min\? + min + + 0 + 60 + + + 86400 + 3600 + + + + \.fake1\..*\.Fake1\. + sum + + + + sum + + + \.fake2\..*\.Fake2\. + sum + + + + sum + + + \.fake3\..*\.Fake3\. + sum + + + + sum + + + \.fake4\..*\.Fake4\. + sum + + + + sum + + + \.fake5\..*\.Fake5\. + sum + + + + sum + + + \.fake6\..*\.Fake6\. + sum + + + + sum + + + \.fake7\..*\.Fake7\. 
+ sum + + + + sum + + + avg + + 0 + 60 + + + 3600 + 300 + + + 86400 + 3600 + + + + From c92cfc38d44f73a77821e37b71f8d040d7f1b914 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Dec 2021 19:47:30 +0300 Subject: [PATCH 136/262] Fix possible crash in DataTypeAggregateFunction (#32287) --- src/DataTypes/DataTypeAggregateFunction.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index d572da1ecd0..c65a30b80ac 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -139,17 +139,20 @@ static DataTypePtr create(const ASTPtr & arguments) if (!arguments || arguments->children.empty()) throw Exception("Data type AggregateFunction requires parameters: " - "name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + "version(optionally), name of aggregate function and list of data types for arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ASTPtr data_type_ast = arguments->children[0]; size_t argument_types_start_idx = 1; /* If aggregate function definition doesn't have version, it will have in AST children args [ASTFunction, types...] - in case * it is parametric, or [ASTIdentifier, types...] - otherwise. If aggregate function has version in AST, then it will be: - * [ASTLitearl, ASTFunction (or ASTIdentifier), types...]. + * [ASTLiteral, ASTFunction (or ASTIdentifier), types...]. */ if (auto * version_ast = arguments->children[0]->as()) { + if (arguments->children.size() < 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Data type AggregateFunction has version, but it requires at least one more parameter - name of aggregate function"); version = version_ast->value.safeGet(); data_type_ast = arguments->children[1]; argument_types_start_idx = 2; From fefecde2c471b29c0bcaeb548df03b4156e0123c Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 20:36:42 +0300 Subject: [PATCH 137/262] translate --- .../aggregate-functions/reference/sparkbar.md | 2 +- .../aggregate-functions/reference/sparkbar.md | 63 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index adbe1d551ca..4c19d16ec60 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -12,7 +12,7 @@ If no interval is specified, then the minimum `x` is used as the interval start, **Syntax** ``` sql -sparkbar(width, min_x, max_x)(x, y) +sparkbar(width[, min_x, max_x])(x, y) ``` **Parameters** diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md new file mode 100644 index 00000000000..e58d78bc0e5 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -0,0 +1,63 @@ +--- +toc_priority: 311 +toc_title: sparkbar +--- + +# sparkbar {#sparkbar} + +Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. + +Если интервал для постоения не указан, то в качестве начального значения будет использовано минимальное `x`, а в качестве конечного — максимальное `x`. 
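When the bounds are omitted, the interval is inferred from the aggregated data itself, which also makes the function convenient to combine with `GROUP BY` (each group gets its own inferred interval). A minimal sketch, assuming a hypothetical table `metrics_daily` with columns `metric`, `event_date` and `cnt` — these names are illustrative only and not part of this patch:

``` sql
-- One miniature bar chart per metric; x values landing in the same of the
-- `width` buckets have their y values aggregated together.
SELECT
    metric,
    sparkbar(20)(event_date, cnt) AS chart
FROM metrics_daily
GROUP BY metric
ORDER BY metric;
```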
+ +**Синтаксис** + +``` sql +sparkbar(width[, min_x, max_x])(x, y) +``` + +**Параметры** + +- `width` — Количество отрезков. Тип: [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — Начало интервала. Необязательный параметр. +- `max_x` — Конец интервала. Необязательный параметр. + +**Аргументы** + +- `x` — Поле со значениями. +- `y` — Поле с частотой значений. + +**Возвращаемые значения** + +- Гистограмма частот. + +**Пример** + +Запрос: + +``` sql +CREATE TABLE spark_bar_data (`cnt` UInt64,`event_date` Date) ENGINE = MergeTree ORDER BY event_date SETTINGS index_granularity = 8192; + +INSERT INTO spark_bar_data VALUES(1,'2020-01-01'),(4,'2020-01-02'),(5,'2020-01-03'),(2,'2020-01-04'),(3,'2020-01-05'),(7,'2020-01-06'),(6,'2020-01-07'),(8,'2020-01-08'),(2,'2020-01-11'); + +SELECT sparkbar(9)(event_date,cnt) FROM spark_bar_data; + +SELECT sparkbar(9,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FROM spark_bar_data; +``` + +Результат: + +``` text + +┌─sparkbar(9)(event_date, cnt)─┐ +│ │ +│ ▁▅▄▃██▅ ▁ │ +│ │ +└──────────────────────────────┘ + +┌─sparkbar(9, toDate('2020-01-01'), toDate('2020-01-10'))(event_date, cnt)─┐ +│ │ +│▁▄▄▂▅▇█▁ │ +│ │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + From fb24e7181f982a897b14a57b1822f4b53096d152 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Dec 2021 18:27:06 +0000 Subject: [PATCH 138/262] Better --- programs/local/LocalServer.cpp | 64 +++++++++++++++++++++++---------- programs/main.cpp | 8 ++--- src/Functions/getFuzzerData.cpp | 46 ++---------------------- src/Functions/getFuzzerData.h | 48 +++++++++++++++++++++++++ 4 files changed, 98 insertions(+), 68 deletions(-) create mode 100644 src/Functions/getFuzzerData.h diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 17541b19b8a..a1c35cec97c 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -41,6 +41,10 @@ #include #include +#if defined(FUZZING_MODE) + #include +#endif + namespace fs = std::filesystem; @@ -404,18 +408,28 @@ try ThreadStatus thread_status; setupSignalHandler(); -#ifdef FUZZING_MODE - static bool first_time = true; - if (first_time) - { -#endif std::cout << std::fixed << std::setprecision(3); std::cerr << std::fixed << std::setprecision(3); +#if defined(FUZZING_MODE) + static bool first_time = true; + if (first_time) + { + + if (queries_files.empty() && !config().has("query")) + { + std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl; + std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl; + std::cerr << "\033[31m" << "The query have to use function getFuzzerData() inside." 
<< "\033[0m" << std::endl; + exit(1); + } + + is_interactive = false; +#else is_interactive = stdin_is_a_tty && (config().hasOption("interactive") || (!config().has("query") && !config().has("table-structure") && queries_files.empty())); - +#endif if (!is_interactive) { /// We will terminate process on error @@ -446,6 +460,7 @@ try #ifdef FUZZING_MODE first_time = false; + } #endif if (is_interactive && !delayed_interactive) @@ -664,7 +679,7 @@ void LocalServer::processConfig() } -static std::string getHelpHeader() +[[ maybe_unused ]] static std::string getHelpHeader() { return "usage: clickhouse-local [initial table definition] [--query ]\n" @@ -680,7 +695,7 @@ static std::string getHelpHeader() } -static std::string getHelpFooter() +[[ maybe_unused ]] static std::string getHelpFooter() { return "Example printing memory used by each Unix user:\n" @@ -691,11 +706,21 @@ static std::string getHelpFooter() } -void LocalServer::printHelpMessage(const OptionsDescription & options_description) +void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description) { +#if defined(FUZZING_MODE) + std::cout << + "usage: clickhouse --query [--query-file ]\n" + + "ClickHouse is build with coverage guided fuzzer (libfuzzer) inside it.\n" + "You have to provide a query which contains getFuzzerData function.\n" + "This will take the data from fuzzing engine, pass it to getFuzzerData function and execute a query.\n" + "Each time the data will be different, and it will last until some segfault or sanitizer assertion is found. \n"; +#else std::cout << getHelpHeader() << "\n"; std::cout << options_description.main_description.value() << "\n"; std::cout << getHelpFooter() << "\n"; +#endif } @@ -793,8 +818,11 @@ int mainEntryClickHouseLocal(int argc, char ** argv) } } -#ifdef FUZZING_MODE -#include +#if defined(FUZZING_MODE) + +// #include + +// #endif std::optional fuzz_app; @@ -825,6 +853,9 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) return 0; } + + + extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) { try @@ -833,19 +864,14 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) if (size) --size; auto cur_str = String(reinterpret_cast(data), size); - // to clearly see the beginning and the end - std::cerr << '>' << cur_str << '<' << std::endl; + DB::FunctionGetFuzzerData::update(cur_str); fuzz_app->run(); + return 0; } catch (...) { - std::cerr << "Why here?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!?!" << std::endl; - std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; - return 0; - //auto code = DB::getCurrentExceptionCode(); - //return code ? 
code : 1; + return 1; } - return 0; } #endif diff --git a/programs/main.cpp b/programs/main.cpp index b4b229d123b..cd416f57982 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -88,9 +88,10 @@ namespace using MainFunc = int (*)(int, char**); +#if !defined(FUZZING_MODE) /// Add an item here to register new application -[[maybe_unused]]std::pair clickhouse_applications[] = +std::pair clickhouse_applications[] = { #if ENABLE_CLICKHOUSE_LOCAL {"local", mainEntryClickHouseLocal}, @@ -141,7 +142,6 @@ using MainFunc = int (*)(int, char**); {"hash-binary", mainEntryClickHouseHashBinary}, }; -#ifndef FUZZING_MODE int printHelp(int, char **) { std::cerr << "Use one of the following commands:" << std::endl; @@ -149,9 +149,7 @@ int printHelp(int, char **) std::cerr << "clickhouse " << application.first << " [args] " << std::endl; return -1; } -#endif -#ifndef FUZZING_MODE bool isClickhouseApp(const std::string & app_suffix, std::vector & argv) { /// Use app if the first arg 'app' is passed (the arg should be quietly removed) @@ -350,7 +348,7 @@ bool inside_main = false; bool inside_main = true; #endif -#ifndef FUZZING_MODE +#if !defined(FUZZING_MODE) int main(int argc_, char ** argv_) { inside_main = true; diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp index c01f575f0be..f516c871950 100644 --- a/src/Functions/getFuzzerData.cpp +++ b/src/Functions/getFuzzerData.cpp @@ -1,53 +1,11 @@ -#include -#include -#include -#include +#include namespace DB { -class FunctionGetFuzzerData : public IFunction -{ - inline static String fuzz_data; - -public: - static constexpr auto name = "getFuzzerData"; - - inline static FunctionPtr create(ContextPtr) { return create(); } - - static FunctionPtr create() - { - return std::make_shared(); - } - - inline String getName() const override { return name; } - - inline size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - inline bool isDeterministic() const override { return false; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, - const DataTypePtr &, - size_t input_rows_count) const override - { - return DataTypeString().createColumnConst(input_rows_count, fuzz_data); - } - - static void update(const String & fuzz_data_) - { - fuzz_data = fuzz_data_; - } -}; void registerFunctionGetFuzzerData(FunctionFactory & factory) { factory.registerFunction(); - factory.registerAlias("get_fuzzer_data", FunctionGetFuzzerData::name, FunctionFactory::CaseInsensitive); } + } diff --git a/src/Functions/getFuzzerData.h b/src/Functions/getFuzzerData.h new file mode 100644 index 00000000000..06f11f28e70 --- /dev/null +++ b/src/Functions/getFuzzerData.h @@ -0,0 +1,48 @@ +#include +#include +#include +#include + +namespace DB +{ +class FunctionGetFuzzerData : public IFunction +{ + inline static String fuzz_data; + +public: + static constexpr auto name = "getFuzzerData"; + + inline static FunctionPtr create(ContextPtr) { return create(); } + + static FunctionPtr create() + { + return std::make_shared(); + } + + inline String getName() const override { return name; } + + inline size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + inline bool isDeterministic() const 
override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, + const DataTypePtr &, + size_t input_rows_count) const override + { + return DataTypeString().createColumnConst(input_rows_count, fuzz_data); + } + + static void update(const String & fuzz_data_) + { + fuzz_data = fuzz_data_; + } +}; + +} From 3a5ba8fe44b3696eee561503416233c75b594f7d Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 21:48:09 +0300 Subject: [PATCH 139/262] Update docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../ru/sql-reference/aggregate-functions/reference/sparkbar.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md index e58d78bc0e5..28da1390522 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -7,7 +7,8 @@ toc_title: sparkbar Функция строит гистограмму частот по заданным значениям `x` и частоте повторения этих значений `y` на интервале `[min_x, max_x]`. -Если интервал для постоения не указан, то в качестве начального значения будет использовано минимальное `x`, а в качестве конечного — максимальное `x`. +Если интервал для построения не указан, то в качестве нижней границы интервала будет взято минимальное значение `x`, а в качестве верхней границы — максимальное значение `x`. + **Синтаксис** From 0d89a9b3a20430645bf15643f2c58223ceb53c3c Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 21:48:24 +0300 Subject: [PATCH 140/262] Update docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../ru/sql-reference/aggregate-functions/reference/sparkbar.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md index 28da1390522..05cf4d67b75 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -25,7 +25,8 @@ sparkbar(width[, min_x, max_x])(x, y) **Аргументы** - `x` — Поле со значениями. -- `y` — Поле с частотой значений. +- `y` — Поле с частотой повторения значений. + **Возвращаемые значения** From 3a38520bc74cb71fcdb5466803c78107dfc8457f Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 21:48:31 +0300 Subject: [PATCH 141/262] Update docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../ru/sql-reference/aggregate-functions/reference/sparkbar.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md index 05cf4d67b75..b66d710744e 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/sparkbar.md @@ -18,7 +18,8 @@ sparkbar(width[, min_x, max_x])(x, y) **Параметры** -- `width` — Количество отрезков. 
Тип: [Integer](../../../sql-reference/data-types/int-uint.md). +- `width` — Количество столбцов гистограммы. Тип: [Integer](../../../sql-reference/data-types/int-uint.md). + - `min_x` — Начало интервала. Необязательный параметр. - `max_x` — Конец интервала. Необязательный параметр. From 6854bb46f59f472e561d66a23c1eeee74e91c440 Mon Sep 17 00:00:00 2001 From: Tatiana Kirillova Date: Mon, 6 Dec 2021 21:48:53 +0300 Subject: [PATCH 142/262] Update docs/en/sql-reference/aggregate-functions/reference/sparkbar.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- .../en/sql-reference/aggregate-functions/reference/sparkbar.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 4c19d16ec60..47c696129c7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -5,7 +5,8 @@ toc_title: sparkbar # sparkbar {#sparkbar} -The function plots a frequency histogram for values `x` and the repetition rate of these `y` values over the interval `[min_x, max_x]`. +The function plots a frequency histogram for values `x` and the repetition rate `y` of these values over the interval `[min_x, max_x]`. + If no interval is specified, then the minimum `x` is used as the interval start, and the maximum `x` — as the interval end. From f07fc08a493250ff25bcc9aa5e9996833e363611 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Dec 2021 22:08:32 +0300 Subject: [PATCH 143/262] Better fix. --- src/Interpreters/ActionsDAG.cpp | 25 ++++++++++++++-- src/Interpreters/ActionsDAG.h | 4 +-- src/Interpreters/ExpressionAnalyzer.cpp | 16 ++++++++++ src/Processors/Transforms/WindowTransform.cpp | 29 +------------------ .../02126_lc_window_functions.reference | 10 +++++++ .../0_stateless/02126_lc_window_functions.sql | 3 ++ 6 files changed, 54 insertions(+), 33 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index d0b360dda82..ec04177e4db 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -983,19 +983,38 @@ void ActionsDAG::assertDeterministic() const "Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name); } -void ActionsDAG::addMaterializingOutputActions() +void ActionsDAG::addMaterializingOutputActions(bool remove_low_cardinality) { for (auto & node : index) - node = &materializeNode(*node); + node = &materializeNode(*node, remove_low_cardinality); } -const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node) +const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node, bool remove_low_cardinality) { FunctionOverloadResolverPtr func_builder_materialize = std::make_unique( std::make_shared()); const auto & name = node.result_name; const auto * func = &addFunction(func_builder_materialize, {&node}, {}); + if (remove_low_cardinality) + { + auto res_type = recursiveRemoveLowCardinality(func->result_type); + if (res_type.get() != func->result_type.get()) + { + ColumnWithTypeAndName column; + column.name = res_type->getName(); + column.column = DataTypeString().createColumnConst(0, column.name); + column.type = std::make_shared(); + + const auto * right_arg = &addColumn(std::move(column)); + const auto * left_arg = func; + + FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); + + 
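+            /// At this point `func` is materialize(<node>) and its result type still
+            /// contains LowCardinality. The CAST built below converts the materialized
+            /// column to the full type, using the constant type-name string column
+            /// prepared above as the CAST target argument.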
NodeRawConstPtrs children = { left_arg, right_arg }; + func = &addFunction(func_builder_cast, std::move(children), {}); + } + } return addAlias(*func, name); } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 9a5ad01a252..97193bececd 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -192,10 +192,10 @@ public: /// For apply materialize() function for every output. /// Also add aliases so the result names remain unchanged. - void addMaterializingOutputActions(); + void addMaterializingOutputActions(bool remove_low_cardinality = false); /// Apply materialize() function to node. Result node has the same name. - const Node & materializeNode(const Node & node); + const Node & materializeNode(const Node & node, bool remove_low_cardinality = false); enum class MatchColumnsMode { diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9b343bec055..4ebe1691f27 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1697,6 +1697,22 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage); + // If we have a (logically) constant column, some Chunks will have a + // Const column for it, and some -- materialized. Such difference is + // generated by e.g. MergingSortedAlgorithm, which mostly materializes + // the constant ORDER BY columns, but in some obscure cases passes them + // through, unmaterialized. This mix is a pain to work with in Window + // Transform, because we have to compare columns across blocks, when e.g. + // searching for peer group boundaries, and each of the four combinations + // of const and materialized requires different code. + // Another problem with Const columns is that the aggregate functions + // can't work with them, so we have to materialize them like the + // Aggregator does. + // Likewise, aggregate functions can't work with LowCardinality, + // so we have to materialize them too. + // Just materialize everything. + chain.getLastActions()->addMaterializingOutputActions(true); + // Build a list of output columns of the window step. // 1) We need the columns that are the output of ExpressionActions. for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 8cbe1c96e44..8abf3980777 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -198,15 +198,6 @@ WindowTransform::WindowTransform(const Block & input_header_, , input_header(input_header_) , window_description(window_description_) { - // Materialize all columns in header, because we materialize all columns - // in chunks and it's convenient if they match. - auto input_columns = input_header.getColumns(); - for (auto & column : input_columns) - { - column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); - } - input_header.setColumns(std::move(input_columns)); - // Initialize window function workspaces. workspaces.reserve(functions.size()); for (const auto & f : functions) @@ -1045,25 +1036,7 @@ void WindowTransform::appendChunk(Chunk & chunk) // happens, because even in the case of `count() over ()` we have a dummy // input column. 
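+    // Input columns are expected to arrive already materialized and with
+    // LowCardinality stripped: with this change that conversion happens once,
+    // in ExpressionAnalyzer (addMaterializingOutputActions(true)), instead of
+    // per chunk here.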
block.rows = chunk.getNumRows(); - - // If we have a (logically) constant column, some Chunks will have a - // Const column for it, and some -- materialized. Such difference is - // generated by e.g. MergingSortedAlgorithm, which mostly materializes - // the constant ORDER BY columns, but in some obscure cases passes them - // through, unmaterialized. This mix is a pain to work with in Window - // Transform, because we have to compare columns across blocks, when e.g. - // searching for peer group boundaries, and each of the four combinations - // of const and materialized requires different code. - // Another problem with Const columns is that the aggregate functions - // can't work with them, so we have to materialize them like the - // Aggregator does. - // Likewise, aggregate functions can't work with LowCardinality, - // so we have to materialize them too. - // Just materialize everything. - auto columns = chunk.detachColumns(); - for (auto & column : columns) - column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); - block.input_columns = std::move(columns); + block.input_columns = chunk.detachColumns(); // Initialize output columns. for (auto & ws : workspaces) diff --git a/tests/queries/0_stateless/02126_lc_window_functions.reference b/tests/queries/0_stateless/02126_lc_window_functions.reference index 75378377541..f2c4b32cb48 100644 --- a/tests/queries/0_stateless/02126_lc_window_functions.reference +++ b/tests/queries/0_stateless/02126_lc_window_functions.reference @@ -8,3 +8,13 @@ 2 2 2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/02126_lc_window_functions.sql b/tests/queries/0_stateless/02126_lc_window_functions.sql index b76d921406b..7baf0e8eb3e 100644 --- a/tests/queries/0_stateless/02126_lc_window_functions.sql +++ b/tests/queries/0_stateless/02126_lc_window_functions.sql @@ -20,3 +20,6 @@ FROM SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(10) ); + + +select * from (SELECT countIf(sym = 'Red') OVER (Range BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS res FROM (SELECT max(255) OVER (Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(1048576))) limit 10; From eab6f0ba492f1922d4943a6bc0ff9fca7eb8dd02 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 6 Dec 2021 23:35:29 +0300 Subject: [PATCH 144/262] Update FormatFactory.cpp --- src/Formats/FormatFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 4539a0d6e6a..7788b9d115b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -312,7 +312,7 @@ String FormatFactory::getContentType( throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT, "Format {} is not suitable for output (with processors)", name); auto format_settings = _format_settings ? 
*_format_settings : getFormatSettings(context); - + Block empty_block; RowOutputFormatParams empty_params; WriteBufferFromOwnString empty_buffer; From 80a146816ca837b892002775854fcd9fac1353c7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Dec 2021 21:34:52 +0000 Subject: [PATCH 145/262] More comments and style --- CMakeLists.txt | 4 +++ docker/packager/other/fuzzer.sh | 4 --- programs/local/CMakeLists.txt | 10 ------- programs/local/LocalServer.cpp | 50 ++++++++++++++------------------- 4 files changed, 25 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a3991bc93c..bc0f119e3f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -149,6 +149,10 @@ if (ENABLE_FUZZING) set (ENABLE_JEMALLOC 0) set (ENABLE_CHECK_HEAVY_BUILDS 1) set (GLIBC_COMPATIBILITY OFF) + + # For codegen_select_fuzzer + set (ENABLE_PROTOBUF 1) + set (USE_INTERNAL_PROTOBUF_LIBRARY 1) endif() # Global libraries diff --git a/docker/packager/other/fuzzer.sh b/docker/packager/other/fuzzer.sh index 4c208ca7e31..431352f1126 100755 --- a/docker/packager/other/fuzzer.sh +++ b/docker/packager/other/fuzzer.sh @@ -31,10 +31,6 @@ do mv "$FUZZER_PATH" /output/fuzzers done -ninja clickhouse-local -LOCAL_PATH=$(find ./programs -name clickhouse) -strip --strip-unneeded "$LOCAL_PATH" -mv "$LOCAL_PATH" /output/fuzzers tar -zcvf /output/fuzzers.tar.gz /output/fuzzers rm -rf /output/fuzzers diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 4ac8ad5d30d..da466f725b3 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -22,14 +22,4 @@ if (ENABLE_FUZZING) add_compile_definitions(FUZZING_MODE=1) set (WITH_COVERAGE ON) target_link_libraries(clickhouse-local-lib PRIVATE ${LIB_FUZZING_ENGINE}) - #add_executable(fuzz-clickhouse-local LocalServer.cpp ${SRCS}) - #[[target_link_libraries(fuzz-clickhouse-local PRIVATE - dbms - ${LIB_FUZZING_ENGINE} - loggers - clickhouse_functions - clickhouse_aggregate_functions - clickhouse_storages_system - clickhouse_table_functions - readpassphrase)]] endif () diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index a1c35cec97c..561b0588787 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -710,7 +710,9 @@ void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & o { #if defined(FUZZING_MODE) std::cout << - "usage: clickhouse --query [--query-file ]\n" + "usage: clickhouse -- \n" + "Note: It is important not to use only one letter keys with single dash for \n" + "for clickhouse-local arguments. 
It may work incorrectly.\n" "ClickHouse is build with coverage guided fuzzer (libfuzzer) inside it.\n" "You have to provide a query which contains getFuzzerData function.\n" @@ -820,10 +822,6 @@ int mainEntryClickHouseLocal(int argc, char ** argv) #if defined(FUZZING_MODE) -// #include - -// #endif - std::optional fuzz_app; extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) @@ -831,8 +829,11 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) int & argc = *pargc; char ** argv = *pargv; - // position of delimiter "--" that separates arguments - // of clickhouse-local and fuzzer + /// As a user you can add flags to clickhouse binary in fuzzing mode as follows + /// clickhouse -- + + /// Calculate the position of delimiter "--" that separates arguments + /// of clickhouse-local and libfuzzer int pos_delim = argc; for (int i = 0; i < argc; ++i) { @@ -843,35 +844,26 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) } } + /// Initialize clickhouse-local app fuzz_app.emplace(); fuzz_app->init(pos_delim, argv); - for (int i = pos_delim + 1; i < argc; ++i) - std::swap(argv[i], argv[i - pos_delim]); - argc -= pos_delim; - if (argc == 0) // no delimiter provided - ++argc; + + /// We will leave clickhouse-local specific arguments as is, because libfuzzer will ignore + /// all keys starting with -- return 0; } - - extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) +try { - try - { - // inappropriate symbol for fuzzing at the end - if (size) - --size; - auto cur_str = String(reinterpret_cast(data), size); - - DB::FunctionGetFuzzerData::update(cur_str); - fuzz_app->run(); - return 0; - } - catch (...) - { - return 1; - } + auto input = String(reinterpret_cast(data), size); + DB::FunctionGetFuzzerData::update(input); + fuzz_app->run(); + return 0; +} +catch (...) +{ + return 1; } #endif From 0a5df82c2f92d94e01009320652da3ec892460f8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 6 Dec 2021 21:40:44 +0000 Subject: [PATCH 146/262] Bump From dc5707bdb5b2479712b95ca6a9804fa04c0ee2e5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 7 Dec 2021 10:47:17 +0300 Subject: [PATCH 147/262] Update backport.py --- utils/github/backport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/github/backport.py b/utils/github/backport.py index a28a1510694..9227dbf4108 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -74,7 +74,7 @@ class Backport: # First pass. 
Find all must-backports for label in pr['labels']['nodes']: - if label['name'] == 'pr-bugfix' or label['name'] == 'pr-must-backport': + if label['name'] == 'pr-must-backport': backport_map[pr['number']] = branch_set.copy() continue matched = RE_MUST_BACKPORT.match(label['name']) From 9df664e1c99a8fbe021c7545d8af63d52427e581 Mon Sep 17 00:00:00 2001 From: dongyifeng Date: Tue, 7 Dec 2021 16:09:39 +0800 Subject: [PATCH 148/262] fix bug when remove unneeded columns in subquery (#32289) --- src/Interpreters/TreeRewriter.cpp | 6 +++++- .../0_stateless/02131_remove_columns_in_subquery.reference | 1 + .../0_stateless/02131_remove_columns_in_subquery.sql | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02131_remove_columns_in_subquery.reference create mode 100644 tests/queries/0_stateless/02131_remove_columns_in_subquery.sql diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index d864bb54b2e..6b3a50d88e2 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -465,9 +465,13 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, ASTFunction * func = elem->as(); /// Never remove untuple. It's result column may be in required columns. - /// It is not easy to analyze untuple here, because types were not calculated yes. + /// It is not easy to analyze untuple here, because types were not calculated yet. if (func && func->name == "untuple") new_elements.push_back(elem); + + /// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong + if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name) && !select_query->groupBy()) + new_elements.push_back(elem); } } diff --git a/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference b/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02131_remove_columns_in_subquery.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql b/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql new file mode 100644 index 00000000000..f9ca2269aad --- /dev/null +++ b/tests/queries/0_stateless/02131_remove_columns_in_subquery.sql @@ -0,0 +1 @@ +select count(1) from (SELECT 1 AS a, count(1) FROM numbers(5)) From c4c517bb8a5ed2cff70c18dae437e785de483d66 Mon Sep 17 00:00:00 2001 From: vxider Date: Tue, 7 Dec 2021 08:14:00 +0000 Subject: [PATCH 149/262] rename window functions --- src/Functions/FunctionsWindow.cpp | 14 ++-- src/Functions/FunctionsWindow.h | 20 +++--- src/Storages/WindowView/StorageWindowView.cpp | 48 ++++++------- src/Storages/WindowView/StorageWindowView.h | 32 ++++----- ...7_window_view_parser_inner_table.reference | 20 +++--- .../01047_window_view_parser_inner_table.sql | 20 +++--- .../01048_window_view_parser.reference | 28 ++++---- .../0_stateless/01048_window_view_parser.sql | 28 ++++---- ...049_window_view_window_functions.reference | 68 +++++++++---------- .../01049_window_view_window_functions.sql | 68 +++++++++---------- .../01050_window_view_parser_tumble.sql | 14 ++-- .../01051_window_view_parser_hop.sql | 14 ++-- .../01052_window_view_proc_tumble_to_now.sql | 2 +- .../01053_window_view_proc_hop_to_now.sql | 2 +- .../01054_window_view_proc_tumble_to.sql | 2 +- .../01055_window_view_proc_hop_to.sql | 2 +- .../01056_window_view_proc_hop_watch.py | 2 +- 
...window_view_event_tumble_to_strict_asc.sql | 2 +- ...58_window_view_event_hop_to_strict_asc.sql | 2 +- ..._window_view_event_hop_watch_strict_asc.py | 2 +- .../01060_window_view_event_tumble_to_asc.sql | 2 +- .../01061_window_view_event_hop_to_asc.sql | 2 +- .../01062_window_view_event_hop_watch_asc.py | 2 +- ...63_window_view_event_tumble_to_bounded.sql | 2 +- ...01064_window_view_event_hop_to_bounded.sql | 2 +- ...065_window_view_event_hop_watch_bounded.py | 2 +- ...ew_event_tumble_to_strict_asc_lateness.sql | 2 +- ...ndow_view_event_tumble_to_asc_lateness.sql | 2 +- ..._view_event_tumble_to_bounded_lateness.sql | 2 +- .../01069_window_view_proc_tumble_watch.py | 2 +- 30 files changed, 205 insertions(+), 205 deletions(-) diff --git a/src/Functions/FunctionsWindow.cpp b/src/Functions/FunctionsWindow.cpp index a26faac304d..be336aa9a7c 100644 --- a/src/Functions/FunctionsWindow.cpp +++ b/src/Functions/FunctionsWindow.cpp @@ -116,7 +116,7 @@ namespace template <> struct WindowImpl { - static constexpr auto name = "TUMBLE"; + static constexpr auto name = "tumble"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -213,7 +213,7 @@ struct WindowImpl template <> struct WindowImpl { - static constexpr auto name = "TUMBLE_START"; + static constexpr auto name = "tumbleStart"; static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -257,7 +257,7 @@ struct WindowImpl template <> struct WindowImpl { - static constexpr auto name = "TUMBLE_END"; + static constexpr auto name = "tumbleEnd"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -285,7 +285,7 @@ struct WindowImpl template <> struct WindowImpl { - static constexpr auto name = "HOP"; + static constexpr auto name = "hop"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -417,7 +417,7 @@ struct WindowImpl template <> struct WindowImpl { - static constexpr auto name = "WINDOW_ID"; + static constexpr auto name = "windowID"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -569,7 +569,7 @@ struct WindowImpl template <> struct WindowImpl { - static constexpr auto name = "HOP_START"; + static constexpr auto name = "hopStart"; static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { @@ -612,7 +612,7 @@ struct WindowImpl template <> struct WindowImpl { - static constexpr auto name = "HOP_END"; + static constexpr auto name = "hopEnd"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { diff --git a/src/Functions/FunctionsWindow.h b/src/Functions/FunctionsWindow.h index 37acb660751..be4513225cf 100644 --- a/src/Functions/FunctionsWindow.h +++ b/src/Functions/FunctionsWindow.h @@ -9,25 +9,25 @@ namespace DB /** Window functions: * - * TUMBLE(time_attr, interval [, timezone]) + * tumble(time_attr, interval [, timezone]) * - * TUMBLE_START(window_id) + * tumbleStart(window_id) * - * TUMBLE_START(time_attr, interval [, timezone]) + * tumbleStart(time_attr, interval [, timezone]) * - * TUMBLE_END(window_id) + * tumbleEnd(window_id) * - * TUMBLE_END(time_attr, interval [, timezone]) + * tumbleEnd(time_attr, interval [, timezone]) * - * HOP(time_attr, hop_interval, window_interval [, 
timezone]) + * hop(time_attr, hop_interval, window_interval [, timezone]) * - * HOP_START(window_id) + * hopStart(window_id) * - * HOP_START(time_attr, hop_interval, window_interval [, timezone]) + * hopStart(time_attr, hop_interval, window_interval [, timezone]) * - * HOP_END(window_id) + * hopEnd(window_id) * - * HOP_END(time_attr, hop_interval, window_interval [, timezone]) + * hopEnd(time_attr, hop_interval, window_interval [, timezone]) * */ enum WindowFunctionName diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 915e775ff14..51f2a37aa8f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -62,7 +62,7 @@ namespace ErrorCodes namespace { - /// Fetch all window info and replace TUMPLE or HOP node names with WINDOW_ID + /// Fetch all window info and replace tumble or hop node names with windowID struct FetchQueryInfoMatcher { using Visitor = InDepthNodeVisitor; @@ -85,10 +85,10 @@ namespace { if (auto * t = ast->as()) { - if (t->name == "TUMBLE" || t->name == "HOP") + if (t->name == "tumble" || t->name == "hop") { - data.is_tumble = t->name == "TUMBLE"; - data.is_hop = t->name == "HOP"; + data.is_tumble = t->name == "tumble"; + data.is_hop = t->name == "hop"; auto temp_node = t->clone(); temp_node->setAlias(""); if (startsWith(t->arguments->children[0]->getColumnName(), "toDateTime")) @@ -98,7 +98,7 @@ namespace if (!data.window_function) { data.serialized_window_function = serializeAST(*temp_node); - t->name = "WINDOW_ID"; + t->name = "windowID"; data.window_id_name = t->getColumnName(); data.window_id_alias = t->alias; data.window_function = t->clone(); @@ -109,14 +109,14 @@ namespace { if (serializeAST(*temp_node) != data.serialized_window_function) throw Exception("WINDOW VIEW only support ONE WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); - t->name = "WINDOW_ID"; + t->name = "windowID"; } } } } }; - /// Replace WINDOW_ID node name with either TUMBLE or HOP. + /// Replace windowID node name with either tumble or hop. struct ReplaceWindowIdMatcher { public: @@ -132,15 +132,15 @@ namespace { if (auto * t = ast->as()) { - if (t->name == "WINDOW_ID") + if (t->name == "windowID") t->name = data.window_name; } } }; - /// GROUP BY TUMBLE(now(), INTERVAL '5' SECOND) + /// GROUP BY tumble(now(), INTERVAL '5' SECOND) /// will become - /// GROUP BY TUMBLE(____timestamp, INTERVAL '5' SECOND) + /// GROUP BY tumble(____timestamp, INTERVAL '5' SECOND) struct ReplaceFunctionNowData { using TypeToVisit = ASTFunction; @@ -151,7 +151,7 @@ namespace void visit(ASTFunction & node, ASTPtr & node_ptr) { - if (node.name == "WINDOW_ID" || node.name == "TUMBLE" || node.name == "HOP") + if (node.name == "windowID" || node.name == "tumble" || node.name == "hop") { if (const auto * t = node.arguments->children[0]->as(); t && t->name == "now") @@ -188,8 +188,8 @@ namespace { if (auto * t = ast->as()) { - if (t->name == "HOP" || t->name == "TUMBLE") - t->name = "WINDOW_ID"; + if (t->name == "hop" || t->name == "tumble") + t->name = "windowID"; } } }; @@ -221,12 +221,12 @@ namespace { if (node.name == "tuple") { - /// tuple(WINDOW_ID(timestamp, toIntervalSecond('5'))) + /// tuple(windowID(timestamp, toIntervalSecond('5'))) return; } else { - /// WINDOW_ID(timestamp, toIntervalSecond('5')) -> identifier. + /// windowID(timestamp, toIntervalSecond('5')) -> identifier. /// and other... 
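+                /// Replacing the whole function call with a plain identifier named after
+                /// its column lets the inner table's key expressions (ORDER BY/PARTITION BY)
+                /// refer to the stored windowID column instead of re-evaluating the function.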
node_ptr = std::make_shared(node.getColumnName()); } @@ -351,14 +351,14 @@ static size_t getWindowIDColumnPosition(const Block & header) auto position = -1; for (const auto & column : header.getColumnsWithTypeAndName()) { - if (startsWith(column.name, "WINDOW_ID")) + if (startsWith(column.name, "windowID")) { position = header.getPositionByName(column.name); break; } } if (position < 0) - throw Exception("Not found column WINDOW_ID", ErrorCodes::LOGICAL_ERROR); + throw Exception("Not found column windowID", ErrorCodes::LOGICAL_ERROR); return position; } @@ -631,7 +631,7 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( time_now_visitor.visit(node); function_now_timezone = time_now_data.now_timezone; } - /// TUMBLE/HOP -> WINDOW_ID + /// tumble/hop -> windowID func_window_visitor.visit(node); to_identifier_visitor.visit(node); new_storage->set(field, node); @@ -960,7 +960,7 @@ StorageWindowView::StorageWindowView( select_table_id = StorageID(select_database_name, select_table_name); DatabaseCatalog::instance().addDependency(select_table_id, table_id_); - /// Extract all info from query; substitute Function_TUMPLE and Function_HOP with Function_WINDOW_ID. + /// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID. auto inner_query = innerQueryParser(select_query->as()); // Parse mergeable query @@ -971,13 +971,13 @@ StorageWindowView::StorageWindowView( if (is_time_column_func_now) window_id_name = func_now_data.window_id_name; - // Parse final query (same as mergeable query but has TUMBLE/HOP instead of WINDOW_ID) + // Parse final query (same as mergeable query but has tumble/hop instead of windowID) final_query = mergeable_query->clone(); ReplaceWindowIdMatcher::Data final_query_data; if (is_tumble) - final_query_data.window_name = "TUMBLE"; + final_query_data.window_name = "tumble"; else - final_query_data.window_name = "HOP"; + final_query_data.window_name = "hop"; ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query); is_watermark_strictly_ascending = query.is_watermark_strictly_ascending; @@ -989,9 +989,9 @@ StorageWindowView::StorageWindowView( eventTimeParser(query); if (is_tumble) - window_column_name = std::regex_replace(window_id_name, std::regex("WINDOW_ID"), "TUMBLE"); + window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "tumble"); else - window_column_name = std::regex_replace(window_id_name, std::regex("WINDOW_ID"), "HOP"); + window_column_name = std::regex_replace(window_id_name, std::regex("windowID"), "hop"); auto generate_inner_table_name = [](const StorageID & storage_id) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 08f24816d72..aaa9f7093e7 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -22,11 +22,11 @@ using ASTPtr = std::shared_ptr; * [ENGINE [db.]name] * [WATERMARK strategy] [ALLOWED_LATENESS interval_function] * AS SELECT ... 
- * GROUP BY [TUBLE/HOP(...)] + * GROUP BY [tumble/hop(...)] * * - only stores data that has not been triggered yet; * - fire_task checks if there is a window ready to be fired - * (each window result is fired in one output at the end of TUMBLE/HOP window interval); + * (each window result is fired in one output at the end of tumble/hop window interval); * - intermediate data is stored in inner table with * AggregatingMergeTree engine by default, but any other -MergeTree * engine might be used as inner table engine; @@ -35,24 +35,24 @@ using ASTPtr = std::shared_ptr; * Here function in GROUP BY clause results in a "window_id" * represented as Tuple(DateTime, DateTime) - lower and upper bounds of the window. * Function might be one of the following: - * 1. TUMBLE(time_attr, interval [, timezone]) + * 1. tumble(time_attr, interval [, timezone]) * - non-overlapping, continuous windows with a fixed duration (interval); * - example: - * SELECT TUMBLE(toDateTime('2021-01-01 00:01:45'), INTERVAL 10 SECOND) + * SELECT tumble(toDateTime('2021-01-01 00:01:45'), INTERVAL 10 SECOND) * results in ('2021-01-01 00:01:40','2021-01-01 00:01:50') - * 2. HOP(time_attr, hop_interval, window_interval [, timezone]) + * 2. hop(time_attr, hop_interval, window_interval [, timezone]) * - sliding window; * - has a fixed duration (window_interval parameter) and hops by a * specified hop interval (hop_interval parameter); * If the hop_interval is smaller than the window_interval, hopping windows * are overlapping. Thus, records can be assigned to multiple windows. * - example: - * SELECT HOP(toDateTime('2021-01-01 00:00:45'), INTERVAL 3 SECOND, INTERVAL 10 SECOND) + * SELECT hop(toDateTime('2021-01-01 00:00:45'), INTERVAL 3 SECOND, INTERVAL 10 SECOND) * results in ('2021-01-01 00:00:38','2021-01-01 00:00:48') * * DateTime value can be used with the following functions to find out start/end of the window: - * - TUMPLE_START(time_attr, interval [, timezone]), TUMPLE_END(time_attr, interval [, timezone]) - * - HOP_START(time_attr, hop_interval, window_interval [, timezone]), HOP_END(time_attr, hop_interval, window_interval [, timezone]) + * - tumbleStart(time_attr, interval [, timezone]), tumbleEnd(time_attr, interval [, timezone]) + * - hopStart(time_attr, hop_interval, window_interval [, timezone]), hopEnd(time_attr, hop_interval, window_interval [, timezone]) * * * Time processing options. @@ -61,8 +61,8 @@ using ASTPtr = std::shared_ptr; * - produces results based on the time of the local machine; * - example: * CREATE WINDOW VIEW test.wv TO test.dst - * AS SELECT count(number), TUMBLE_START(w_id) as w_start FROM test.mt - * GROUP BY TUMBLE(now(), INTERVAL '5' SECOND) as w_id + * AS SELECT count(number), tumbleStart(w_id) as w_start FROM test.mt + * GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id * * 2. 
event time * - produces results based on the time that is contained in every record; @@ -79,7 +79,7 @@ using ASTPtr = std::shared_ptr; * CREATE WINDOW VIEW test.wv TO test.dst * WATERMARK=STRICTLY_ASCENDING * AS SELECT count(number) FROM test.mt - * GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND); + * GROUP BY tumble(timestamp, INTERVAL '5' SECOND); * (where `timestamp` is a DateTime column in test.mt) * * @@ -90,8 +90,8 @@ using ASTPtr = std::shared_ptr; * - Can be enabled by using ALLOWED_LATENESS=INTERVAL, like this: * CREATE WINDOW VIEW test.wv TO test.dst * WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND - * AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM test.mt - * GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND) AS wid; + * AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt + * GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid; * * - Instead of firing at the end of windows, WINDOW VIEW will fire * immediately when encountering late events; @@ -150,11 +150,11 @@ public: private: Poco::Logger * log; - /// Stored query, e.g. SELECT * FROM * GROUP BY TUMBLE(now(), *) + /// Stored query, e.g. SELECT * FROM * GROUP BY tumble(now(), *) ASTPtr select_query; - /// Used to generate the mergeable state of select_query, e.g. SELECT * FROM * GROUP BY WINDOW_ID(____timestamp, *) + /// Used to generate the mergeable state of select_query, e.g. SELECT * FROM * GROUP BY windowID(____timestamp, *) ASTPtr mergeable_query; - /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY TUMBLE(____timestamp, *) + /// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *) ASTPtr final_query; ContextMutablePtr window_view_context; diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference index 19ebe5e0dbc..77f48f2832c 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.reference @@ -1,22 +1,22 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, 
toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(____timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---PARTITION--- -CREATE TABLE test_01047.`.inner.wv`\n(\n `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `WINDOW_ID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01047.`.inner.wv`\n(\n `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPARTITION BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(____timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql index f332ec57b7f..777c5ae2a5a 100644 --- a/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql +++ b/tests/queries/0_stateless/01047_window_view_parser_inner_table.sql @@ -12,31 +12,31 @@ SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY TUMBLE(timestamp, INTERVAL '1' SECOND) AS SELECT count(a), TUMBLE_END(wid) AS count FROM test_01047.mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a), tumbleEnd(wid) AS count FROM test_01047.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT 
count(a) AS count, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (TUMBLE(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY TUMBLE(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), b) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (TUMBLE(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY TUMBLE(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (tumble(timestamp, INTERVAL '1' SECOND), plus(a, b)) PRIMARY KEY tumble(timestamp, INTERVAL '1' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, TUMBLE(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, tumble(now(), INTERVAL '1' SECOND) AS wid FROM test_01047.mt GROUP BY wid; SHOW CREATE TABLE test_01047.`.inner.wv`; @@ -44,31 +44,31 @@ SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count, HOP_END(wid) FROM test_01047.mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01047.mt GROUP BY 
wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), b) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY (hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND), plus(a, b)) PRIMARY KEY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS SELECT count(a) AS count FROM test_01047.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01047.`.inner.wv`; SELECT '||---PARTITION---'; DROP TABLE IF EXISTS test_01047.wv; DROP TABLE IF EXISTS test_01047.`.inner.wv`; -CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, HOP_END(wid) FROM test_01047.mt GROUP BY HOP(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; +CREATE WINDOW VIEW test_01047.wv ENGINE AggregatingMergeTree ORDER BY wid PARTITION BY wid AS SELECT count(a) AS count, hopEnd(wid) FROM test_01047.mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) as wid; SHOW CREATE TABLE test_01047.`.inner.wv`; DROP TABLE test_01047.wv; diff --git a/tests/queries/0_stateless/01048_window_view_parser.reference b/tests/queries/0_stateless/01048_window_view_parser.reference index 47ed39fc1d8..6625313f572 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.reference +++ b/tests/queries/0_stateless/01048_window_view_parser.reference @@ -1,26 +1,26 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(1))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(1))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(1))`)\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = 
AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n 
`count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`)\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), 
toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nORDER BY tuple(`WINDOW_ID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`WINDOW_ID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nORDER BY tuple(`windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01048_window_view_parser.sql b/tests/queries/0_stateless/01048_window_view_parser.sql index e7dc4b324f6..3f57f6fbd91 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.sql +++ b/tests/queries/0_stateless/01048_window_view_parser.sql @@ -11,71 +11,71 @@ CREATE TABLE test_01048.mt(a Int32, b Int32, timestamp DateTime) 
ENGINE=MergeTre SELECT '---TUMBLE---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, TUMBLE_END(wid) as wend FROM test_01048.mt GROUP BY TUMBLE(timestamp, INTERVAL 1 SECOND) as wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumbleEnd(wid) as wend FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL 1 SECOND) as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND) AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid, b; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, b; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, tumble(timestamp, INTERVAL '1' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND) AS wid, plus(a, b); SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, TUMBLE(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt GROUP BY wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, tumble(timestamp, INTERVAL '1' SECOND, 'Asia/Shanghai') AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '---HOP---'; SELECT '||---WINDOW COLUMN NAME---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, HOP_END(wid) as wend FROM test_01048.mt GROUP BY HOP(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND) as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---WINDOW COLUMN ALIAS---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; 
+CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid FROM test_01048.mt GROUP BY wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---IDENTIFIER---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY b, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, b; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---FUNCTION---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY plus(a, b) as _type, hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; SHOW CREATE TABLE test_01048.`.inner.wv`; SELECT '||---TimeZone---'; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, HOP_END(wid) as wend FROM test_01048.mt GROUP BY HOP(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count, hopEnd(wid) as wend FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'Asia/Shanghai') as wid; SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE IF EXISTS test_01048.wv; -CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); +CREATE WINDOW VIEW test_01048.wv AS SELECT count(a) AS count FROM test_01048.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid, plus(a, b); SHOW CREATE TABLE test_01048.`.inner.wv`; DROP TABLE test_01048.wv; diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.reference b/tests/queries/0_stateless/01049_window_view_window_functions.reference index e8813db5a7d..2d49664b280 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.reference +++ b/tests/queries/0_stateless/01049_window_view_window_functions.reference @@ -1,69 +1,69 @@ -- { echo } -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); ('2020-01-09 12:00:01','2020-01-09 12:00:02') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); ('2020-01-09 12:00:00','2020-01-09 12:01:00') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); ('2020-01-09 12:00:00','2020-01-09 13:00:00') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' 
DAY, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); ('2020-01-09 00:00:00','2020-01-10 00:00:00') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); ('2020-01-06','2020-01-13') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); ('2020-01-01','2020-02-01') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); ('2020-01-01','2020-04-01') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); ('2020-01-01','2021-01-01') -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); ('2020-01-09 00:00:00','2020-01-10 00:00:00') -SELECT TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); 2020-01-09 00:00:00 -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-09 00:00:00 -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-09 00:00:00 -SELECT TUMBLE_START(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumbleStart(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); 2020-01-09 00:00:00 -SELECT TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT TUMBLE_END(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumbleEnd(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); 2020-01-10 00:00:00 -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); ('2020-01-09 11:59:59','2020-01-09 12:00:02') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 
3 MINUTE, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); ('2020-01-09 11:58:00','2020-01-09 12:01:00') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); ('2020-01-09 10:00:00','2020-01-09 13:00:00') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); ('2020-01-07 00:00:00','2020-01-10 00:00:00') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); ('2019-12-23','2020-01-13') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); ('2019-11-01','2020-02-01') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); ('2019-07-01','2020-04-01') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); ('2018-01-01','2021-01-01') -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); ('2020-01-07 00:00:00','2020-01-10 00:00:00') -SELECT HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); 2020-01-07 00:00:00 -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-07 00:00:00 -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-07 00:00:00 -SELECT HOP_START(HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hopStart(hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2020-01-07 00:00:00 -SELECT HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); 2020-01-10 00:00:00 -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 
00:00:00 -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); 2020-01-10 00:00:00 -SELECT HOP_END(HOP(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); 2019-01-10 00:00:00 diff --git a/tests/queries/0_stateless/01049_window_view_window_functions.sql b/tests/queries/0_stateless/01049_window_view_window_functions.sql index 4c98f9445e1..617019bd2c6 100644 --- a/tests/queries/0_stateless/01049_window_view_window_functions.sql +++ b/tests/queries/0_stateless/01049_window_view_window_functions.sql @@ -1,38 +1,38 @@ -- { echo } -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' HOUR, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' MONTH, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' QUARTER, 'US/Samoa'); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' YEAR, 'US/Samoa'); -SELECT TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(TUMBLE_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT TUMBLE_START(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); -SELECT TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(TUMBLE_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT TUMBLE_END(TUMBLE(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT tumbleStart(toDateTime('2020-01-09 12:00:01', 
'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT tumbleStart(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); +SELECT tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(tumbleEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT tumbleEnd(tumble(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, 'US/Samoa')); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 SECOND, INTERVAL 3 SECOND, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MINUTE, INTERVAL 3 MINUTE, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 HOUR, INTERVAL 3 HOUR, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 DAY, INTERVAL 3 DAY, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 WEEK, INTERVAL 3 WEEK, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 MONTH, INTERVAL 3 MONTH, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 QUARTER, INTERVAL 3 QUARTER, 'US/Samoa'); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL 1 YEAR, INTERVAL 3 YEAR, 'US/Samoa'); -SELECT HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); -SELECT HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(HOP_START(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT HOP_START(HOP(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); -SELECT HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); -SELECT toDateTime(HOP_END(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 
'US/Samoa'); -SELECT HOP_END(HOP(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopStart(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT hopStart(hop(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); +SELECT hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT toDateTime(hopEnd(toDateTime('2020-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa'), 'US/Samoa'); +SELECT hopEnd(hop(toDateTime('2019-01-09 12:00:01', 'US/Samoa'), INTERVAL '1' DAY, INTERVAL '3' DAY, 'US/Samoa')); diff --git a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql index 6837036263c..54f9ed00cbe 100644 --- a/tests/queries/0_stateless/01050_window_view_parser_tumble.sql +++ b/tests/queries/0_stateless/01050_window_view_parser_tumble.sql @@ -6,28 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(TUMBLE(timestamp, INTERVAL '3' SECOND)) AS w_start, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(tumble(timestamp, INTERVAL '3' SECOND)) AS w_start, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(wid) AS w_end, date_time FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(wid) AS w_end, date_time FROM mt GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY 
TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY TUMBLE(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY tumble(timestamp, INTERVAL '3' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), TUMBLE_START(wid) AS w_start, TUMBLE_END(TUMBLE(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY TUMBLE(now(), INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), tumbleStart(wid) AS w_start, tumbleEnd(tumble(now(), INTERVAL '3' SECOND)) AS w_end FROM mt GROUP BY tumble(now(), INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01051_window_view_parser_hop.sql b/tests/queries/0_stateless/01051_window_view_parser_hop.sql index df0729108d0..0f705d5c911 100644 --- a/tests/queries/0_stateless/01051_window_view_parser_hop.sql +++ b/tests/queries/0_stateless/01051_window_view_parser_hop.sql @@ -6,28 +6,28 @@ CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); SELECT '---WATERMARK---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), HOP_START(wid) AS w_start, HOP_END(wid) AS w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '1' SECOND AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---With w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start, HOP_END(wid) AS w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WithOut w_end---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WITH---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), HOP_START(wid) AS w_start, HOP_END(wid) AS w_end, date_time FROM mt GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS WITH toDateTime('2018-01-01 00:00:00') AS date_time SELECT count(a), hopStart(wid) AS w_start, hopEnd(wid) AS w_end, date_time FROM mt GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; SELECT '---WHERE---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' 
SECOND) AS wid; SELECT '---ORDER_BY---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start FROM mt WHERE a != 1 GROUP BY HOP(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start FROM mt WHERE a != 1 GROUP BY hop(timestamp, INTERVAL '3' SECOND, INTERVAL '5' SECOND) AS wid ORDER BY w_start; SELECT '---With now---'; DROP TABLE IF EXISTS wv NO DELAY; -CREATE WINDOW VIEW wv AS SELECT count(a), HOP_START(wid) AS w_start, HOP_END(HOP(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY HOP(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; +CREATE WINDOW VIEW wv AS SELECT count(a), hopStart(wid) AS w_start, hopEnd(hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND)) as w_end FROM mt GROUP BY hop(now(), INTERVAL '1' SECOND, INTERVAL '3' SECOND) AS wid; diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql index 169a66e7bc6..2d01e1205b2 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql @@ -6,7 +6,7 @@ DROP TABLE IF EXISTS wv; CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY TUMBLE(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY tumble(now('US/Samoa'), INTERVAL '1' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1); SELECT sleep(3); diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql index c39bab21cb1..9f3dc3ca89e 100644 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql @@ -6,7 +6,7 @@ DROP TABLE IF EXISTS wv; CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY HOP(now('US/Samoa'), INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY hop(now('US/Samoa'), INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1); SELECT sleep(3); diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql index f229969603b..86b7ab89150 100644 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, now('US/Samoa') + 1); SELECT sleep(3); diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql 
b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql index b75cc33e741..1da497092c5 100644 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst AS SELECT count(a) AS count FROM mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, now('US/Samoa') + 1); SELECT sleep(3); diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index df83615d507..02e97ee7a17 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -34,7 +34,7 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send('CREATE TABLE 01056_window_view_proc_hop_watch.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()') client1.expect(prompt) - client1.send("CREATE WINDOW VIEW 01056_window_view_proc_hop_watch.wv AS SELECT count(a) AS count FROM 01056_window_view_proc_hop_watch.mt GROUP BY HOP(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid;") + client1.send("CREATE WINDOW VIEW 01056_window_view_proc_hop_watch.wv AS SELECT count(a) AS count FROM 01056_window_view_proc_hop_watch.mt GROUP BY hop(timestamp, INTERVAL '1' SECOND, INTERVAL '1' SECOND, 'US/Samoa') AS wid;") client1.expect(prompt) client1.send('WATCH 01056_window_view_proc_hop_watch.wv') diff --git a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql index 4883e006e85..de738662817 100644 --- a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql +++ b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, TUMBLE_END(wid) as w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) as w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); diff --git a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql index 944fd9939b4..c9846cbd7cd 100644 --- a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql +++ b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst 
WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, HOP_END(wid) as w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, hopEnd(wid) as w_end FROM mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 1052b44965c..638182ac216 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -30,7 +30,7 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send("CREATE TABLE db_01059_event_hop_watch_strict_asc.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()") client1.expect(prompt) - client1.send("CREATE WINDOW VIEW db_01059_event_hop_watch_strict_asc.wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, HOP_END(wid) as w_end FROM db_01059_event_hop_watch_strict_asc.mt GROUP BY HOP(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid;") + client1.send("CREATE WINDOW VIEW db_01059_event_hop_watch_strict_asc.wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(a) AS count, hopEnd(wid) as w_end FROM db_01059_event_hop_watch_strict_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid;") client1.expect(prompt) client1.send('WATCH db_01059_event_hop_watch_strict_asc.wv') diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql index 18b17fd3d2b..cb27e881870 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); diff --git a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql index 2cf98d6b08f..c021bd1d4a1 100644 --- a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql +++ b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, HOP_END(wid) AS w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) 
AS count, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index 8541c7ee064..6be3e08665c 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -32,7 +32,7 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send('CREATE TABLE 01062_window_view_event_hop_watch_asc.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()') client1.expect(prompt) - client1.send("CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, HOP_END(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY HOP(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid") + client1.send("CREATE WINDOW VIEW 01062_window_view_event_hop_watch_asc.wv WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01062_window_view_event_hop_watch_asc.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid") client1.expect(prompt) client1.send('WATCH 01062_window_view_event_hop_watch_asc.wv') diff --git a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql index 37757fd77b3..6b17d04517a 100644 --- a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql +++ b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql @@ -7,7 +7,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK = INTERVAL '2' SECOND AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK = INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); diff --git a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql index 5f148900905..2f4b1c13d47 100644 --- a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql +++ b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql @@ -6,7 +6,7 @@ DROP TABLE IF EXISTS wv NO DELAY; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, HOP_END(wid) AS w_end FROM mt GROUP BY HOP(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, 
'1990/01/01 12:00:01'); diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index adae1073c80..b828c5116da 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -32,7 +32,7 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send('CREATE TABLE test.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()') client1.expect(prompt) - client1.send("CREATE WINDOW VIEW test.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, HOP_END(wid) AS w_end FROM test.mt GROUP BY HOP(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid") + client1.send("CREATE WINDOW VIEW test.wv WATERMARK=INTERVAL '2' SECOND AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM test.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid") client1.expect(prompt) client1.send('WATCH test.wv') diff --git a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql index 1c55b70f3aa..37830d506d6 100644 --- a/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql +++ b/tests/queries/0_stateless/01066_window_view_event_tumble_to_strict_asc_lateness.sql @@ -8,7 +8,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=STRICTLY_ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=STRICTLY_ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:02'); diff --git a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql index 11409203d4c..eb57d9b6b15 100644 --- a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql +++ b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql @@ -8,7 +8,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:02'); diff --git a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql 
index 74a095c632f..bc6d3a30947 100644 --- a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql +++ b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql @@ -8,7 +8,7 @@ DROP TABLE IF EXISTS `.inner.wv`; CREATE TABLE dst(count UInt64, w_end DateTime) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=INTERVAL '2' SECOND ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, TUMBLE_END(wid) AS w_end FROM mt GROUP BY TUMBLE(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE WINDOW VIEW wv TO dst WATERMARK=INTERVAL '2' SECOND ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); INSERT INTO mt VALUES (1, '1990/01/01 12:00:02'); diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index 6f4f1795857..adab2988e39 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -32,7 +32,7 @@ with client(name='client1>', log=log) as client1, client(name='client2>', log=lo client1.send('CREATE TABLE 01069_window_view_proc_tumble_watch.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()') client1.expect(prompt) - client1.send("CREATE WINDOW VIEW 01069_window_view_proc_tumble_watch.wv AS SELECT count(a) AS count FROM 01069_window_view_proc_tumble_watch.mt GROUP BY TUMBLE(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;") + client1.send("CREATE WINDOW VIEW 01069_window_view_proc_tumble_watch.wv AS SELECT count(a) AS count FROM 01069_window_view_proc_tumble_watch.mt GROUP BY tumble(timestamp, INTERVAL '1' SECOND, 'US/Samoa') AS wid;") client1.expect(prompt) client1.send('WATCH 01069_window_view_proc_tumble_watch.wv') From 9b9d5243596eb4e9a361da1a7b6a221eb42b3b9b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 7 Dec 2021 12:12:20 +0300 Subject: [PATCH 150/262] Revert "Add a test with 20000 mutations in one query" --- .../02125_many_mutations.reference | 6 --- .../0_stateless/02125_many_mutations.sh | 49 ------------------- 2 files changed, 55 deletions(-) delete mode 100644 tests/queries/0_stateless/02125_many_mutations.reference delete mode 100755 tests/queries/0_stateless/02125_many_mutations.sh diff --git a/tests/queries/0_stateless/02125_many_mutations.reference b/tests/queries/0_stateless/02125_many_mutations.reference deleted file mode 100644 index c98d8221c7f..00000000000 --- a/tests/queries/0_stateless/02125_many_mutations.reference +++ /dev/null @@ -1,6 +0,0 @@ -0 0 -1 1 -20000 -0 -0 20000 -1 20001 diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh deleted file mode 100755 index 727cc9d6213..00000000000 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env bash -# Tags: long - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CURDIR"/../shell_config.sh - -$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x" -$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" -$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" - -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" - -job() -{ - for i in {1..1000} - do - $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" - done -} - -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & -job & - -wait - -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "system start merges many_mutations" -$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" -$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" -$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" From 5717458f0c9d0df237b7aca2e826e509c4a18820 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 7 Dec 2021 12:12:41 +0300 Subject: [PATCH 151/262] Revert "Revert "Add a test with 20000 mutations in one query"" --- .../02125_many_mutations.reference | 6 +++ .../0_stateless/02125_many_mutations.sh | 49 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 tests/queries/0_stateless/02125_many_mutations.reference create mode 100755 tests/queries/0_stateless/02125_many_mutations.sh diff --git a/tests/queries/0_stateless/02125_many_mutations.reference b/tests/queries/0_stateless/02125_many_mutations.reference new file mode 100644 index 00000000000..c98d8221c7f --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations.reference @@ -0,0 +1,6 @@ +0 0 +1 1 +20000 +0 +0 20000 +1 20001 diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh new file mode 100755 index 00000000000..727cc9d6213 --- /dev/null +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Tags: long + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "create table many_mutations (x UInt32, y UInt32) engine = MergeTree order by x" +$CLICKHOUSE_CLIENT -q "insert into many_mutations values (0, 0), (1, 1)" +$CLICKHOUSE_CLIENT -q "system stop merges many_mutations" + +$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" + +job() +{ + for i in {1..1000} + do + $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" + done +} + +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & +job & + +wait + +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "system start merges many_mutations" +$CLICKHOUSE_CLIENT -q "optimize table many_mutations final" +$CLICKHOUSE_CLIENT -q "select count() from system.mutations where database = currentDatabase() and table = 'many_mutations' and not is_done" +$CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" From deb6a06ce40b4a0f0efe60c1ca146971b70218b3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 7 Dec 2021 12:18:26 +0300 Subject: [PATCH 152/262] Update 02125_many_mutations.sh --- tests/queries/0_stateless/02125_many_mutations.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index 727cc9d6213..603713e5e9f 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-tsan, no-debug, no-asan, no-msan, no-ubsan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 03dc76da153f6bc4169acec7961b3a4355777b1f Mon Sep 17 00:00:00 2001 From: vxider Date: Tue, 7 Dec 2021 10:05:19 +0000 Subject: [PATCH 153/262] add window view doc(en) --- .../functions/window-functions.md | 114 +++++++++++++++++ .../sql-reference/statements/create/view.md | 118 +++++++++++++++++- 2 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 docs/en/sql-reference/functions/window-functions.md diff --git a/docs/en/sql-reference/functions/window-functions.md b/docs/en/sql-reference/functions/window-functions.md new file mode 100644 index 00000000000..66f33f512a7 --- /dev/null +++ b/docs/en/sql-reference/functions/window-functions.md @@ -0,0 +1,114 @@ +--- +toc_priority: 68 +toc_title: Window +--- + +# Window Functions {#window-functions} + +Window functions indicate the lower and upper window bound of records in WindowView. The functions for working with WindowView are listed below. + +## tumble {#window-functions-tumble} + +A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (interval). + +``` sql +tumble(time_attr, interval [, timezone]) +``` + +**Arguments** +- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The lower and upper bound of the tumble window. 
+ +Type: `Tuple(DateTime, DateTime)` + +**Example** + +Query: + +``` sql +SELECT tumble(now(), toIntervalDay('1')) +``` + +Result: + +``` text +┌─tumble(now(), toIntervalDay('1'))─────────────┐ +│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +└───────────────────────────────────────────────┘ +``` + +## hop {#window-functions-hop} + +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. + +``` sql +hop(time_attr, hop_interval, window_interval [, timezone]) +``` + +**Arguments** + +- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The lower and upper bound of the hop window. Since hop windows are + overlapped, the function only returns the bound of the **first** window when + hop function is used **without** `WINDOW VIEW`. + +Type: `Tuple(DateTime, DateTime)` + +**Example** + +Query: + +``` sql +SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +``` + +Result: + +``` text +┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ +│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ +└───────────────────────────────────────────────────────────┘ +``` + +## tumbleStart {#window-functions-tumblestart} + +Indicate the lower bound of a tumble function. + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +## tumbleEnd {#window-functions-tumbleend} + +Indicate the upper bound of a tumble function. + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +## hopStart {#window-functions-hopstart} + +Indicate the lower bound of a hop function. + +``` sql +hopStart(time_attr, hop_interval, window_interval [, timezone]); +``` + +## hopEnd {#window-functions-hopend} + +Indicate the upper bound of a hop function. + +``` sql +hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index ec34c57a4cd..f67cc62f7db 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -5,7 +5,7 @@ toc_title: VIEW # CREATE VIEW {#create-view} -Creates a new view. Views can be [normal](#normal), [materialized](#materialized) and [live](#live-view) (the latter is an experimental feature). +Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features). ## Normal View {#normal} @@ -243,3 +243,119 @@ Most common uses of live view tables include: **See Also** - [ALTER LIVE VIEW](../alter/view.md#alter-live-view) + +## Window View [Experimental] {#window-view} + +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. 
+ Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. + +``` sql +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_function +``` + +Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table and can push the processing result to a specified table or push notifications using the WATCH query. + +Creating a window view is similar to creating `MATERIALIZED VIEW`. Window view needs an inner storage engine to store intermediate data. The inner storage will use `AggregatingMergeTree` as the default engine. + +### Window Functions {#window-view-windowfunctions} + +[WindowFunctions](../../functions/window-functions.md) are used to indicate the lower and upper window bound of records. The window view needs to be used with a window function. + +### TIME ATTRIBUTES {#window-view-timeattributes} + +Window view supports **processing time** and **event time** process. + +**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the window function to a table column or using the function `now()`. The following query creates a window view with processing time. + +``` sql +CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id +``` + +**Event time** is the time that each individual event occurred on its producing device. This time is typically embedded within the records when it is generated. Event time processing allows for consistent results even in case of out-of-order events or late events. Window view supports event time processing by using `WATERMARK` syntax. + +Window view provides three watermark strategies. + +* `STRICTLY_ASCENDING`: Emits a watermark of the maximum observed timestamp so far. Rows that have a timestamp smaller to the max timestamp are not late. +* `ASCENDING`: Emits a watermark of the maximum observed timestamp so far minus 1. Rows that have a timestamp equal and smaller to the max timestamp are not late. +* `BOUNDED`: WATERMARK=INTERVAL. Emits watermarks, which are the maximum observed timestamp minus the specified delay. + +The following queries are examples of creating a window view with `WATERMARK`. + +``` sql +CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '3' SECOND AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +``` + +By default, the window will be fired when the watermark comes, and elements that arrived behind the watermark will be dropped. Window view supports late event processing by setting `ALLOWED_LATENESS=INTERVAL`. 
An example of lateness handling is: + +``` sql +CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid; +``` + +Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them. + +### Monitoring New Windows{#window-view-monitoring} + +Window view supports the `WATCH` query to constantly append the processing results to the console or use `TO` syntax to output the results to a table. + +``` sql +WATCH [db.]name [LIMIT n] +``` + +`WATCH` query acts similar as in `LIVE VIEW`. A `LIMIT` can be specified to set the number of updates to receive before terminating the query. + +### Settings {#window-view-settings} + +- `window_view_clean_interval`: The clean interval of window view in seconds to free outdated data. The system will retain the windows that have not been fully triggered according to the system time or `WATERMARK` configuration, and the other data will be deleted. +- `window_view_heartbeat_interval`: The heartbeat interval in seconds to indicate the watch query is alive. + +### Example {#window-view-example} + +Suppose we need to count the number of click logs per 10 seconds in a log table called `data`, and its table structure is: + +``` sql +CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory; +``` + +First, we create a window view with tumble window of 10 seconds interval: + +``` sql +CREATE WINDOW VIEW wv as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +Then, we use the `WATCH` query to get the results. + +``` sql +WATCH wv +``` + +When logs are inserted into table `data`, + +``` sql +INSERT INTO data VALUES(1,now()) +``` + +The `WATCH` query should print the results as follows: + +``` text +┌─count(id)─┬────────window_start─┐ +│ 1 │ 2020-01-14 16:56:40 │ +└───────────┴─────────────────────┘ +``` + +Alternatively, we can attach the output to another table using `TO` syntax. + +``` sql +CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_start FROM data GROUP BY tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +Additional examples can be found among stateful tests of ClickHouse (they are named `*window_view*` there). + +### Window View Usage {#window-view-usage} + +The window view is useful in the following scenarios: + +* **Monitoring**: Aggregate and calculate the metrics logs by time, and output the results to a target table. The dashboard can use the target table as a source table. +* **Analyzing**: Automatically aggregate and preprocess data in the time window. This can be useful when analyzing a large number of logs. The preprocessing eliminates repeated calculations in multiple queries and reduces query latency. 
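Putting the documented clauses together, the following is a minimal sketch of the "Monitoring" scenario described above: an event-time window view with a bounded watermark and lateness handling that pushes results into a target table. It only combines the syntax shown in this document; the tables `data` and `dst` and their column layout are illustrative assumptions.

``` sql
SET allow_experimental_window_view = 1;

-- Illustrative source and target tables (column layout is an assumption).
CREATE TABLE data (id UInt64, timestamp DateTime) ENGINE = MergeTree ORDER BY tuple();
CREATE TABLE dst (count UInt64, w_end DateTime) ENGINE = MergeTree ORDER BY tuple();

-- Event-time window view: 10-second tumbling windows, a bounded watermark of 3 seconds,
-- and late events accepted for another 2 seconds (each late firing re-emits its window).
CREATE WINDOW VIEW wv TO dst
    WATERMARK = INTERVAL '3' SECOND
    ALLOWED_LATENESS = INTERVAL '2' SECOND
    AS SELECT count(id) AS count, tumbleEnd(wid) AS w_end
    FROM data
    GROUP BY tumble(timestamp, INTERVAL '10' SECOND) AS wid;
```

After rows are inserted into `data`, fired windows are appended to `dst`, which can be read like any other table (for example, `SELECT count, w_end FROM dst ORDER BY w_end`); rows re-emitted by late firings should be deduplicated by the reader, as noted above.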
From c7f0a400d812e4a97f3f3d225e28a2db9fcef7aa Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 7 Dec 2021 10:31:48 +0000 Subject: [PATCH 154/262] Typo --- programs/local/LocalServer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 561b0588787..1f27072f142 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -830,7 +830,7 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) char ** argv = *pargv; /// As a user you can add flags to clickhouse binary in fuzzing mode as follows - /// clickhouse -- + /// clickhouse -- /// Calculate the position of delimiter "--" that separates arguments /// of clickhouse-local and libfuzzer From ed17afbdb312b65d24ab383ca5cc59f841792825 Mon Sep 17 00:00:00 2001 From: vxider Date: Tue, 7 Dec 2021 10:31:49 +0000 Subject: [PATCH 155/262] rename window function to window view function --- .../functions/window-functions.md | 114 ------------------ .../sql-reference/statements/create/view.md | 8 +- 2 files changed, 4 insertions(+), 118 deletions(-) delete mode 100644 docs/en/sql-reference/functions/window-functions.md diff --git a/docs/en/sql-reference/functions/window-functions.md b/docs/en/sql-reference/functions/window-functions.md deleted file mode 100644 index 66f33f512a7..00000000000 --- a/docs/en/sql-reference/functions/window-functions.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -toc_priority: 68 -toc_title: Window ---- - -# Window Functions {#window-functions} - -Window functions indicate the lower and upper window bound of records in WindowView. The functions for working with WindowView are listed below. - -## tumble {#window-functions-tumble} - -A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (interval). - -``` sql -tumble(time_attr, interval [, timezone]) -``` - -**Arguments** -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). - -**Returned values** - -- The lower and upper bound of the tumble window. - -Type: `Tuple(DateTime, DateTime)` - -**Example** - -Query: - -``` sql -SELECT tumble(now(), toIntervalDay('1')) -``` - -Result: - -``` text -┌─tumble(now(), toIntervalDay('1'))─────────────┐ -│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ -└───────────────────────────────────────────────┘ -``` - -## hop {#window-functions-hop} - -A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. - -``` sql -hop(time_attr, hop_interval, window_interval [, timezone]) -``` - -**Arguments** - -- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. -- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. -- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. 
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). - -**Returned values** - -- The lower and upper bound of the hop window. Since hop windows are - overlapped, the function only returns the bound of the **first** window when - hop function is used **without** `WINDOW VIEW`. - -Type: `Tuple(DateTime, DateTime)` - -**Example** - -Query: - -``` sql -SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) -``` - -Result: - -``` text -┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ -│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ -└───────────────────────────────────────────────────────────┘ -``` - -## tumbleStart {#window-functions-tumblestart} - -Indicate the lower bound of a tumble function. - -``` sql -tumbleStart(time_attr, interval [, timezone]); -``` - -## tumbleEnd {#window-functions-tumbleend} - -Indicate the upper bound of a tumble function. - -``` sql -tumbleEnd(time_attr, interval [, timezone]); -``` - -## hopStart {#window-functions-hopstart} - -Indicate the lower bound of a hop function. - -``` sql -hopStart(time_attr, hop_interval, window_interval [, timezone]); -``` - -## hopEnd {#window-functions-hopend} - -Indicate the upper bound of a hop function. - -``` sql -hopEnd(time_attr, hop_interval, window_interval [, timezone]); -``` \ No newline at end of file diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index f67cc62f7db..aa6b82360e0 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -251,22 +251,22 @@ Most common uses of live view tables include: Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. ``` sql -CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_function +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_view_function ``` Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table and can push the processing result to a specified table or push notifications using the WATCH query. Creating a window view is similar to creating `MATERIALIZED VIEW`. Window view needs an inner storage engine to store intermediate data. The inner storage will use `AggregatingMergeTree` as the default engine. -### Window Functions {#window-view-windowfunctions} +### Window View Functions {#window-view-windowviewfunctions} -[WindowFunctions](../../functions/window-functions.md) are used to indicate the lower and upper window bound of records. The window view needs to be used with a window function. +[Window view functions](../../functions/window-view-functions.md) are used to indicate the lower and upper window bound of records. The window view needs to be used with a window view function. ### TIME ATTRIBUTES {#window-view-timeattributes} Window view supports **processing time** and **event time** process. 
-**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the window function to a table column or using the function `now()`. The following query creates a window view with processing time. +**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the window view function to a table column or using the function `now()`. The following query creates a window view with processing time. ``` sql CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id From 6c16348faa59805ebf44b4bdd92675eee5a2ad17 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Tue, 7 Dec 2021 13:32:26 +0300 Subject: [PATCH 156/262] Fix division by zero in avgWeighted with Decimal argument (#32303) * fix division by zero * Update src/AggregateFunctions/AggregateFunctionAvg.h Co-authored-by: Dmitry Novik * remove trash * Update AggregateFunctionAvg.h Co-authored-by: Dmitry Novik --- src/AggregateFunctions/AggregateFunctionAvg.h | 35 +++++-------------- .../AggregateFunctionAvgWeighted.cpp | 6 ++-- .../01668_avg_weighted_ubsan.reference | 13 +++++++ .../0_stateless/01668_avg_weighted_ubsan.sql | 4 +++ 4 files changed, 28 insertions(+), 30 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index e2a9220f113..8ca0ae1dac2 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -20,6 +20,7 @@ namespace DB { + struct Settings; template constexpr bool DecimalOrExtendedInt = @@ -42,39 +43,19 @@ struct AvgFraction /// Invoked only is either Numerator or Denominator are Decimal. Float64 NO_SANITIZE_UNDEFINED divideIfAnyDecimal(UInt32 num_scale, UInt32 denom_scale [[maybe_unused]]) const { - if constexpr (is_decimal && is_decimal) - { - // According to the docs, num(S1) / denom(S2) would have scale S1 - - if constexpr (std::is_same_v && std::is_same_v) - ///Special case as Decimal256 / Decimal128 = compile error (as Decimal128 is not parametrized by a wide - ///int), but an __int128 instead - return DecimalUtils::convertTo( - numerator / (denominator.template convertTo()), num_scale); - else - return DecimalUtils::convertTo(numerator / denominator, num_scale); - } - - /// Numerator is always casted to Float64 to divide correctly if the denominator is not Float64. - Float64 num_converted; - + Float64 numerator_float; if constexpr (is_decimal) - num_converted = DecimalUtils::convertTo(numerator, num_scale); + numerator_float = DecimalUtils::convertTo(numerator, num_scale); else - num_converted = static_cast(numerator); /// all other types, including extended integral. - - std::conditional_t, - Float64, Denominator> denom_converted; + numerator_float = numerator; + Float64 denominator_float; if constexpr (is_decimal) - denom_converted = DecimalUtils::convertTo(denominator, denom_scale); - else if constexpr (DecimalOrExtendedInt) - /// no way to divide Float64 and extended integral type without an explicit cast. 
- denom_converted = static_cast(denominator); + denominator_float = DecimalUtils::convertTo(denominator, denom_scale); else - denom_converted = denominator; /// can divide on float, no cast required. + denominator_float = denominator; - return num_converted / denom_converted; + return numerator_float / denominator_float; } Float64 NO_SANITIZE_UNDEFINED divide() const diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp index b7fdb3460e3..ab6fdc8fd7e 100644 --- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp +++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp @@ -82,17 +82,17 @@ createAggregateFunctionAvgWeighted(const std::string & name, const DataTypes & a const bool left_decimal = isDecimal(data_type); const bool right_decimal = isDecimal(data_type_weight); + /// We multiply value by weight, so actual scale of numerator is + if (left_decimal && right_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, - getDecimalScale(*data_type), getDecimalScale(*data_type_weight))); + getDecimalScale(*data_type) + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight))); else if (left_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, getDecimalScale(*data_type))); else if (right_decimal) ptr.reset(create(*data_type, *data_type_weight, argument_types, - // numerator is not decimal, so its scale is 0 - 0, getDecimalScale(*data_type_weight))); + getDecimalScale(*data_type_weight), getDecimalScale(*data_type_weight))); else ptr.reset(create(*data_type, *data_type_weight, argument_types)); diff --git a/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference b/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference index ec064f61ba7..a8921b27cff 100644 --- a/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference +++ b/tests/queries/0_stateless/01668_avg_weighted_ubsan.reference @@ -1 +1,14 @@ -0 +nan +nan +1 +2 +3 +4 +5 +6 +7 +8 +9 +nan +nan diff --git a/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql b/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql index 24e7dc0cb90..1c31c23eaee 100644 --- a/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql +++ b/tests/queries/0_stateless/01668_avg_weighted_ubsan.sql @@ -1 +1,5 @@ SELECT round(avgWeighted(x, y)) FROM (SELECT 1023 AS x, 1000000000 AS y UNION ALL SELECT 10 AS x, -9223372036854775808 AS y); +select avgWeighted(number, toDecimal128(number, 9)) from numbers(0); +SELECT avgWeighted(a, toDecimal64(c, 9)) OVER (PARTITION BY c) FROM (SELECT number AS a, number AS c FROM numbers(10)); +select avg(toDecimal128(number, 9)) from numbers(0); +select avgWeighted(number, toDecimal128(0, 9)) from numbers(10); From 7e036d31e976625f4ce8f0e60b6bb7d9fed6f4c8 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 7 Dec 2021 11:29:46 +0000 Subject: [PATCH 157/262] One more style related commit --- src/Functions/getFuzzerData.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Functions/getFuzzerData.h b/src/Functions/getFuzzerData.h index 06f11f28e70..635ca2bdce9 100644 --- a/src/Functions/getFuzzerData.h +++ b/src/Functions/getFuzzerData.h @@ -1,3 +1,5 @@ +#pragma once + #include #include #include From 877716b2b005401bcc81d10ad1e48a4c9b9d9b90 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 7 Dec 2021 15:46:19 +0300 Subject: [PATCH 158/262] Update 02125_many_mutations.sh --- tests/queries/0_stateless/02125_many_mutations.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/tests/queries/0_stateless/02125_many_mutations.sh b/tests/queries/0_stateless/02125_many_mutations.sh index 603713e5e9f..7a89e5f7c4f 100755 --- a/tests/queries/0_stateless/02125_many_mutations.sh +++ b/tests/queries/0_stateless/02125_many_mutations.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -q "select x, y from many_mutations order by x" job() { - for i in {1..1000} + for _ in {1..1000} do $CLICKHOUSE_CLIENT -q "alter table many_mutations update y = y + 1 where 1" done From afcc3b6f965ce032f68631ca72596423a914bbce Mon Sep 17 00:00:00 2001 From: vxider Date: Tue, 7 Dec 2021 13:35:30 +0000 Subject: [PATCH 159/262] add doc window-view-functions --- .../functions/window-view-functions.md | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 docs/en/sql-reference/functions/window-view-functions.md diff --git a/docs/en/sql-reference/functions/window-view-functions.md b/docs/en/sql-reference/functions/window-view-functions.md new file mode 100644 index 00000000000..5684e93bd88 --- /dev/null +++ b/docs/en/sql-reference/functions/window-view-functions.md @@ -0,0 +1,114 @@ +--- +toc_priority: 68 +toc_title: Window View +--- + +# Window View Functions {#window-view-functions} + +Window functions indicate the lower and upper window bound of records in WindowView. The functions for working with WindowView are listed below. + +## tumble {#window-view-functions-tumble} + +A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (interval). + +``` sql +tumble(time_attr, interval [, timezone]) +``` + +**Arguments** +- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The lower and upper bound of the tumble window. + +Type: `Tuple(DateTime, DateTime)` + +**Example** + +Query: + +``` sql +SELECT tumble(now(), toIntervalDay('1')) +``` + +Result: + +``` text +┌─tumble(now(), toIntervalDay('1'))─────────────┐ +│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +└───────────────────────────────────────────────┘ +``` + +## hop {#window-view-functions-hop} + +A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. + +``` sql +hop(time_attr, hop_interval, window_interval [, timezone]) +``` + +**Arguments** + +- `time_attr` - Date and time. [DateTime](../../sql-reference/data-types/datetime.md) data type. +- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `window_interval` - Window interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Should be a positive number. +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). + +**Returned values** + +- The lower and upper bound of the hop window. Since hop windows are + overlapped, the function only returns the bound of the **first** window when + hop function is used **without** `WINDOW VIEW`. 
+ +Type: `Tuple(DateTime, DateTime)` + +**Example** + +Query: + +``` sql +SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +``` + +Result: + +``` text +┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ +│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ +└───────────────────────────────────────────────────────────┘ +``` + +## tumbleStart {#window-view-functions-tumblestart} + +Indicate the lower bound of a tumble function. + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +## tumbleEnd {#window-view-functions-tumbleend} + +Indicate the upper bound of a tumble function. + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +## hopStart {#window-view-functions-hopstart} + +Indicate the lower bound of a hop function. + +``` sql +hopStart(time_attr, hop_interval, window_interval [, timezone]); +``` + +## hopEnd {#window-view-functions-hopend} + +Indicate the upper bound of a hop function. + +``` sql +hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` \ No newline at end of file From ef7ae7cb7571a95dd3966d89d4851e50b33df7af Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 7 Dec 2021 16:42:16 +0300 Subject: [PATCH 160/262] fix MATERIALIZE COLUMN in case when data type of expression is not equal to data type of column --- src/Interpreters/MutationsInterpreter.cpp | 5 ++- .../02131_materialize_column_cast.reference | 14 ++++++++ .../02131_materialize_column_cast.sql | 35 +++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02131_materialize_column_cast.reference create mode 100644 tests/queries/0_stateless/02131_materialize_column_cast.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 471ad67d4e7..ecf79c03445 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -569,7 +569,10 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages.emplace_back(context); const auto & column = columns_desc.get(command.column_name); - stages.back().column_to_updated.emplace(column.name, column.default_desc.expression->clone()); + auto materialized_column = makeASTFunction( + "_CAST", column.default_desc.expression->clone(), std::make_shared(column.type->getName())); + + stages.back().column_to_updated.emplace(column.name, materialized_column); } else if (command.type == MutationCommand::MATERIALIZE_INDEX) { diff --git a/tests/queries/0_stateless/02131_materialize_column_cast.reference b/tests/queries/0_stateless/02131_materialize_column_cast.reference new file mode 100644 index 00000000000..8c94b2ade9c --- /dev/null +++ b/tests/queries/0_stateless/02131_materialize_column_cast.reference @@ -0,0 +1,14 @@ +1_1_1_0_2 i Int32 +1_1_1_0_2 s LowCardinality(String) +=========== +1_1_1_0_2 i Int32 +1_1_1_0_2 s LowCardinality(String) +2_3_3_0 i Int32 +2_3_3_0 s LowCardinality(String) +=========== +1_1_1_0_4 i Int32 +1_1_1_0_4 s LowCardinality(String) +2_3_3_0_4 i Int32 +2_3_3_0_4 s LowCardinality(String) +1 1 +2 2 diff --git a/tests/queries/0_stateless/02131_materialize_column_cast.sql b/tests/queries/0_stateless/02131_materialize_column_cast.sql new file mode 100644 index 00000000000..3bfeaf5baeb --- /dev/null +++ b/tests/queries/0_stateless/02131_materialize_column_cast.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS t_materialize_column; + +CREATE TABLE t_materialize_column (i Int32) +ENGINE = MergeTree ORDER BY i PARTITION BY i +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO 
t_materialize_column VALUES (1); + +ALTER TABLE t_materialize_column ADD COLUMN s LowCardinality(String) DEFAULT toString(i); +ALTER TABLE t_materialize_column MATERIALIZE COLUMN s SETTINGS mutations_sync = 2; + +SELECT name, column, type FROM system.parts_columns +WHERE table = 't_materialize_column' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT '==========='; + +INSERT INTO t_materialize_column (i) VALUES (2); + +SELECT name, column, type FROM system.parts_columns +WHERE table = 't_materialize_column' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT '==========='; + +ALTER TABLE t_materialize_column ADD INDEX s_bf (s) TYPE bloom_filter(0.01) GRANULARITY 1; +ALTER TABLE t_materialize_column MATERIALIZE INDEX s_bf SETTINGS mutations_sync = 2; + +SELECT name, column, type FROM system.parts_columns +WHERE table = 't_materialize_column' AND database = currentDatabase() AND active +ORDER BY name, column; + +SELECT * FROM t_materialize_column ORDER BY i; + +DROP TABLE t_materialize_column; From b41552492e9496edcbd3a9917a4613b98efb68ca Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 7 Dec 2021 15:28:09 +0000 Subject: [PATCH 161/262] Done --- utils/graphite-rollup/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/graphite-rollup/CMakeLists.txt b/utils/graphite-rollup/CMakeLists.txt index bd6a078fbd5..3cc0d3e756f 100644 --- a/utils/graphite-rollup/CMakeLists.txt +++ b/utils/graphite-rollup/CMakeLists.txt @@ -9,7 +9,7 @@ target_link_libraries( ) target_include_directories( graphite-rollup-bench - PRIVATE + SYSTEM PRIVATE ${ClickHouse_SOURCE_DIR}/src ${CMAKE_BINARY_DIR}/src ${ClickHouse_SOURCE_DIR}/base ${ClickHouse_SOURCE_DIR}/base/pcg-random ${CMAKE_BINARY_DIR}/src/Core/include From 76f91c2c6cd8e324e0342a2fb096ac64ad368d48 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 7 Dec 2021 16:50:57 +0000 Subject: [PATCH 162/262] Done --- src/Interpreters/MutationsInterpreter.cpp | 4 ++-- .../02132_empty_mutation_livelock.reference | 2 ++ .../0_stateless/02132_empty_mutation_livelock.sql | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02132_empty_mutation_livelock.reference create mode 100644 tests/queries/0_stateless/02132_empty_mutation_livelock.sql diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 471ad67d4e7..d7443ca6edf 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -231,8 +231,8 @@ bool isStorageTouchedByMutations( PullingPipelineExecutor executor(io.pipeline); Block block; - while (!block.rows()) - executor.pull(block); + while (executor.pull(block)) {} + if (!block.rows()) return false; else if (block.rows() != 1) diff --git a/tests/queries/0_stateless/02132_empty_mutation_livelock.reference b/tests/queries/0_stateless/02132_empty_mutation_livelock.reference new file mode 100644 index 00000000000..e58e9764b39 --- /dev/null +++ b/tests/queries/0_stateless/02132_empty_mutation_livelock.reference @@ -0,0 +1,2 @@ +100 +100 diff --git a/tests/queries/0_stateless/02132_empty_mutation_livelock.sql b/tests/queries/0_stateless/02132_empty_mutation_livelock.sql new file mode 100644 index 00000000000..186199d4e13 --- /dev/null +++ b/tests/queries/0_stateless/02132_empty_mutation_livelock.sql @@ -0,0 +1,12 @@ +drop table if exists a8x; + +set empty_result_for_aggregation_by_empty_set=1; +create table a8x ENGINE = 
MergeTree ORDER BY tuple() settings min_bytes_for_wide_part=0 as SELECT number FROM system.numbers limit 100; + +select count() from a8x; + +set mutations_sync=1; +alter table a8x update number=0 WHERE number=-3; + +select count() from a8x; +drop table if exists a8x; From 4f46ac6b30f40939e3004f4ad683a776143b2cb7 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Tue, 7 Dec 2021 19:55:55 +0300 Subject: [PATCH 163/262] Remove LeaderElection (#32140) * remove LeaderElection * try fix tests * Update test.py * Update test.py --- src/Storages/MergeTree/LeaderElection.h | 170 ++++++------------ .../PartMovesBetweenShardsOrchestrator.cpp | 1 + .../ReplicatedMergeTreeCleanupThread.cpp | 2 +- .../ReplicatedMergeTreeRestartingThread.cpp | 7 - src/Storages/StorageReplicatedMergeTree.cpp | 60 +++---- src/Storages/StorageReplicatedMergeTree.h | 15 +- .../test_backward_compatibility/test.py | 15 +- .../test.py | 3 + 8 files changed, 90 insertions(+), 183 deletions(-) diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index ccc5fada537..afaf2e7e841 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -1,9 +1,6 @@ #pragma once -#include -#include #include -#include #include #include #include @@ -17,135 +14,74 @@ namespace zkutil * * But then we decided to get rid of leader election, so every replica can become leader. * For now, every replica can become leader if there is no leader among replicas with old version. - * - * It's tempting to remove this class at all, but we have to maintain it, - * to maintain compatibility when replicas with different versions work on the same cluster - * (this is allowed for short time period during cluster update). - * - * Replicas with new versions creates ephemeral sequential nodes with values like "replica_name (multiple leaders Ok)". - * If the first node belongs to a replica with new version, then all replicas with new versions become leaders. */ -class LeaderElection + +void checkNoOldLeaders(Poco::Logger * log, ZooKeeper & zookeeper, const String path) { -public: - using LeadershipHandler = std::function; + /// Previous versions (before 21.12) used to create ephemeral sequential node path/leader_election- + /// Replica with the lexicographically smallest node name becomes leader (before 20.6) or enables multi-leader mode (since 20.6) + constexpr auto persistent_multiple_leaders = "leader_election-0"; /// Less than any sequential node + constexpr auto suffix = " (multiple leaders Ok)"; + constexpr auto persistent_identifier = "all (multiple leaders Ok)"; - /** handler is called when this instance become leader. - * - * identifier - if not empty, must uniquely (within same path) identify participant of leader election. - * It means that different participants of leader election have different identifiers - * and existence of more than one ephemeral node with same identifier indicates an error. 
- */ - LeaderElection( - DB::BackgroundSchedulePool & pool_, - const std::string & path_, - ZooKeeper & zookeeper_, - LeadershipHandler handler_, - const std::string & identifier_) - : pool(pool_), path(path_), zookeeper(zookeeper_), handler(handler_), identifier(identifier_ + suffix) - , log_name("LeaderElection (" + path + ")") - , log(&Poco::Logger::get(log_name)) + size_t num_tries = 1000; + while (num_tries--) { - task = pool.createTask(log_name, [this] { threadFunction(); }); - createNode(); - } - - void shutdown() - { - if (shutdown_called) + Strings potential_leaders; + Coordination::Error code = zookeeper.tryGetChildren(path, potential_leaders); + /// NOTE zookeeper_path/leader_election node must exist now, but maybe we will remove it in future versions. + if (code == Coordination::Error::ZNONODE) return; + else if (code != Coordination::Error::ZOK) + throw KeeperException(code, path); - shutdown_called = true; - task->deactivate(); - } + Coordination::Requests ops; - ~LeaderElection() - { - releaseNode(); - } - -private: - static inline constexpr auto suffix = " (multiple leaders Ok)"; - DB::BackgroundSchedulePool & pool; - DB::BackgroundSchedulePool::TaskHolder task; - std::string path; - ZooKeeper & zookeeper; - LeadershipHandler handler; - std::string identifier; - std::string log_name; - Poco::Logger * log; - - EphemeralNodeHolderPtr node; - std::string node_name; - - std::atomic shutdown_called {false}; - - void createNode() - { - shutdown_called = false; - node = EphemeralNodeHolder::createSequential(fs::path(path) / "leader_election-", zookeeper, identifier); - - std::string node_path = node->getPath(); - node_name = node_path.substr(node_path.find_last_of('/') + 1); - - task->activateAndSchedule(); - } - - void releaseNode() - { - shutdown(); - node = nullptr; - } - - void threadFunction() - { - bool success = false; - - try + if (potential_leaders.empty()) { - Strings children = zookeeper.getChildren(path); - std::sort(children.begin(), children.end()); - - auto my_node_it = std::lower_bound(children.begin(), children.end(), node_name); - if (my_node_it == children.end() || *my_node_it != node_name) - throw Poco::Exception("Assertion failed in LeaderElection"); - - String value = zookeeper.get(path + "/" + children.front()); - - if (value.ends_with(suffix)) - { - handler(); + /// Ensure that no leaders appeared and enable persistent multi-leader mode + /// May fail with ZNOTEMPTY + ops.emplace_back(makeRemoveRequest(path, 0)); + ops.emplace_back(makeCreateRequest(path, "", zkutil::CreateMode::Persistent)); + /// May fail with ZNODEEXISTS + ops.emplace_back(makeCreateRequest(fs::path(path) / persistent_multiple_leaders, persistent_identifier, zkutil::CreateMode::Persistent)); + } + else + { + if (potential_leaders.front() == persistent_multiple_leaders) return; + + /// Ensure that current leader supports multi-leader mode and make it persistent + auto current_leader = fs::path(path) / potential_leaders.front(); + Coordination::Stat leader_stat; + String identifier; + if (!zookeeper.tryGet(current_leader, identifier, &leader_stat)) + { + LOG_INFO(log, "LeaderElection: leader suddenly changed, will retry"); + continue; } - if (my_node_it == children.begin()) - throw Poco::Exception("Assertion failed in LeaderElection"); + if (!identifier.ends_with(suffix)) + throw Poco::Exception(fmt::format("Found leader replica ({}) with too old version (< 20.6). Stop it before upgrading", identifier)); - /// Watch for the node in front of us. 
- --my_node_it; - std::string get_path_value; - if (!zookeeper.tryGetWatch(path + "/" + *my_node_it, get_path_value, nullptr, task->getWatchCallback())) - task->schedule(); - - success = true; - } - catch (const KeeperException & e) - { - DB::tryLogCurrentException(log); - - if (e.code == Coordination::Error::ZSESSIONEXPIRED) - return; - } - catch (...) - { - DB::tryLogCurrentException(log); + /// Version does not matter, just check that it still exists. + /// May fail with ZNONODE + ops.emplace_back(makeCheckRequest(current_leader, leader_stat.version)); + /// May fail with ZNODEEXISTS + ops.emplace_back(makeCreateRequest(fs::path(path) / persistent_multiple_leaders, persistent_identifier, zkutil::CreateMode::Persistent)); } - if (!success) - task->scheduleAfter(10 * 1000); + Coordination::Responses res; + code = zookeeper.tryMulti(ops, res); + if (code == Coordination::Error::ZOK) + return; + else if (code == Coordination::Error::ZNOTEMPTY || code == Coordination::Error::ZNODEEXISTS || code == Coordination::Error::ZNONODE) + LOG_INFO(log, "LeaderElection: leader suddenly changed or new node appeared, will retry"); + else + KeeperMultiException::check(code, ops, res); } -}; -using LeaderElectionPtr = std::shared_ptr; + throw Poco::Exception("Cannot check that no old leaders exist"); +} } diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index b3a17250549..4d18adc1dfc 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 5731092f2a8..ff37a341205 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -2,9 +2,9 @@ #include #include #include +#include #include -#include #include diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 3bb592dcdcb..0cc6955ff72 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -197,11 +197,6 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() updateQuorumIfWeHavePart(); - if (storage_settings->replicated_can_become_leader) - storage.enterLeaderElection(); - else - LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0"); - /// Anything above can throw a KeeperException if something is wrong with ZK. /// Anything below should not throw exceptions. 
@@ -380,8 +375,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() LOG_TRACE(log, "Waiting for threads to finish"); - storage.exitLeaderElection(); - storage.queue_updating_task->deactivate(); storage.mutations_updating_task->deactivate(); storage.mutations_finalizing_task->deactivate(); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7f600fc054c..852e2b10e6c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include @@ -3400,53 +3401,29 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n } -void StorageReplicatedMergeTree::enterLeaderElection() +void StorageReplicatedMergeTree::startBeingLeader() { - auto callback = [this]() + if (!getSettings()->replicated_can_become_leader) { - LOG_INFO(log, "Became leader"); - - is_leader = true; - merge_selecting_task->activateAndSchedule(); - }; - - try - { - leader_election = std::make_shared( - getContext()->getSchedulePool(), - fs::path(zookeeper_path) / "leader_election", - *current_zookeeper, /// current_zookeeper lives for the lifetime of leader_election, - /// since before changing `current_zookeeper`, `leader_election` object is destroyed in `partialShutdown` method. - callback, - replica_name); - } - catch (...) - { - leader_election = nullptr; - throw; + LOG_INFO(log, "Will not enter leader election because replicated_can_become_leader=0"); + return; } + + zkutil::checkNoOldLeaders(log, *current_zookeeper, fs::path(zookeeper_path) / "leader_election"); + + LOG_INFO(log, "Became leader"); + is_leader = true; + merge_selecting_task->activateAndSchedule(); } -void StorageReplicatedMergeTree::exitLeaderElection() +void StorageReplicatedMergeTree::stopBeingLeader() { - if (!leader_election) + if (!is_leader) return; - /// Shut down the leader election thread to avoid suddenly becoming the leader again after - /// we have stopped the merge_selecting_thread, but before we have deleted the leader_election object. - leader_election->shutdown(); - - if (is_leader) - { - LOG_INFO(log, "Stopped being leader"); - - is_leader = false; - merge_selecting_task->deactivate(); - } - - /// Delete the node in ZK only after we have stopped the merge_selecting_thread - so that only one - /// replica assigns merges at any given time. - leader_election = nullptr; + LOG_INFO(log, "Stopped being leader"); + is_leader = false; + merge_selecting_task->deactivate(); } ConnectionTimeouts StorageReplicatedMergeTree::getFetchPartHTTPTimeouts(ContextPtr local_context) @@ -4109,10 +4086,12 @@ void StorageReplicatedMergeTree::startup() assert(prev_ptr == nullptr); getContext()->getInterserverIOHandler().addEndpoint(data_parts_exchange_ptr->getId(replica_path), data_parts_exchange_ptr); + startBeingLeader(); + /// In this thread replica will be activated. 
restarting_thread.start(); - /// Wait while restarting_thread initializes LeaderElection (and so on) or makes first attempt to do it + /// Wait while restarting_thread finishing initialization startup_event.wait(); startBackgroundMovesIfNeeded(); @@ -4145,6 +4124,7 @@ void StorageReplicatedMergeTree::shutdown() fetcher.blocker.cancelForever(); merger_mutator.merges_blocker.cancelForever(); parts_mover.moves_blocker.cancelForever(); + stopBeingLeader(); restarting_thread.shutdown(); background_operations_assignee.finish(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 65daf82a633..bcd364df30e 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -320,7 +319,6 @@ private: * It can be false only when old ClickHouse versions are working on the same cluster, because now we allow multiple leaders. */ std::atomic is_leader {false}; - zkutil::LeaderElectionPtr leader_election; InterserverIOEndpointPtr data_parts_exchange_endpoint; @@ -514,15 +512,10 @@ private: bool processQueueEntry(ReplicatedMergeTreeQueue::SelectedEntryPtr entry); - /// Postcondition: - /// either leader_election is fully initialized (node in ZK is created and the watching thread is launched) - /// or an exception is thrown and leader_election is destroyed. - void enterLeaderElection(); - - /// Postcondition: - /// is_leader is false, merge_selecting_thread is stopped, leader_election is nullptr. - /// leader_election node in ZK is either deleted, or the session is marked expired. - void exitLeaderElection(); + /// Start being leader (if not disabled by setting). + /// Since multi-leaders are allowed, it just sets is_leader flag. + void startBeingLeader(); + void stopBeingLeader(); /** Selects the parts to merge and writes to the log. 
*/ diff --git a/tests/integration/test_backward_compatibility/test.py b/tests/integration/test_backward_compatibility/test.py index 71aedb78e5b..a8f4968956c 100644 --- a/tests/integration/test_backward_compatibility/test.py +++ b/tests/integration/test_backward_compatibility/test.py @@ -11,13 +11,14 @@ node2 = cluster.add_instance('node2', main_configs=['configs/wide_parts_only.xml def start_cluster(): try: cluster.start() - for i, node in enumerate([node1, node2]): - node.query_with_retry( - '''CREATE TABLE t(date Date, id UInt32) - ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/t', '{}') - PARTITION BY toYYYYMM(date) - ORDER BY id'''.format(i)) - + create_query = '''CREATE TABLE t(date Date, id UInt32) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/t', '{}') + PARTITION BY toYYYYMM(date) + ORDER BY id''' + node1.query(create_query.format(1)) + node1.query("DETACH TABLE t") # stop being leader + node2.query(create_query.format(2)) + node1.query("ATTACH TABLE t") yield cluster finally: diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 8d38234ccdd..3c22f2ed380 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -36,6 +36,8 @@ def test_mutate_and_upgrade(start_cluster): node1.query("ALTER TABLE mt DELETE WHERE id = 2", settings={"mutations_sync": "2"}) node2.query("SYSTEM SYNC REPLICA mt", timeout=15) + node2.query("DETACH TABLE mt") # stop being leader + node1.query("DETACH TABLE mt") # stop being leader node1.restart_with_latest_version(signal=9) node2.restart_with_latest_version(signal=9) @@ -83,6 +85,7 @@ def test_upgrade_while_mutation(start_cluster): node3.query("SYSTEM STOP MERGES mt1") node3.query("ALTER TABLE mt1 DELETE WHERE id % 2 == 0") + node3.query("DETACH TABLE mt1") # stop being leader node3.restart_with_latest_version(signal=9) # checks for readonly From 2c6cb902c437ce3faadfa8e87520ec49bab5deb1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 7 Dec 2021 20:54:49 +0300 Subject: [PATCH 164/262] Revert "Better fix." This reverts commit f07fc08a493250ff25bcc9aa5e9996833e363611. 
--- src/Interpreters/ActionsDAG.cpp | 25 ++-------------- src/Interpreters/ActionsDAG.h | 4 +-- src/Interpreters/ExpressionAnalyzer.cpp | 16 ---------- src/Processors/Transforms/WindowTransform.cpp | 29 ++++++++++++++++++- .../02126_lc_window_functions.reference | 10 ------- .../0_stateless/02126_lc_window_functions.sql | 3 -- 6 files changed, 33 insertions(+), 54 deletions(-) diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index ec04177e4db..d0b360dda82 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -983,38 +983,19 @@ void ActionsDAG::assertDeterministic() const "Expression must be deterministic but it contains non-deterministic part `{}`", node.result_name); } -void ActionsDAG::addMaterializingOutputActions(bool remove_low_cardinality) +void ActionsDAG::addMaterializingOutputActions() { for (auto & node : index) - node = &materializeNode(*node, remove_low_cardinality); + node = &materializeNode(*node); } -const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node, bool remove_low_cardinality) +const ActionsDAG::Node & ActionsDAG::materializeNode(const Node & node) { FunctionOverloadResolverPtr func_builder_materialize = std::make_unique( std::make_shared()); const auto & name = node.result_name; const auto * func = &addFunction(func_builder_materialize, {&node}, {}); - if (remove_low_cardinality) - { - auto res_type = recursiveRemoveLowCardinality(func->result_type); - if (res_type.get() != func->result_type.get()) - { - ColumnWithTypeAndName column; - column.name = res_type->getName(); - column.column = DataTypeString().createColumnConst(0, column.name); - column.type = std::make_shared(); - - const auto * right_arg = &addColumn(std::move(column)); - const auto * left_arg = func; - - FunctionOverloadResolverPtr func_builder_cast = CastInternalOverloadResolver::createImpl(); - - NodeRawConstPtrs children = { left_arg, right_arg }; - func = &addFunction(func_builder_cast, std::move(children), {}); - } - } return addAlias(*func, name); } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 97193bececd..9a5ad01a252 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -192,10 +192,10 @@ public: /// For apply materialize() function for every output. /// Also add aliases so the result names remain unchanged. - void addMaterializingOutputActions(bool remove_low_cardinality = false); + void addMaterializingOutputActions(); /// Apply materialize() function to node. Result node has the same name. - const Node & materializeNode(const Node & node, bool remove_low_cardinality = false); + const Node & materializeNode(const Node & node); enum class MatchColumnsMode { diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 4ebe1691f27..9b343bec055 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1697,22 +1697,6 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage); - // If we have a (logically) constant column, some Chunks will have a - // Const column for it, and some -- materialized. Such difference is - // generated by e.g. MergingSortedAlgorithm, which mostly materializes - // the constant ORDER BY columns, but in some obscure cases passes them - // through, unmaterialized. 
This mix is a pain to work with in Window - // Transform, because we have to compare columns across blocks, when e.g. - // searching for peer group boundaries, and each of the four combinations - // of const and materialized requires different code. - // Another problem with Const columns is that the aggregate functions - // can't work with them, so we have to materialize them like the - // Aggregator does. - // Likewise, aggregate functions can't work with LowCardinality, - // so we have to materialize them too. - // Just materialize everything. - chain.getLastActions()->addMaterializingOutputActions(true); - // Build a list of output columns of the window step. // 1) We need the columns that are the output of ExpressionActions. for (const auto & x : chain.getLastActions()->getNamesAndTypesList()) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 8abf3980777..8cbe1c96e44 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -198,6 +198,15 @@ WindowTransform::WindowTransform(const Block & input_header_, , input_header(input_header_) , window_description(window_description_) { + // Materialize all columns in header, because we materialize all columns + // in chunks and it's convenient if they match. + auto input_columns = input_header.getColumns(); + for (auto & column : input_columns) + { + column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); + } + input_header.setColumns(std::move(input_columns)); + // Initialize window function workspaces. workspaces.reserve(functions.size()); for (const auto & f : functions) @@ -1036,7 +1045,25 @@ void WindowTransform::appendChunk(Chunk & chunk) // happens, because even in the case of `count() over ()` we have a dummy // input column. block.rows = chunk.getNumRows(); - block.input_columns = chunk.detachColumns(); + + // If we have a (logically) constant column, some Chunks will have a + // Const column for it, and some -- materialized. Such difference is + // generated by e.g. MergingSortedAlgorithm, which mostly materializes + // the constant ORDER BY columns, but in some obscure cases passes them + // through, unmaterialized. This mix is a pain to work with in Window + // Transform, because we have to compare columns across blocks, when e.g. + // searching for peer group boundaries, and each of the four combinations + // of const and materialized requires different code. + // Another problem with Const columns is that the aggregate functions + // can't work with them, so we have to materialize them like the + // Aggregator does. + // Likewise, aggregate functions can't work with LowCardinality, + // so we have to materialize them too. + // Just materialize everything. + auto columns = chunk.detachColumns(); + for (auto & column : columns) + column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); + block.input_columns = std::move(columns); // Initialize output columns. 
for (auto & ws : workspaces) diff --git a/tests/queries/0_stateless/02126_lc_window_functions.reference b/tests/queries/0_stateless/02126_lc_window_functions.reference index f2c4b32cb48..75378377541 100644 --- a/tests/queries/0_stateless/02126_lc_window_functions.reference +++ b/tests/queries/0_stateless/02126_lc_window_functions.reference @@ -8,13 +8,3 @@ 2 2 2 -2 -2 -2 -2 -2 -2 -2 -2 -2 -2 diff --git a/tests/queries/0_stateless/02126_lc_window_functions.sql b/tests/queries/0_stateless/02126_lc_window_functions.sql index 7baf0e8eb3e..b76d921406b 100644 --- a/tests/queries/0_stateless/02126_lc_window_functions.sql +++ b/tests/queries/0_stateless/02126_lc_window_functions.sql @@ -20,6 +20,3 @@ FROM SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(10) ); - - -select * from (SELECT countIf(sym = 'Red') OVER (Range BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS res FROM (SELECT max(255) OVER (Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(1048576))) limit 10; From 07b2d69ea28d40e720347a3b934aa1276da887a1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 8 Dec 2021 14:22:43 +0300 Subject: [PATCH 165/262] Another try. --- src/Processors/QueryPlan/WindowStep.cpp | 7 +++++++ src/Processors/Transforms/WindowTransform.cpp | 12 ++++++++++-- src/Processors/Transforms/WindowTransform.h | 1 + .../02126_lc_window_functions.reference | 3 +++ .../0_stateless/02126_lc_window_functions.sql | 16 ++++++++++++++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index cd4bb5f6730..94d6c4fe1d6 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -6,6 +6,7 @@ #include #include #include +//#include namespace DB { @@ -41,6 +42,12 @@ static Block addWindowFunctionResultColumns(const Block & block, result.insert(column_with_type); } + // for (auto & col : result) + // { + // col.column = recursiveRemoveLowCardinality(col.column); + // col.type = recursiveRemoveLowCardinality(col.type); + // } + return result; } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 8cbe1c96e44..bc6129c3bd2 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -203,7 +204,7 @@ WindowTransform::WindowTransform(const Block & input_header_, auto input_columns = input_header.getColumns(); for (auto & column : input_columns) { - column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); + column = std::move(column)->convertToFullColumnIfConst(); } input_header.setColumns(std::move(input_columns)); @@ -1006,6 +1007,12 @@ static void assertSameColumns(const Columns & left_all, assert(left_column); assert(right_column); + if (const auto * left_lc = typeid_cast(left_column)) + left_column = &left_lc->getDictionary(); + + if (const auto * right_lc = typeid_cast(right_column)) + right_column = &right_lc->getDictionary(); + assert(typeid(*left_column).hash_code() == typeid(*right_column).hash_code()); @@ -1061,6 +1068,7 @@ void WindowTransform::appendChunk(Chunk & chunk) // so we have to materialize them too. 
// Just materialize everything. auto columns = chunk.detachColumns(); + block.original_input_columns = columns; for (auto & column : columns) column = recursiveRemoveLowCardinality(std::move(column)->convertToFullColumnIfConst()); block.input_columns = std::move(columns); @@ -1305,7 +1313,7 @@ IProcessor::Status WindowTransform::prepare() // Output the ready block. const auto i = next_output_block_number - first_block_number; auto & block = blocks[i]; - auto columns = block.input_columns; + auto columns = block.original_input_columns; for (auto & res : block.output_columns) { columns.push_back(ColumnPtr(std::move(res))); diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 5fbdd6d38e1..077979e83b9 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -39,6 +39,7 @@ struct WindowFunctionWorkspace struct WindowTransformBlock { + Columns original_input_columns; Columns input_columns; MutableColumns output_columns; diff --git a/tests/queries/0_stateless/02126_lc_window_functions.reference b/tests/queries/0_stateless/02126_lc_window_functions.reference index 75378377541..bb2c453139e 100644 --- a/tests/queries/0_stateless/02126_lc_window_functions.reference +++ b/tests/queries/0_stateless/02126_lc_window_functions.reference @@ -8,3 +8,6 @@ 2 2 2 +a\0aa 1 +a\0aa 1 +a\0aa 1 diff --git a/tests/queries/0_stateless/02126_lc_window_functions.sql b/tests/queries/0_stateless/02126_lc_window_functions.sql index b76d921406b..6a1fb691a37 100644 --- a/tests/queries/0_stateless/02126_lc_window_functions.sql +++ b/tests/queries/0_stateless/02126_lc_window_functions.sql @@ -20,3 +20,19 @@ FROM SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(10) ); + +SELECT materialize(toLowCardinality('a\0aa')), countIf(toLowCardinality('aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0aaaaaaa\0'), sym = 'Red') OVER (Range BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS res FROM (SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym FROM numbers(3)); + +SELECT + NULL, + id, + max(id) OVER (Rows BETWEEN 10 PRECEDING AND UNBOUNDED FOLLOWING) AS aid +FROM +( + SELECT + NULL, + max(id) OVER (), + materialize(toLowCardinality('')) AS id + FROM numbers_mt(0, 1) +) +FORMAT `Null`; From dd906b2a3e2c38fb033fa5addd397fc40a90eb9d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 8 Dec 2021 14:25:00 +0300 Subject: [PATCH 166/262] Remove comments. 
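For context, the window-function-over-LowCardinality scenario exercised by these two patches can be reproduced with a query shaped like the new stateless tests above; the variant below is a hedged, minimal illustration rather than part of the patch:

``` sql
-- A LowCardinality(String) column feeds a window function and is also returned,
-- so chunks may mix LowCardinality/const and fully materialized representations.
SELECT
    sym,
    countIf(sym = 'Red') OVER (Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS reds_so_far
FROM
(
    SELECT CAST(CAST(number % 5, 'Enum8(\'Red\' = 0, \'Blue\' = 1, \'Yellow\' = 2, \'Black\' = 3, \'White\' = 4)'), 'LowCardinality(String)') AS sym
    FROM numbers(10)
);
```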
--- src/Processors/QueryPlan/WindowStep.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index 94d6c4fe1d6..cd4bb5f6730 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -6,7 +6,6 @@ #include #include #include -//#include namespace DB { @@ -42,12 +41,6 @@ static Block addWindowFunctionResultColumns(const Block & block, result.insert(column_with_type); } - // for (auto & col : result) - // { - // col.column = recursiveRemoveLowCardinality(col.column); - // col.type = recursiveRemoveLowCardinality(col.type); - // } - return result; } From 785d79531de5cac486bf7b00b6718452074dc42b Mon Sep 17 00:00:00 2001 From: tavplubix Date: Wed, 8 Dec 2021 15:27:32 +0300 Subject: [PATCH 167/262] Update StorageReplicatedMergeTree.cpp --- src/Storages/StorageReplicatedMergeTree.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 852e2b10e6c..89506184354 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4086,14 +4086,15 @@ void StorageReplicatedMergeTree::startup() assert(prev_ptr == nullptr); getContext()->getInterserverIOHandler().addEndpoint(data_parts_exchange_ptr->getId(replica_path), data_parts_exchange_ptr); - startBeingLeader(); - /// In this thread replica will be activated. restarting_thread.start(); /// Wait while restarting_thread finishing initialization startup_event.wait(); + /// Restarting thread has initialized replication queue, replica can become leader now + startBeingLeader(); + startBackgroundMovesIfNeeded(); part_moves_between_shards_orchestrator.start(); From 398d2f5984af4f32054eff08191d64784b65c473 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 8 Dec 2021 12:36:55 +0000 Subject: [PATCH 168/262] Fix --- src/Storages/MergeTree/tests/gtest_executor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/tests/gtest_executor.cpp b/src/Storages/MergeTree/tests/gtest_executor.cpp index a208e7dc233..d2895215ebe 100644 --- a/src/Storages/MergeTree/tests/gtest_executor.cpp +++ b/src/Storages/MergeTree/tests/gtest_executor.cpp @@ -147,7 +147,7 @@ TEST(Executor, RemoveTasksStress) for (size_t j = 0; j < tasks_kinds; ++j) executor->removeTasksCorrespondingToStorage({"test", std::to_string(j)}); - ASSERT_EQ(CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask], 0); - executor->wait(); + + ASSERT_EQ(CurrentMetrics::values[CurrentMetrics::BackgroundMergesAndMutationsPoolTask], 0); } From eb759c83f6f899b046fd9a4eb3d16312a9f91a55 Mon Sep 17 00:00:00 2001 From: vxider Date: Wed, 8 Dec 2021 14:43:26 +0000 Subject: [PATCH 169/262] window view docs improvement --- .../functions/window-view-functions.md | 18 ++- .../sql-reference/statements/create/view.md | 8 +- .../functions/window-view-functions.md | 112 +++++++++++++++++ .../sql-reference/statements/create/view.md | 119 +++++++++++++++++- 4 files changed, 242 insertions(+), 15 deletions(-) create mode 100644 docs/zh/sql-reference/functions/window-view-functions.md diff --git a/docs/en/sql-reference/functions/window-view-functions.md b/docs/en/sql-reference/functions/window-view-functions.md index 5684e93bd88..3f560aa96b9 100644 --- a/docs/en/sql-reference/functions/window-view-functions.md +++ b/docs/en/sql-reference/functions/window-view-functions.md @@ -5,11 
+5,11 @@ toc_title: Window View # Window View Functions {#window-view-functions} -Window functions indicate the lower and upper window bound of records in WindowView. The functions for working with WindowView are listed below. +Window view functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: ## tumble {#window-view-functions-tumble} -A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (interval). +A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). ``` sql tumble(time_attr, interval [, timezone]) @@ -22,7 +22,7 @@ tumble(time_attr, interval [, timezone]) **Returned values** -- The lower and upper bound of the tumble window. +- The inclusive lower and exclusive upper bound of the corresponding tumbling window. Type: `Tuple(DateTime, DateTime)` @@ -59,9 +59,7 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **Returned values** -- The lower and upper bound of the hop window. Since hop windows are - overlapped, the function only returns the bound of the **first** window when - hop function is used **without** `WINDOW VIEW`. +- The inclusive lower and exclusive upper bound of the corresponding hopping window. Since one record can be assigned to multiple hop windows, the function only returns the bound of the **first** window when hop function is used **without** `WINDOW VIEW`. Type: `Tuple(DateTime, DateTime)` @@ -83,7 +81,7 @@ Result: ## tumbleStart {#window-view-functions-tumblestart} -Indicate the lower bound of a tumble function. +Returns the inclusive lower bound of the corresponding tumbling window. ``` sql tumbleStart(time_attr, interval [, timezone]); @@ -91,7 +89,7 @@ tumbleStart(time_attr, interval [, timezone]); ## tumbleEnd {#window-view-functions-tumbleend} -Indicate the upper bound of a tumble function. +Returns the exclusive upper bound of the corresponding tumbling window. ``` sql tumbleEnd(time_attr, interval [, timezone]); @@ -99,7 +97,7 @@ tumbleEnd(time_attr, interval [, timezone]); ## hopStart {#window-view-functions-hopstart} -Indicate the lower bound of a hop function. +Returns the inclusive lower bound of the corresponding hopping window. ``` sql hopStart(time_attr, hop_interval, window_interval [, timezone]); @@ -107,7 +105,7 @@ hopStart(time_attr, hop_interval, window_interval [, timezone]); ## hopEnd {#window-view-functions-hopend} -Indicate the upper bound of a hop function. +Returns the exclusive upper bound of the corresponding hopping window. ``` sql hopEnd(time_attr, hop_interval, window_interval [, timezone]); diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index aa6b82360e0..464de02eac6 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -254,13 +254,13 @@ Most common uses of live view tables include: CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_view_function ``` -Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table and can push the processing result to a specified table or push notifications using the WATCH query. 
+Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query. Creating a window view is similar to creating `MATERIALIZED VIEW`. Window view needs an inner storage engine to store intermediate data. The inner storage will use `AggregatingMergeTree` as the default engine. ### Window View Functions {#window-view-windowviewfunctions} -[Window view functions](../../functions/window-view-functions.md) are used to indicate the lower and upper window bound of records. The window view needs to be used with a window view function. +[Window view functions](../../functions/window-view-functions.md) are used to get the lower and upper window bound of records. The window view needs to be used with a window view function. ### TIME ATTRIBUTES {#window-view-timeattributes} @@ -274,13 +274,13 @@ CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from **Event time** is the time that each individual event occurred on its producing device. This time is typically embedded within the records when it is generated. Event time processing allows for consistent results even in case of out-of-order events or late events. Window view supports event time processing by using `WATERMARK` syntax. -Window view provides three watermark strategies. +Window view provides three watermark strategies: * `STRICTLY_ASCENDING`: Emits a watermark of the maximum observed timestamp so far. Rows that have a timestamp smaller to the max timestamp are not late. * `ASCENDING`: Emits a watermark of the maximum observed timestamp so far minus 1. Rows that have a timestamp equal and smaller to the max timestamp are not late. * `BOUNDED`: WATERMARK=INTERVAL. Emits watermarks, which are the maximum observed timestamp minus the specified delay. -The following queries are examples of creating a window view with `WATERMARK`. +The following queries are examples of creating a window view with `WATERMARK`: ``` sql CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); diff --git a/docs/zh/sql-reference/functions/window-view-functions.md b/docs/zh/sql-reference/functions/window-view-functions.md new file mode 100644 index 00000000000..86425bc78e2 --- /dev/null +++ b/docs/zh/sql-reference/functions/window-view-functions.md @@ -0,0 +1,112 @@ +--- +toc_priority: 68 +toc_title: Window View +--- + +# Window View 函数{#window-view-han-shu} + +Window view函数用于获取窗口的起始(包含边界)和结束时间(不包含边界)。系统支持的window view函数如下: + +## tumble {#window-view-functions-tumble} + +tumble窗口是连续的、不重叠的固定大小(`interval`)时间窗口。 + +``` sql +tumble(time_attr, interval [, timezone]) +``` + +**参数** +- `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。 +- `interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小。 +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数). 
+ +**返回值** + +- tumble窗口的开始(包含边界)和结束时间(不包含边界) + +类型: `Tuple(DateTime, DateTime)` + +**示例** + +查询: + +``` sql +SELECT tumble(now(), toIntervalDay('1')) +``` + +结果: + +``` text +┌─tumble(now(), toIntervalDay('1'))─────────────┐ +│ ['2020-01-01 00:00:00','2020-01-02 00:00:00'] │ +└───────────────────────────────────────────────┘ +``` + +## hop {#window-view-functions-hop} + +hop窗口是一个固定大小(`window_interval`)的时间窗口,并按照一个固定的滑动间隔(`hop_interval`)滑动。当滑动间隔小于窗口大小时,滑动窗口间存在重叠,此时一个数据可能存在于多个窗口。 + +``` sql +hop(time_attr, hop_interval, window_interval [, timezone]) +``` + +**参数** + +- `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。 +- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的滑动间隔,需要大于0。 +- `window_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小,需要大于0。 +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数)。 + +**返回值** + +- hop窗口的开始(包含边界)和结束时间(不包含边界)。由于一个数据可能存在于多个窗口,脱离window view单独调用该函数时只返回第一个窗口数据。 + +类型: `Tuple(DateTime, DateTime)` + +**示例** + +查询: + +``` sql +SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) +``` + +结果: + +``` text +┌─hop(now(), toIntervalSecond('1'), toIntervalSecond('2'))──┐ +│ ('2020-01-14 16:58:22','2020-01-14 16:58:24') │ +└───────────────────────────────────────────────────────────┘ +``` + +## tumbleStart {#window-view-functions-tumblestart} + +返回tumble窗口的开始时间(包含边界)。 + +``` sql +tumbleStart(time_attr, interval [, timezone]); +``` + +## tumbleEnd {#window-view-functions-tumbleend} + +返回tumble窗口的结束时间(不包含边界)。 + +``` sql +tumbleEnd(time_attr, interval [, timezone]); +``` + +## hopStart {#window-view-functions-hopstart} + +返回hop窗口的开始时间(包含边界)。 + +``` sql +hopStart(time_attr, hop_interval, window_interval [, timezone]); +``` + +## hopEnd {#window-view-functions-hopend} + +返回hop窗口的结束时间(不包含边界)。 + +``` sql +hopEnd(time_attr, hop_interval, window_interval [, timezone]); +``` \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index da69860f068..ed64b578150 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -5,7 +5,7 @@ toc_title: VIEW # CREATE VIEW {#create-view} -创建一个新视图。 有两种类型的视图:普通视图和物化视图。 +创建一个新视图。 有两种类型的视图:普通视图,物化视图,Live视图和Window视图。 ## Normal {#normal} @@ -241,3 +241,120 @@ Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table defa - 使用定期刷新从系统表中查看指标。 [原始文章](https://clickhouse.com/docs/en/sql-reference/statements/create/view/) + +## Window View [Experimental] {#window-view} + +!!! important "重要" + 这是一项试验性功能,可能会在未来版本中以向后不兼容的方式进行更改。 + 通过[allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view)启用window view以及`WATCH`语句。输入命令 + `set allow_experimental_window_view = 1`。 + +``` sql +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... 
GROUP BY window_view_function +``` + +Window view可以通过时间窗口聚合数据,并在满足窗口触发条件时自动触发对应窗口计算。其通过将计算状态保存降低处理延迟,支持将处理结果输出至目标表或通过`WATCH`语句输出至终端。 + +创建window view的方式和创建物化视图类似。Window view使用默认为`AggregatingMergeTree`的内部存储引擎存储计算中间状态。 + +### Window View 函数{#window-view-han-shu} + +[Window view函数](../../functions/window-view-functions.md)用于获取窗口的起始和结束时间。Window view需要和window view函数配合使用。 + +### 时间属性{#window-view-shi-jian-shu-xing} + +Window view 支持**处理时间**和**事件时间**两种时间类型。 + +**处理时间**为默认时间类型,该模式下window view使用本地机器时间计算窗口数据。“处理时间”时间类型计算简单,但具有不确定性。该模式下时间可以为window view函数的第一个参数`time_attr`,或通过函数`now()`使用当前机器时间。下面的例子展示了使用“处理时间”创建的window view的例子。 + +``` sql +CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id +``` + +**事件时间** 是事件真实发生的时间,该时间往往在事件发生时便嵌入数据记录。事件时间处理提供较高的确定性,可以处理乱序数据以及迟到数据。Window view 通过水位线(`WATERMARK`)启用事件时间处理。 + +Window view提供如下三种水位线策略: + +* `STRICTLY_ASCENDING`: 提交观测到的最大时间作为水位线,小于最大观测时间的数据不算迟到。 +* `ASCENDING`: 提交观测到的最大时间减1作为水位线。小于或等于最大观测时间的数据不算迟到。 +* `BOUNDED`: WATERMARK=INTERVAL. 提交最大观测时间减去固定间隔(`INTERVAL`)做为水位线。 + +以下为使用`WATERMARK`创建window view的示例: + +``` sql +CREATE WINDOW VIEW wv WATERMARK=STRICTLY_ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=ASCENDING AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +CREATE WINDOW VIEW wv WATERMARK=INTERVAL '3' SECOND AS SELECT count(number) FROM date GROUP BY tumble(timestamp, INTERVAL '5' SECOND); +``` + +通常,窗口会在水位线到达时触发,水位线到达之后的数据会被丢弃。Window view可以通过设置`ALLOWED_LATENESS=INTERVAL`来开启迟到消息处理。示例如下: + +``` sql +CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTERVAL '2' SECOND AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM test.mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND) AS wid; +``` + +需要注意的是,迟到消息需要更新之前的处理结果。与在窗口结束时触发不同,迟到消息到达时window view会立即触发计算。因此,会导致同一个窗口输出多次计算结果。用户需要注意这种情况,并消除重复结果。 + +### 新窗口监控{#window-view-xin-chuang-kou-jian-kong} + +Window view可以通过`WATCH`语句将处理结果推送至终端,或通过`TO`语句将结果推送至数据表。 + +``` sql +WATCH [db.]name [LIMIT n] +``` + +`WATCH`语句和`LIVE VIEW`中的类似。支持设置`LIMIT`参数,输出消息数目达到`LIMIT`限制时结束查询。 + +### 设置{#window-view-she-zhi} + +- `window_view_clean_interval`: window view清除过期数据间隔(单位为秒)。系统会定期清除过期数据,尚未触发的窗口数据不会被清除。 +- `window_view_heartbeat_interval`: 用于判断watch查询活跃的心跳时间间隔。 + +### 示例{#window-view-shi-li} + +假设我们需要每10秒统计一次`data`表中的点击日志,且`data`表的结构如下: + +``` sql +CREATE TABLE data ( `id` UInt64, `timestamp` DateTime) ENGINE = Memory; +``` + +首先,使用10秒大小的tumble函数创建window view。 + +``` sql +CREATE WINDOW VIEW wv as select count(id), tumbleStart(w_id) as window_start from data group by tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +随后,我们使用`WATCH`语句获取计算结果。 + +``` sql +WATCH wv +``` + +当日志插入表`data`时, + +``` sql +INSERT INTO data VALUES(1,now()) +``` + +`WATCH`语句会输出如下结果: + +``` text +┌─count(id)─┬────────window_start─┐ +│ 1 │ 2020-01-14 16:56:40 │ +└───────────┴─────────────────────┘ +``` + +或者,我们可以通过`TO`关键字将处理结果输出至另一张表。 + +``` sql +CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_start FROM data GROUP BY tumble(timestamp, INTERVAL '10' SECOND) as w_id +``` + +ClickHouse测试中提供了更多的示例(以`*window_view*`命名)。 + +### Window View 使用场景{#window-view-shi-yong-chang-jing} + +Window view 在以下场景有用: + +* **监控**: 以时间维度聚合及处理数据,并将处理结果输出至目标表。用户可通过目标表获取并操作计算结果。 +* **分析**: 以时间维度进行数据分析. 当数据源非常庞大时,window view可以减少重复全表查询的计算量。 From 03927f5fc6b5991aa125fdf9de0ab5f899be3ea2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 8 Dec 2021 10:58:06 +0100 Subject: [PATCH 170/262] Revert "graphite: split tagged/plain rollup rules (for merges perfomance)" This reverts commit 303552f51507178fae443ddf822485031b6f7d9f. --- base/base/StringRef.h | 5 +- src/CMakeLists.txt | 1 - src/Common/tests/gtest_global_context.cpp | 7 - src/Common/tests/gtest_global_context.h | 6 +- src/Processors/Merges/Algorithms/Graphite.cpp | 493 --------------- src/Processors/Merges/Algorithms/Graphite.h | 37 +- .../GraphiteRollupSortedAlgorithm.cpp | 59 +- .../GraphiteRollupSortedAlgorithm.h | 10 + .../Algorithms/tests/gtest_graphite.cpp | 597 ------------------ .../MergeTree/registerStorageMergeTree.cpp | 175 +++++ src/Storages/System/StorageSystemGraphite.cpp | 4 - tests/integration/helpers/test_tools.py | 16 - .../test_graphite_merge_tree/test.py | 20 +- .../__init__.py | 0 .../configs/graphite_rollup.xml | 120 ---- .../configs/users.xml | 8 - .../test_graphite_merge_tree_typed/test.py | 580 ----------------- ...ultiple_paths_and_versions.reference.plain | 84 --- ...ltiple_paths_and_versions.reference.tagged | 84 --- .../02117_show_create_table_system.reference | 2 +- utils/CMakeLists.txt | 1 - utils/graphite-rollup/CMakeLists.txt | 23 - .../graphite-rollup/graphite-rollup-bench.cpp | 147 ----- utils/graphite-rollup/metrics.txt | 11 - utils/graphite-rollup/rollup-tag-list.xml | 167 ----- utils/graphite-rollup/rollup-typed.xml | 167 ----- utils/graphite-rollup/rollup.xml | 147 ----- 27 files changed, 266 insertions(+), 2705 deletions(-) delete mode 100644 src/Common/tests/gtest_global_context.cpp delete mode 100644 src/Processors/Merges/Algorithms/Graphite.cpp delete mode 100644 src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp delete mode 100644 tests/integration/test_graphite_merge_tree_typed/__init__.py delete mode 100644 tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml delete mode 100644 tests/integration/test_graphite_merge_tree_typed/configs/users.xml delete mode 100644 tests/integration/test_graphite_merge_tree_typed/test.py delete mode 100644 tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain delete mode 100644 tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged delete mode 100644 utils/graphite-rollup/CMakeLists.txt delete mode 100644 utils/graphite-rollup/graphite-rollup-bench.cpp delete mode 100644 utils/graphite-rollup/metrics.txt delete mode 100644 utils/graphite-rollup/rollup-tag-list.xml delete mode 100644 utils/graphite-rollup/rollup-typed.xml delete mode 100644 utils/graphite-rollup/rollup.xml diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 98c322320a5..d0184dbc24c 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -48,10 +48,7 @@ struct StringRef std::string toString() const { return std::string(data, size); } explicit operator std::string() const { return toString(); } - - std::string_view toView() const { return std::string_view(data, size); } - - constexpr explicit operator std::string_view() const { return std::string_view(data, size); } + constexpr explicit operator std::string_view() const { return {data, size}; } }; /// Here constexpr doesn't implicate inline, see https://www.viva64.com/en/w/v1043/ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1f7a2700e5a..5f4ebaaa895 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -547,7 +547,6 @@ if (ENABLE_TESTS AND USE_GTEST) clickhouse_parsers clickhouse_storages_system dbms 
- clickhouse_common_config clickhouse_common_zookeeper string_utils) diff --git a/src/Common/tests/gtest_global_context.cpp b/src/Common/tests/gtest_global_context.cpp deleted file mode 100644 index 19ba3cdc269..00000000000 --- a/src/Common/tests/gtest_global_context.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "gtest_global_context.h" - -const ContextHolder & getContext() -{ - static ContextHolder holder; - return holder; -} diff --git a/src/Common/tests/gtest_global_context.h b/src/Common/tests/gtest_global_context.h index 7756be7ce9b..9bd7c2490d6 100644 --- a/src/Common/tests/gtest_global_context.h +++ b/src/Common/tests/gtest_global_context.h @@ -18,4 +18,8 @@ struct ContextHolder ContextHolder(ContextHolder &&) = default; }; -const ContextHolder & getContext(); +inline const ContextHolder & getContext() +{ + static ContextHolder holder; + return holder; +} diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp deleted file mode 100644 index 38d3fa30b42..00000000000 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ /dev/null @@ -1,493 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include - -using namespace std::literals; - -namespace DB::ErrorCodes -{ - extern const int NOT_IMPLEMENTED; - extern const int BAD_ARGUMENTS; - extern const int UNKNOWN_ELEMENT_IN_CONFIG; - extern const int NO_ELEMENTS_IN_CONFIG; - } - -namespace DB::Graphite -{ -static std::unordered_map ruleTypeMap = -{ - { RuleTypeAll, "all" }, - { RuleTypePlain, "plain" }, - { RuleTypeTagged, "tagged"}, - { RuleTypeTagList, "tag_list"} -}; - -const String & ruleTypeStr(RuleType rule_type) -{ - try - { - return ruleTypeMap.at(rule_type); - } - catch (...) - { - throw Exception("invalid rule type: " + std::to_string(rule_type), DB::ErrorCodes::BAD_ARGUMENTS); - } -} - -RuleType ruleType(const String & s) -{ - if (s == "all") - return RuleTypeAll; - else if (s == "plain") - return RuleTypePlain; - else if (s == "tagged") - return RuleTypeTagged; - else if (s == "tag_list") - return RuleTypeTagList; - else - throw Exception("invalid rule type: " + s, DB::ErrorCodes::BAD_ARGUMENTS); -} - -static const Graphite::Pattern undef_pattern = -{ /// empty pattern for selectPatternForPath - .rule_type = RuleTypeAll, - .regexp = nullptr, - .regexp_str = "", - .function = nullptr, - .retentions = Graphite::Retentions(), - .type = undef_pattern.TypeUndef, -}; - -inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, const StringRef path) -{ - if (params.patterns_typed) - { - std::string_view path_view = path.toView(); - if (path_view.find("?"sv) == path_view.npos) - return params.patterns_plain; - else - return params.patterns_tagged; - } - else - { - return params.patterns; - } -} - -Graphite::RollupRule selectPatternForPath( - const Graphite::Params & params, - const StringRef path) -{ - const Graphite::Pattern * first_match = &undef_pattern; - - const Patterns & patterns_check = selectPatternsForMetricType(params, path); - - for (const auto & pattern : patterns_check) - { - if (!pattern.regexp) - { - /// Default pattern - if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) - { - /// There is only default pattern for both retention and aggregation - return std::pair(&pattern, &pattern); - } - if (pattern.type != first_match->type) - { - if (first_match->type == first_match->TypeRetention) - { - return std::pair(first_match, &pattern); - } - if 
(first_match->type == first_match->TypeAggregation) - { - return std::pair(&pattern, first_match); - } - } - } - else - { - if (pattern.regexp->match(path.data, path.size)) - { - /// General pattern with matched path - if (pattern.type == pattern.TypeAll) - { - /// Only for not default patterns with both function and retention parameters - return std::pair(&pattern, &pattern); - } - if (first_match->type == first_match->TypeUndef) - { - first_match = &pattern; - continue; - } - if (pattern.type != first_match->type) - { - if (first_match->type == first_match->TypeRetention) - { - return std::pair(first_match, &pattern); - } - if (first_match->type == first_match->TypeAggregation) - { - return std::pair(&pattern, first_match); - } - } - } - } - } - - return {nullptr, nullptr}; -} - -/** Is used to order Graphite::Retentions by age and precision descending. - * Throws exception if not both age and precision are less or greater then another. - */ -static bool compareRetentions(const Retention & a, const Retention & b) -{ - if (a.age > b.age && a.precision > b.precision) - { - return true; - } - else if (a.age < b.age && a.precision < b.precision) - { - return false; - } - String error_msg = "age and precision should only grow up: " - + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " - + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception( - error_msg, - DB::ErrorCodes::BAD_ARGUMENTS); -} - -bool operator==(const Retention & a, const Retention & b) -{ - return a.age == b.age && a.precision == b.precision; -} - -std::ostream & operator<<(std::ostream & stream, const Retentions & a) -{ - stream << "{ "; - for (size_t i = 0; i < a.size(); i++) - { - if (i > 0) - stream << ","; - stream << " { age = " << a[i].age << ", precision = " << a[i].precision << " }"; - } - stream << " }"; - - return stream; -} - -bool operator==(const Pattern & a, const Pattern & b) -{ - // equal - // Retentions retentions; /// Must be ordered by 'age' descending. 
- if (a.type != b.type || a.regexp_str != b.regexp_str || a.rule_type != b.rule_type) - return false; - - if (a.function == nullptr) - { - if (b.function != nullptr) - return false; - } - else if (b.function == nullptr) - { - return false; - } - else if (a.function->getName() != b.function->getName()) - { - return false; - } - - return a.retentions == b.retentions; -} - -std::ostream & operator<<(std::ostream & stream, const Pattern & a) -{ - stream << "{ rule_type = " << ruleTypeStr(a.rule_type); - if (!a.regexp_str.empty()) - stream << ", regexp = '" << a.regexp_str << "'"; - if (a.function != nullptr) - stream << ", function = " << a.function->getName(); - if (!a.retentions.empty()) - { - stream << ",\n retentions = {\n"; - for (size_t i = 0; i < a.retentions.size(); i++) - { - stream << " { " << a.retentions[i].age << ", " << a.retentions[i].precision << " }"; - if (i < a.retentions.size() - 1) - stream << ","; - stream << "\n"; - } - stream << " }\n"; - } - else - stream << " "; - - stream << "}"; - return stream; -} - -std::string buildTaggedRegex(std::string regexp_str) -{ - /* - * tags list in format (for name or any value can use regexp, alphabet sorting not needed) - * spaces are not stiped and used as tag and value part - * name must be first (if used) - * - * tag1=value1; tag2=VALUE2_REGEX;tag3=value3 - * or - * name;tag1=value1;tag2=VALUE2_REGEX;tag3=value3 - * or for one tag - * tag1=value1 - * - * Resulting regex against metric like - * name?tag1=value1&tag2=value2 - * - * So, - * - * name - * produce - * name\? - * - * tag2=val2 - * produce - * [\?&]tag2=val2(&.*)?$ - * - * nam.* ; tag1=val1 ; tag2=val2 - * produce - * nam.*\?(.*&)?tag1=val1&(.*&)?tag2=val2(&.*)?$ - */ - - std::vector tags; - - splitInto<';'>(tags, regexp_str); - /* remove empthy elements */ - using namespace std::string_literals; - tags.erase(std::remove(tags.begin(), tags.end(), ""s), tags.end()); - if (tags[0].find('=') == tags[0].npos) - { - if (tags.size() == 1) /* only name */ - return "^" + tags[0] + "\\?"; - /* start with name value */ - regexp_str = "^" + tags[0] + "\\?(.*&)?"; - tags.erase(std::begin(tags)); - } - else - regexp_str = "[\\?&]"; - - std::sort(std::begin(tags), std::end(tags)); /* sorted tag keys */ - regexp_str += fmt::format( - "{}{}", - fmt::join(tags, "&(.*&)?"), - "(&.*)?$" /* close regex */ - ); - - return regexp_str; -} - -/** Read the settings for Graphite rollup from config. 
- * Example - * - * - * Path - * - * click_cost - * any - * - * 0 - * 3600 - * - * - * 86400 - * 60 - * - * - * - * max - * - * 0 - * 60 - * - * - * 3600 - * 300 - * - * - * 86400 - * 3600 - * - * - * - */ -static const Pattern & -appendGraphitePattern( - const Poco::Util::AbstractConfiguration & config, - const String & config_element, Patterns & patterns, - bool default_rule, - ContextPtr context) -{ - Pattern pattern; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_element, keys); - - for (const auto & key : keys) - { - if (key == "regexp") - { - pattern.regexp_str = config.getString(config_element + ".regexp"); - } - else if (key == "function") - { - String aggregate_function_name_with_params = config.getString(config_element + ".function"); - String aggregate_function_name; - Array params_row; - getAggregateFunctionNameAndParametersArray( - aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); - - /// TODO Not only Float64 - AggregateFunctionProperties properties; - pattern.function = AggregateFunctionFactory::instance().get( - aggregate_function_name, {std::make_shared()}, params_row, properties); - } - else if (key == "rule_type") - { - String rule_type = config.getString(config_element + ".rule_type"); - pattern.rule_type = ruleType(rule_type); - } - else if (startsWith(key, "retention")) - { - pattern.retentions.emplace_back(Graphite::Retention{ - .age = config.getUInt(config_element + "." + key + ".age"), - .precision = config.getUInt(config_element + "." + key + ".precision")}); - } - else - throw Exception("Unknown element in config: " + key, DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - if (!pattern.regexp_str.empty()) - { - if (pattern.rule_type == RuleTypeTagList) - { - // construct tagged regexp - pattern.regexp_str = buildTaggedRegex(pattern.regexp_str); - pattern.rule_type = RuleTypeTagged; - } - pattern.regexp = std::make_shared(pattern.regexp_str); - } - - if (!pattern.function && pattern.retentions.empty()) - throw Exception( - "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", - DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - if (default_rule && pattern.rule_type != RuleTypeAll) - { - throw Exception( - "Default must have rule_type all for rollup patterns in GraphiteMergeTree", - DB::ErrorCodes::BAD_ARGUMENTS); - } - - if (!pattern.function) - { - pattern.type = pattern.TypeRetention; - } - else if (pattern.retentions.empty()) - { - pattern.type = pattern.TypeAggregation; - } - else - { - pattern.type = pattern.TypeAll; - } - - if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll - if (pattern.function->allocatesMemoryInArena()) - throw Exception( - "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", DB::ErrorCodes::NOT_IMPLEMENTED); - - /// retention should be in descending order of age. 
- if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll - std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); - - patterns.emplace_back(pattern); - return patterns.back(); -} - -void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) -{ - const auto & config = context->getConfigRef(); - - if (!config.has(config_element)) - throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); - - params.config_name = config_element; - params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); - params.time_column_name = config.getString(config_element + ".time_column_name", "Time"); - params.value_column_name = config.getString(config_element + ".value_column_name", "Value"); - params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp"); - - params.patterns_typed = false; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(config_element, keys); - - for (const auto & key : keys) - { - if (startsWith(key, "pattern")) - { - if (appendGraphitePattern(config, config_element + "." + key, params.patterns, false, context).rule_type != RuleTypeAll) - params.patterns_typed = true; - } - else if (key == "default") - { - /// See below. - } - else if (key == "path_column_name" || key == "time_column_name" || key == "value_column_name" || key == "version_column_name") - { - /// See above. - } - else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - - if (config.has(config_element + ".default")) - appendGraphitePattern(config, config_element + "." + ".default", params.patterns, true, context); - - for (const auto & pattern : params.patterns) - { - if (pattern.rule_type == RuleTypeAll) - { - if (params.patterns_typed) - { - params.patterns_plain.push_back(pattern); - params.patterns_tagged.push_back(pattern); - } - } - else if (pattern.rule_type == RuleTypePlain) - { - params.patterns_plain.push_back(pattern); - } - else if (pattern.rule_type == RuleTypeTagged) - { - params.patterns_tagged.push_back(pattern); - } - else - { - throw Exception("Unhandled rule_type in config: " + ruleTypeStr(pattern.rule_type), ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); - } - } -} - -} diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index dc39cb46386..ecb1aeb9804 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,8 +1,13 @@ #pragma once - -#include #include -#include + +namespace DB +{ + +class IAggregateFunction; +using AggregateFunctionPtr = std::shared_ptr; + +} /** Intended for implementation of "rollup" - aggregation (rounding) of older data * for a table with Graphite data (Graphite is the system for time series monitoring). 
@@ -92,32 +97,16 @@ namespace DB::Graphite { -// sync with rule_types_str -enum RuleType -{ - RuleTypeAll = 0, // default, with regex, compatible with old scheme - RuleTypePlain = 1, // plain metrics, with regex, compatible with old scheme - RuleTypeTagged = 2, // tagged metrics, with regex, compatible with old scheme - RuleTypeTagList = 3 // tagged metrics, with regex (converted to RuleTypeTagged from string like 'retention=10min ; env=(staging|prod)') -}; - -const String & ruleTypeStr(RuleType rule_type); - struct Retention { UInt32 age; UInt32 precision; }; -bool operator==(const Retention & a, const Retention & b); - using Retentions = std::vector; -std::ostream &operator<<(std::ostream & stream, const Retentions & a); - struct Pattern { - RuleType rule_type = RuleTypeAll; std::shared_ptr regexp; std::string regexp_str; AggregateFunctionPtr function; @@ -125,9 +114,6 @@ struct Pattern enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically }; -bool operator==(const Pattern & a, const Pattern & b); -std::ostream &operator<<(std::ostream & stream, const Pattern & a); - using Patterns = std::vector; using RetentionPattern = Pattern; using AggregationPattern = Pattern; @@ -139,16 +125,9 @@ struct Params String time_column_name; String value_column_name; String version_column_name; - bool patterns_typed; Graphite::Patterns patterns; - Graphite::Patterns patterns_plain; - Graphite::Patterns patterns_tagged; }; using RollupRule = std::pair; -Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, const StringRef path); - -void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params); - } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index c4f60571dd9..328c34823a0 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -53,6 +52,62 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( columns_definition = defineColumns(header, params); } +Graphite::RollupRule GraphiteRollupSortedAlgorithm::selectPatternForPath(StringRef path) const +{ + const Graphite::Pattern * first_match = &undef_pattern; + + for (const auto & pattern : params.patterns) + { + if (!pattern.regexp) + { + /// Default pattern + if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) + { + /// There is only default pattern for both retention and aggregation + return std::pair(&pattern, &pattern); + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + else if (pattern.regexp->match(path.data, path.size)) + { + /// General pattern with matched path + if (pattern.type == pattern.TypeAll) + { + /// Only for not default patterns with both function and retention parameters + return std::pair(&pattern, &pattern); + } + if (first_match->type == first_match->TypeUndef) + { + first_match = &pattern; + continue; + } + if (pattern.type != first_match->type) + { + if (first_match->type == first_match->TypeRetention) + { + return std::pair(first_match, &pattern); + } + if (first_match->type == 
first_match->TypeAggregation) + { + return std::pair(&pattern, first_match); + } + } + } + } + + return {nullptr, nullptr}; +} + UInt32 GraphiteRollupSortedAlgorithm::selectPrecision(const Graphite::Retentions & retentions, time_t time) const { static_assert(is_signed_v, "time_t must be signed type"); @@ -133,7 +188,7 @@ IMergingAlgorithm::Status GraphiteRollupSortedAlgorithm::merge() Graphite::RollupRule next_rule = merged_data.currentRule(); if (new_path) - next_rule = selectPatternForPath(this->params, next_path); + next_rule = selectPatternForPath(next_path); const Graphite::RetentionPattern * retention_pattern = std::get<0>(next_rule); time_t next_time_rounded; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index 4968cbfc470..0155b73b238 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ -102,6 +102,16 @@ private: time_t current_time = 0; time_t current_time_rounded = 0; + const Graphite::Pattern undef_pattern = + { /// temporary empty pattern for selectPatternForPath + .regexp = nullptr, + .regexp_str = "", + .function = nullptr, + .retentions = DB::Graphite::Retentions(), + .type = undef_pattern.TypeUndef, + }; + + Graphite::RollupRule selectPatternForPath(StringRef path) const; UInt32 selectPrecision(const Graphite::Retentions & retentions, time_t time) const; /// Insert the values into the resulting columns, which will not be changed in the future. diff --git a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp b/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp deleted file mode 100644 index 1d739bf566a..00000000000 --- a/src/Processors/Merges/Algorithms/tests/gtest_graphite.cpp +++ /dev/null @@ -1,597 +0,0 @@ -#include -#include -#include -#include - -#include - -#include -#include - -#include -#include -#include -#include - -using namespace DB; - -static int regAggregateFunctions = 0; - -void tryRegisterAggregateFunctions() -{ - if (!regAggregateFunctions) - { - registerAggregateFunctions(); - regAggregateFunctions = 1; - } -} - -static ConfigProcessor::LoadedConfig loadConfiguration(const std::string & config_path) -{ - ConfigProcessor config_processor(config_path, true, true); - ConfigProcessor::LoadedConfig config = config_processor.loadConfig(false); - return config; -} - -static ConfigProcessor::LoadedConfig loadConfigurationFromString(std::string & s) -{ - char tmp_file[19]; - strcpy(tmp_file, "/tmp/rollup-XXXXXX"); - int fd = mkstemp(tmp_file); - if (fd == -1) - { - throw std::runtime_error(strerror(errno)); - } - try { - if (write(fd, s.c_str(), s.size()) < s.size()) - { - throw std::runtime_error("unable write to temp file"); - } - if (write(fd, "\n", 1) != 1) - { - throw std::runtime_error("unable write to temp file"); - } - close(fd); - auto config_path = std::string(tmp_file) + ".xml"; - if (std::rename(tmp_file, config_path.c_str())) - { - int err = errno; - remove(tmp_file); - throw std::runtime_error(strerror(err)); - } - ConfigProcessor::LoadedConfig config = loadConfiguration(config_path); - remove(tmp_file); - return config; - } - catch (...) 
- { - remove(tmp_file); - throw; - } -} - -static Graphite::Params setGraphitePatterns(ContextMutablePtr context, ConfigProcessor::LoadedConfig & config) -{ - context->setConfig(config.configuration); - - Graphite::Params params; - setGraphitePatternsFromConfig(context, "graphite_rollup", params); - - return params; -} - -struct PatternForCheck -{ - Graphite::RuleType rule_type; - std::string regexp_str; - String function; - Graphite::Retentions retentions; -}; - - -bool checkRule(const Graphite::Pattern & pattern, const struct PatternForCheck & pattern_check, - const std::string & typ, const std::string & path, std::string & message) -{ - bool rule_type_eq = (pattern.rule_type == pattern_check.rule_type); - bool regexp_eq = (pattern.regexp_str == pattern_check.regexp_str); - bool function_eq = (pattern.function == nullptr && pattern_check.function.empty()) - || (pattern.function != nullptr && pattern.function->getName() == pattern_check.function); - bool retentions_eq = (pattern.retentions == pattern_check.retentions); - - if (rule_type_eq && regexp_eq && function_eq && retentions_eq) - return true; - - message = typ + " rollup rule mismatch for '" + path + "'," + - (rule_type_eq ? "" : "rule_type ") + - (regexp_eq ? "" : "regexp ") + - (function_eq ? "" : "function ") + - (retentions_eq ? "" : "retentions "); - return false; -} - -std::ostream & operator<<(std::ostream & stream, const PatternForCheck & a) -{ - stream << "{ rule_type = " << ruleTypeStr(a.rule_type); - if (!a.regexp_str.empty()) - stream << ", regexp = '" << a.regexp_str << "'"; - if (!a.function.empty()) - stream << ", function = " << a.function; - if (!a.retentions.empty()) - { - stream << ",\n retentions = {\n"; - for (size_t i = 0; i < a.retentions.size(); i++) - { - stream << " { " << a.retentions[i].age << ", " << a.retentions[i].precision << " }"; - if (i < a.retentions.size() - 1) - stream << ","; - stream << "\n"; - } - stream << " }\n"; - } - else - stream << " "; - - stream << "}"; - return stream; -} - -struct PatternsForPath -{ - std::string path; - PatternForCheck retention_want; - PatternForCheck aggregation_want; -}; - -TEST(GraphiteTest, testSelectPattern) -{ - tryRegisterAggregateFunctions(); - - using namespace std::literals; - - std::string - xml(R"END( - - - \.sum$ - sum - - - ^((.*)|.)sum\? - sum - - - \.max$ - max - - - ^((.*)|.)max\? - max - - - \.min$ - min - - - ^((.*)|.)min\? - min - - - \.(count|sum|sum_sq)$ - sum - - - ^((.*)|.)(count|sum|sum_sq)\? - sum - - - ^retention\. - - 0 - 60 - - - 86400 - 3600 - - - - avg - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - -)END"); - - // Retentions must be ordered by 'age' descending. 
- std::vector tests - { - { - "test.sum", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(\.sum$)END", "sum", { } } - }, - { - "val.sum?env=test&tag=Fake3", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(^((.*)|.)sum\?)END", "sum", { } } - }, - { - "test.max", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(\.max$)END", "max", { } }, - }, - { - "val.max?env=test&tag=Fake4", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(^((.*)|.)max\?)END", "max", { } }, - }, - { - "test.min", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(\.min$)END", "min", { } }, - }, - { - "val.min?env=test&tag=Fake5", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(^((.*)|.)min\?)END", "min", { } }, - }, - { - "retention.count", - { Graphite::RuleTypeAll, R"END(^retention\.)END", "", { { 86400, 3600 }, { 0, 60 } } }, // ^retention - { Graphite::RuleTypeAll, R"END(\.(count|sum|sum_sq)$)END", "sum", { } }, - }, - { - "val.retention.count?env=test&tag=Fake5", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "val.count?env=test&tag=Fake5", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "test.p95", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - }, - { - "val.p95?env=test&tag=FakeNo", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - }, - { - "default", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - }, - { - "val.default?env=test&tag=FakeNo", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - } - }; - - auto config = loadConfigurationFromString(xml); - ContextMutablePtr context = getContext().context; - Graphite::Params params = setGraphitePatterns(context, config); - - for (const auto & t : tests) - { - auto rule = DB::Graphite::selectPatternForPath(params, t.path); - std:: string message; - if (!checkRule(*rule.first, t.retention_want, "retention", t.path, message)) - ADD_FAILURE() << message << ", got\n" << *rule.first << "\n, want\n" << t.retention_want << "\n"; - if (!checkRule(*rule.second, t.aggregation_want, "aggregation", t.path, message)) - ADD_FAILURE() << message << ", got\n" << *rule.second << "\n, want\n" << t.aggregation_want << "\n"; - } -} - - -namespace DB::Graphite -{ - std::string buildTaggedRegex(std::string regexp_str); -} - -struct RegexCheck -{ - 
std::string regex; - std::string regex_want; - std::string match; - std::string nomatch; -}; - -TEST(GraphiteTest, testBuildTaggedRegex) -{ - std::vector tests - { - { - "cpu\\.loadavg;project=DB.*;env=st.*", - R"END(^cpu\.loadavg\?(.*&)?env=st.*&(.*&)?project=DB.*(&.*)?$)END", - R"END(cpu.loadavg?env=staging&project=DBAAS)END", - R"END(cpu.loadavg?env=staging&project=D)END" - }, - { - R"END(project=DB.*;env=staging;)END", - R"END([\?&]env=staging&(.*&)?project=DB.*(&.*)?$)END", - R"END(cpu.loadavg?env=staging&project=DBPG)END", - R"END(cpu.loadavg?env=stagingN&project=DBAAS)END" - }, - { - "env=staging;", - R"END([\?&]env=staging(&.*)?$)END", - R"END(cpu.loadavg?env=staging&project=DPG)END", - R"END(cpu.loadavg?env=stagingN)END" - }, - { - " env = staging ;", // spaces are allowed, - R"END([\?&] env = staging (&.*)?$)END", - R"END(cpu.loadavg? env = staging &project=DPG)END", - R"END(cpu.loadavg?env=stagingN)END" - }, - { - "name;", - R"END(^name\?)END", - R"END(name?env=staging&project=DPG)END", - R"END(nameN?env=stagingN)END", - }, - { - "name", - R"END(^name\?)END", - R"END(name?env=staging&project=DPG)END", - R"END(nameN?env=stagingN)END", - } - }; - for (const auto & t : tests) - { - auto s = DB::Graphite::buildTaggedRegex(t.regex); - EXPECT_EQ(t.regex_want, s) << "result for '" << t.regex_want << "' mismatch"; - auto regexp = OptimizedRegularExpression(s); - EXPECT_TRUE(regexp.match(t.match.data(), t.match.size())) << t.match << " match for '" << s << "' failed"; - EXPECT_FALSE(regexp.match(t.nomatch.data(), t.nomatch.size())) << t.nomatch << " ! match for '" << s << "' failed"; - } -} - -TEST(GraphiteTest, testSelectPatternTyped) -{ - tryRegisterAggregateFunctions(); - - using namespace std::literals; - - std::string - xml(R"END( - - - plain - \.sum$ - sum - - - tagged - ^((.*)|.)sum\? - sum - - - plain - \.max$ - max - - - tagged - ^((.*)|.)max\? - max - - - plain - \.min$ - min - - - tagged - ^((.*)|.)min\? - min - - - plain - \.(count|sum|sum_sq)$ - sum - - - tagged - ^((.*)|.)(count|sum|sum_sq)\? - sum - - - plain - ^retention\. - - 0 - 60 - - - 86400 - 3600 - - - - tagged - - - 0 - 60 - - - 86400 - 3600 - - - - tag_list - retention=10min;env=staging - - 0 - 600 - - - 86400 - 3600 - - - - tag_list - retention=10min;env=[A-Za-z-]+rod[A-Za-z-]+ - - 0 - 600 - - - 86400 - 3600 - - - - tag_list - cpu\.loadavg - - 0 - 600 - - - 86400 - 3600 - - - - avg - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - -)END"); - - // Retentions must be ordered by 'age' descending. 
- std::vector tests - { - { - "test.sum", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypePlain, R"END(\.sum$)END", "sum", { } } - }, - { - "val.sum?env=test&tag=Fake3", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeTagged, R"END(^((.*)|.)sum\?)END", "sum", { } } - }, - { - "test.max", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypePlain, R"END(\.max$)END", "max", { } }, - }, - { - "val.max?env=test&tag=Fake4", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeTagged, R"END(^((.*)|.)max\?)END", "max", { } }, - }, - { - "test.min", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypePlain, R"END(\.min$)END", "min", { } }, - }, - { - "val.min?env=test&tag=Fake5", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeTagged, R"END(^((.*)|.)min\?)END", "min", { } }, - }, - { - "retention.count", - { Graphite::RuleTypePlain, R"END(^retention\.)END", "", { { 86400, 3600 }, { 0, 60 } } }, // ^retention - { Graphite::RuleTypePlain, R"END(\.(count|sum|sum_sq)$)END", "sum", { } }, - }, - { - "val.count?env=test&retention=hour&tag=Fake5", - { Graphite::RuleTypeTagged, R"END([\?&]retention=hour(&.*)?$)END", "", { { 86400, 3600 }, { 0, 60 } } }, // tagged retention=hour - { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "val.count?env=test&retention=hour", - { Graphite::RuleTypeTagged, R"END([\?&]retention=hour(&.*)?$)END", "", { { 86400, 3600 }, { 0, 60 } } }, // tagged retention=hour - { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "val.count?env=staging&retention=10min", - { Graphite::RuleTypeTagged, R"END([\?&]env=staging&(.*&)?retention=10min(&.*)?$)END", "", { { 86400, 3600 }, { 0, 600 } } }, // retention=10min ; env=staging - { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "val.count?env=production&retention=10min", - { Graphite::RuleTypeTagged, R"END([\?&]env=[A-Za-z-]+rod[A-Za-z-]+&(.*&)?retention=10min(&.*)?$)END", "", { { 86400, 3600 }, { 0, 600 } } }, // retention=10min ; env=[A-Za-z-]+rod[A-Za-z-]+ - { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "val.count?env=test&tag=Fake5", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeTagged, R"END(^((.*)|.)(count|sum|sum_sq)\?)END", "sum", { } }, - }, - { - "cpu.loadavg?env=test&tag=FakeNo", - { Graphite::RuleTypeTagged, R"END(^cpu\.loadavg\?)END", "", { { 86400, 3600 }, { 0, 600 } } }, // name=cpu\.loadavg - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, - }, - { - "test.p95", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - }, - { - "val.p95?env=test&tag=FakeNo", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - }, - { - "default", - { Graphite::RuleTypeAll, 
"", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - }, - { - "val.default?env=test&tag=FakeNo", - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - { Graphite::RuleTypeAll, "", "avg", { { 86400, 3600 }, { 3600, 300 }, { 0, 60 } } }, //default - } - }; - - auto config = loadConfigurationFromString(xml); - ContextMutablePtr context = getContext().context; - Graphite::Params params = setGraphitePatterns(context, config); - - for (const auto & t : tests) - { - auto rule = DB::Graphite::selectPatternForPath(params, t.path); - std:: string message; - if (!checkRule(*rule.first, t.retention_want, "retention", t.path, message)) - ADD_FAILURE() << message << ", got\n" << *rule.first << "\n, want\n" << t.retention_want << "\n"; - if (!checkRule(*rule.second, t.aggregation_want, "aggregation", t.path, message)) - ADD_FAILURE() << message << ", got\n" << *rule.second << "\n, want\n" << t.aggregation_want << "\n"; - } -} diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index ac6f4d8b7a4..cb52c8b86c0 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -22,13 +22,17 @@ #include #include +#include namespace DB { namespace ErrorCodes { + extern const int NOT_IMPLEMENTED; extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int NO_ELEMENTS_IN_CONFIG; extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; } @@ -58,6 +62,171 @@ static Names extractColumnNames(const ASTPtr & node) } } +/** Is used to order Graphite::Retentions by age and precision descending. + * Throws exception if not both age and precision are less or greater then another. + */ +static bool compareRetentions(const Graphite::Retention & a, const Graphite::Retention & b) +{ + if (a.age > b.age && a.precision > b.precision) + { + return true; + } + else if (a.age < b.age && a.precision < b.precision) + { + return false; + } + String error_msg = "age and precision should only grow up: " + + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " + + std::to_string(b.age) + ":" + std::to_string(b.precision); + throw Exception( + error_msg, + ErrorCodes::BAD_ARGUMENTS); +} + +/** Read the settings for Graphite rollup from config. 
+ * Example + * + * + * Path + * + * click_cost + * any + * + * 0 + * 3600 + * + * + * 86400 + * 60 + * + * + * + * max + * + * 0 + * 60 + * + * + * 3600 + * 300 + * + * + * 86400 + * 3600 + * + * + * + */ +static void appendGraphitePattern( + const Poco::Util::AbstractConfiguration & config, + const String & config_element, + Graphite::Patterns & out_patterns, + ContextPtr context) +{ + Graphite::Pattern pattern; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_element, keys); + + for (const auto & key : keys) + { + if (key == "regexp") + { + pattern.regexp_str = config.getString(config_element + ".regexp"); + pattern.regexp = std::make_shared(pattern.regexp_str); + } + else if (key == "function") + { + String aggregate_function_name_with_params = config.getString(config_element + ".function"); + String aggregate_function_name; + Array params_row; + getAggregateFunctionNameAndParametersArray( + aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); + + /// TODO Not only Float64 + AggregateFunctionProperties properties; + pattern.function = AggregateFunctionFactory::instance().get( + aggregate_function_name, {std::make_shared()}, params_row, properties); + } + else if (startsWith(key, "retention")) + { + pattern.retentions.emplace_back(Graphite::Retention{ + .age = config.getUInt(config_element + "." + key + ".age"), + .precision = config.getUInt(config_element + "." + key + ".precision")}); + } + else + throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + if (!pattern.function && pattern.retentions.empty()) + throw Exception( + "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + if (!pattern.function) + { + pattern.type = pattern.TypeRetention; + } + else if (pattern.retentions.empty()) + { + pattern.type = pattern.TypeAggregation; + } + else + { + pattern.type = pattern.TypeAll; + } + + if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll + if (pattern.function->allocatesMemoryInArena()) + throw Exception( + "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", ErrorCodes::NOT_IMPLEMENTED); + + /// retention should be in descending order of age. 
+ if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll + std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); + + out_patterns.emplace_back(pattern); +} + +static void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) +{ + const auto & config = context->getConfigRef(); + + if (!config.has(config_element)) + throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + + params.config_name = config_element; + params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); + params.time_column_name = config.getString(config_element + ".time_column_name", "Time"); + params.value_column_name = config.getString(config_element + ".value_column_name", "Value"); + params.version_column_name = config.getString(config_element + ".version_column_name", "Timestamp"); + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(config_element, keys); + + for (const auto & key : keys) + { + if (startsWith(key, "pattern")) + { + appendGraphitePattern(config, config_element + "." + key, params.patterns, context); + } + else if (key == "default") + { + /// See below. + } + else if (key == "path_column_name" || key == "time_column_name" || key == "value_column_name" || key == "version_column_name") + { + /// See above. + } + else + throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + } + + if (config.has(config_element + ".default")) + appendGraphitePattern(config, config_element + "." + ".default", params.patterns, context); +} + + static String getMergeTreeVerboseHelp(bool) { using namespace std::string_literals; @@ -373,6 +542,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// to make possible copying metadata files between replicas. 
Macros::MacroExpansionInfo info; info.table_id = args.table_id; + if (is_replicated_database) + { + auto database = DatabaseCatalog::instance().getDatabase(args.table_id.database_name); + info.shard = getReplicatedDatabaseShardName(database); + info.replica = getReplicatedDatabaseReplicaName(database); + } if (!allow_uuid_macro) info.table_id.uuid = UUIDHelpers::Nil; zookeeper_path = args.getContext()->getMacros()->expand(zookeeper_path, info); diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index 8711162385f..dd592600d18 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -10,7 +10,6 @@ NamesAndTypesList StorageSystemGraphite::getNamesAndTypes() { return { {"config_name", std::make_shared()}, - {"rule_type", std::make_shared()}, {"regexp", std::make_shared()}, {"function", std::make_shared()}, {"age", std::make_shared()}, @@ -86,7 +85,6 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co bool is_default = pattern.regexp == nullptr; String regexp; String function; - const String & rule_type = ruleTypeStr(pattern.rule_type); if (is_default) { @@ -109,7 +107,6 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co { size_t i = 0; res_columns[i++]->insert(config.first); - res_columns[i++]->insert(rule_type); res_columns[i++]->insert(regexp); res_columns[i++]->insert(function); res_columns[i++]->insert(retention.age); @@ -124,7 +121,6 @@ void StorageSystemGraphite::fillData(MutableColumns & res_columns, ContextPtr co { size_t i = 0; res_columns[i++]->insert(config.first); - res_columns[i++]->insert(rule_type); res_columns[i++]->insert(regexp); res_columns[i++]->insert(function); res_columns[i++]->insertDefault(); diff --git a/tests/integration/helpers/test_tools.py b/tests/integration/helpers/test_tools.py index ec3841f79d7..3577553be34 100644 --- a/tests/integration/helpers/test_tools.py +++ b/tests/integration/helpers/test_tools.py @@ -100,19 +100,3 @@ def exec_query_with_retry(instance, query, retry_count=40, sleep_time=0.5, silen time.sleep(sleep_time) else: raise exception - -def csv_compare(result, expected): - csv_result = TSV(result) - csv_expected = TSV(expected) - mismatch = [] - max_len = len(csv_result) if len(csv_result) > len(csv_expected) else len(csv_expected) - for i in range(max_len): - if i >= len(csv_result): - mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) - elif i >= len(csv_expected): - mismatch.append("+[%d]=%s" % (i, csv_result.lines[i])) - elif csv_expected.lines[i] != csv_result.lines[i]: - mismatch.append("-[%d]=%s" % (i, csv_expected.lines[i])) - mismatch.append("+[%d]=%s" % (i, csv_result.lines[i])) - - return "\n".join(mismatch) diff --git a/tests/integration/test_graphite_merge_tree/test.py b/tests/integration/test_graphite_merge_tree/test.py index 9e48f12f007..7628211551d 100644 --- a/tests/integration/test_graphite_merge_tree/test.py +++ b/tests/integration/test_graphite_merge_tree/test.py @@ -6,7 +6,6 @@ import pytest from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV -from helpers.test_tools import csv_compare cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', @@ -235,19 +234,18 @@ SELECT * FROM test.graphite; def test_system_graphite_retentions(graphite_table): expected = ''' -graphite_rollup all \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] -graphite_rollup 
all \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] -graphite_rollup all ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] -graphite_rollup all ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] -graphite_rollup all ^five_min\\\\. 0 300 3 0 ['test'] ['graphite'] -graphite_rollup all ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] -graphite_rollup all ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] -graphite_rollup all ^one_min avg 0 60 4 0 ['test'] ['graphite'] +graphite_rollup \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] +graphite_rollup \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] +graphite_rollup ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] +graphite_rollup ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] +graphite_rollup ^five_min\\\\. 0 300 3 0 ['test'] ['graphite'] +graphite_rollup ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] +graphite_rollup ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] +graphite_rollup ^one_min avg 0 60 4 0 ['test'] ['graphite'] ''' result = q('SELECT * from system.graphite_retentions') - mismatch = csv_compare(result, expected) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" + assert TSV(result) == TSV(expected) q(''' DROP TABLE IF EXISTS test.graphite2; diff --git a/tests/integration/test_graphite_merge_tree_typed/__init__.py b/tests/integration/test_graphite_merge_tree_typed/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml b/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml deleted file mode 100644 index c716540a61c..00000000000 --- a/tests/integration/test_graphite_merge_tree_typed/configs/graphite_rollup.xml +++ /dev/null @@ -1,120 +0,0 @@ - - - - metric - timestamp - value - updated - - plain - \.count$ - sum - - - plain - \.max$ - max - - - plain - ^five_min\. - - 0 - 300 - - - 5184000 - 3600 - - - 31536000 - 14400 - - - - plain - ^one_min - avg - - 0 - 60 - - - 7776000 - 300 - - - 31536000 - 600 - - - - tagged - - avg - - 0 - 60 - - - 7776000 - 300 - - - 31536000 - 600 - - - - tag_list - retention=five_min - avg - - 0 - 300 - - - 5184000 - 3600 - - - 31536000 - 14400 - - - - tagged - ^for_taggged - avg - - 0 - 60 - - - 7776000 - 300 - - - 31536000 - 600 - - - - all - ^ten_min\. 
- sum - - 0 - 600 - - - 5184000 - 7200 - - - 31536000 - 28800 - - - - diff --git a/tests/integration/test_graphite_merge_tree_typed/configs/users.xml b/tests/integration/test_graphite_merge_tree_typed/configs/users.xml deleted file mode 100644 index 66d0cd7e445..00000000000 --- a/tests/integration/test_graphite_merge_tree_typed/configs/users.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - 0 - - - diff --git a/tests/integration/test_graphite_merge_tree_typed/test.py b/tests/integration/test_graphite_merge_tree_typed/test.py deleted file mode 100644 index e26fd0d2e77..00000000000 --- a/tests/integration/test_graphite_merge_tree_typed/test.py +++ /dev/null @@ -1,580 +0,0 @@ -import datetime -import os.path as p -import time - -import sys -import pytest -from helpers.client import QueryRuntimeException -from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV -from helpers.test_tools import csv_compare - -cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', - main_configs=['configs/graphite_rollup.xml'], - user_configs=["configs/users.xml"]) -q = instance.query - - -@pytest.fixture(scope="module") -def started_cluster(): - try: - cluster.start() - q('CREATE DATABASE test') - - yield cluster - - finally: - cluster.shutdown() - - -@pytest.fixture -def graphite_table(started_cluster): - q(''' -DROP TABLE IF EXISTS test.graphite; -CREATE TABLE test.graphite - (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) - ENGINE = GraphiteMergeTree('graphite_rollup') - PARTITION BY toYYYYMM(date) - ORDER BY (metric, timestamp) - SETTINGS index_granularity=8192; -''') - - yield - - q('DROP TABLE test.graphite') - - -def test_rollup_versions_plain(graphite_table): - timestamp = int(time.time()) - rounded_timestamp = timestamp - timestamp % 60 - date = datetime.date.today().isoformat() - - # Insert rows with timestamps relative to the current time so that the - # first retention clause is active. - # Two parts are created. - q(''' -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('one_min.x1', 100, {timestamp}, '{date}', 1); -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('one_min.x1', 200, {timestamp}, '{date}', 2); -'''.format(timestamp=timestamp, date=date)) - - expected1 = '''\ -one_min.x1 100 {timestamp} {date} 1 -one_min.x1 200 {timestamp} {date} 2 -'''.format(timestamp=timestamp, date=date) - - assert TSV( - q('SELECT * FROM test.graphite ORDER BY updated') - ) == TSV(expected1) - - q('OPTIMIZE TABLE test.graphite') - - # After rollup only the row with max version is retained. - expected2 = '''\ -one_min.x1 200 {timestamp} {date} 2 -'''.format(timestamp=rounded_timestamp, date=date) - - assert TSV(q('SELECT * FROM test.graphite')) == TSV(expected2) - - -def test_rollup_versions_tagged(graphite_table): - timestamp = int(time.time()) - rounded_timestamp = timestamp - timestamp % 60 - date = datetime.date.today().isoformat() - - # Insert rows with timestamps relative to the current time so that the - # first retention clause is active. - # Two parts are created. 
- q(''' -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('x1?retention=one_min', 100, {timestamp}, '{date}', 1); -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('x1?retention=one_min', 200, {timestamp}, '{date}', 2); -'''.format(timestamp=timestamp, date=date)) - - expected1 = '''\ -x1?retention=one_min 100 {timestamp} {date} 1 -x1?retention=one_min 200 {timestamp} {date} 2 -'''.format(timestamp=timestamp, date=date) - - result = q('SELECT * FROM test.graphite ORDER BY metric, updated') - mismatch = csv_compare(result, expected1) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected1}\ndiff\n{mismatch}\n" - - q('OPTIMIZE TABLE test.graphite') - - # After rollup only the row with max version is retained. - expected2 = '''\ -x1?retention=one_min 200 {timestamp} {date} 2 -'''.format(timestamp=rounded_timestamp, date=date) - - result = q('SELECT * FROM test.graphite ORDER BY metric, updated') - mismatch = csv_compare(result, expected2) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected2}\ndiff\n{mismatch}\n" - - -def test_rollup_versions_all(graphite_table): - timestamp = int(time.time()) - rounded_timestamp = timestamp - timestamp % 600 - date = datetime.date.today().isoformat() - - # Insert rows with timestamps relative to the current time so that the - # first retention clause is active. - # Two parts are created. - q(''' -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('ten_min.x1', 100, {timestamp}, '{date}', 1); -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('ten_min.x1', 200, {timestamp}, '{date}', 2); -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('ten_min.x1?env=staging', 100, {timestamp}, '{date}', 1); -INSERT INTO test.graphite (metric, value, timestamp, date, updated) - VALUES ('ten_min.x1?env=staging', 200, {timestamp}, '{date}', 2); -'''.format(timestamp=timestamp, date=date)) - - expected1 = '''\ -ten_min.x1 100 {timestamp} {date} 1 -ten_min.x1 200 {timestamp} {date} 2 -ten_min.x1?env=staging 100 {timestamp} {date} 1 -ten_min.x1?env=staging 200 {timestamp} {date} 2 -'''.format(timestamp=timestamp, date=date) - - result = q('SELECT * FROM test.graphite ORDER BY metric, updated') - mismatch = csv_compare(result, expected1) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected1}\ndiff\n{mismatch}\n" - - q('OPTIMIZE TABLE test.graphite') - - # After rollup only the row with max version is retained. - expected2 = '''\ -ten_min.x1 200 {timestamp} {date} 2 -ten_min.x1?env=staging 200 {timestamp} {date} 2 -'''.format(timestamp=rounded_timestamp, date=date) - - result = q('SELECT * FROM test.graphite ORDER BY metric, updated') - mismatch = csv_compare(result, expected2) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected2}\ndiff\n{mismatch}\n" - - -def test_rollup_aggregation_plain(graphite_table): - # This query essentially emulates what rollup does. 
- result1 = q(''' -SELECT avg(v), max(upd) -FROM (SELECT timestamp, - argMax(value, (updated, number)) AS v, - max(updated) AS upd - FROM (SELECT 'one_min.x5' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(intDiv(number, 2)) AS updated, - number - FROM system.numbers LIMIT 1000000) - WHERE intDiv(timestamp, 600) * 600 = 1111444200 - GROUP BY timestamp) -''') - - expected1 = '''\ -999634.9918367347 499999 -''' - assert TSV(result1) == TSV(expected1) - - # Timestamp 1111111111 is in sufficiently distant past - # so that the last retention clause is active. - result2 = q(''' -INSERT INTO test.graphite - SELECT 'one_min.x' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, - toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated - FROM (SELECT * FROM system.numbers LIMIT 1000000) - WHERE intDiv(timestamp, 600) * 600 = 1111444200; - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; -''') - - expected2 = '''\ -one_min.x 999634.9918367347 1111444200 2017-02-02 499999 -''' - - assert TSV(result2) == TSV(expected2) - - -def test_rollup_aggregation_tagged(graphite_table): - # This query essentially emulates what rollup does. - result1 = q(''' -SELECT avg(v), max(upd) -FROM (SELECT timestamp, - argMax(value, (updated, number)) AS v, - max(updated) AS upd - FROM (SELECT 'x?retention=one_min' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(intDiv(number, 2)) AS updated, - number - FROM system.numbers LIMIT 1000000) - WHERE intDiv(timestamp, 600) * 600 = 1111444200 - GROUP BY timestamp) -''') - - expected1 = '''\ -999634.9918367347 499999 -''' - assert TSV(result1) == TSV(expected1) - - # Timestamp 1111111111 is in sufficiently distant past - # so that the last retention clause is active. 
- result2 = q(''' -INSERT INTO test.graphite - SELECT 'x?retention=one_min' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + intDiv(number, 3)) AS timestamp, - toDate('2017-02-02') AS date, toUInt32(intDiv(number, 2)) AS updated - FROM (SELECT * FROM system.numbers LIMIT 1000000) - WHERE intDiv(timestamp, 600) * 600 = 1111444200; - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; -''') - - expected2 = '''\ -x?retention=one_min 999634.9918367347 1111444200 2017-02-02 499999 -''' - - assert TSV(result2) == TSV(expected2) - - -def test_rollup_aggregation_2_plain(graphite_table): - result = q(''' -INSERT INTO test.graphite - SELECT 'one_min.x' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(100 - number) AS updated - FROM (SELECT * FROM system.numbers LIMIT 50); - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; -''') - - expected = '''\ -one_min.x 24 1111110600 2017-02-02 100 -''' - - assert TSV(result) == TSV(expected) - - -def test_rollup_aggregation_2_tagged(graphite_table): - result = q(''' -INSERT INTO test.graphite - SELECT 'x?retention=one_min' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 - intDiv(number, 3)) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(100 - number) AS updated - FROM (SELECT * FROM system.numbers LIMIT 50); - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; -''') - - expected = '''\ -x?retention=one_min 24 1111110600 2017-02-02 100 -''' - - assert TSV(result) == TSV(expected) - - -def test_multiple_paths_and_versions_plain(graphite_table): - result = q(''' -INSERT INTO test.graphite - SELECT 'one_min.x' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(100 - number) AS updated - FROM (SELECT * FROM system.numbers LIMIT 50); - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; - - -INSERT INTO test.graphite - SELECT 'one_min.y' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + number * 600) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(100 - number) AS updated - FROM (SELECT * FROM system.numbers LIMIT 50); - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; -''') - - with open(p.join(p.dirname(__file__), - 'test_multiple_paths_and_versions.reference.plain') - ) as reference: - assert TSV(result) == TSV(reference) - - -def test_multiple_paths_and_versions_tagged(graphite_table): - result = q(''' -INSERT INTO test.graphite - SELECT 'x?retention=one_min' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + intDiv(number, 3) * 600) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(100 - number) AS updated - FROM (SELECT * FROM system.numbers LIMIT 50); - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; - - -INSERT INTO test.graphite - SELECT 'y?retention=one_min' AS metric, - toFloat64(number) AS value, - toUInt32(1111111111 + number * 600) AS timestamp, - toDate('2017-02-02') AS date, - toUInt32(100 - number) AS updated - FROM (SELECT * FROM system.numbers LIMIT 50); - -OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL; - -SELECT * FROM test.graphite; -''') - - with open(p.join(p.dirname(__file__), - 'test_multiple_paths_and_versions.reference.tagged') - ) as reference: - assert 
TSV(result) == TSV(reference) - - -def test_multiple_output_blocks(graphite_table): - MERGED_BLOCK_SIZE = 8192 - - to_insert = '' - expected = '' - for i in range(2 * MERGED_BLOCK_SIZE + 1): - rolled_up_time = 1000000200 + 600 * i - - for j in range(3): - cur_time = rolled_up_time + 100 * j - to_insert += 'one_min.x1 {} {} 2001-09-09 1\n'.format( - 10 * j, cur_time - ) - to_insert += 'one_min.x1 {} {} 2001-09-09 2\n'.format( - 10 * (j + 1), cur_time - ) - - expected += 'one_min.x1 20 {} 2001-09-09 2\n'.format(rolled_up_time) - - q('INSERT INTO test.graphite FORMAT TSV', to_insert) - - result = q(''' -OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; - -SELECT * FROM test.graphite; -''') - - assert TSV(result) == TSV(expected) - - -def test_paths_not_matching_any_pattern(graphite_table): - to_insert = '''\ -one_min.x1 100 1000000000 2001-09-09 1 -zzzzzzzz 100 1000000001 2001-09-09 1 -zzzzzzzz 200 1000000001 2001-09-09 2 -''' - - q('INSERT INTO test.graphite FORMAT TSV', to_insert) - - expected = '''\ -one_min.x1 100 999999600 2001-09-09 1 -zzzzzzzz 200 1000000001 2001-09-09 2 -''' - - result = q(''' -OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; - -SELECT * FROM test.graphite; -''') - - assert TSV(result) == TSV(expected) - - -def test_rules_isolation(graphite_table): - to_insert = '''\ -one_min.x1 100 1000000000 2001-09-09 1 -for_taggged 100 1000000001 2001-09-09 1 -for_taggged 200 1000000001 2001-09-09 2 -one_min?env=staging 100 1000000001 2001-09-09 1 -one_min?env=staging 200 1000000001 2001-09-09 2 -''' - - q('INSERT INTO test.graphite FORMAT TSV', to_insert) - - expected = '''\ -for_taggged 200 1000000001 2001-09-09 2 -one_min.x1 100 999999600 2001-09-09 1 -one_min?env=staging 200 1000000001 2001-09-09 2 -''' - - result = q(''' -OPTIMIZE TABLE test.graphite PARTITION 200109 FINAL; - -SELECT * FROM test.graphite; -''') - - result = q('SELECT * FROM test.graphite ORDER BY metric, updated') - mismatch = csv_compare(result, expected) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" - - -def test_system_graphite_retentions(graphite_table): - expected = ''' -graphite_rollup plain \\\\.count$ sum 0 0 1 0 ['test'] ['graphite'] -graphite_rollup plain \\\\.max$ max 0 0 2 0 ['test'] ['graphite'] -graphite_rollup plain ^five_min\\\\. 31536000 14400 3 0 ['test'] ['graphite'] -graphite_rollup plain ^five_min\\\\. 5184000 3600 3 0 ['test'] ['graphite'] -graphite_rollup plain ^five_min\\\\. 
0 300 3 0 ['test'] ['graphite'] -graphite_rollup plain ^one_min avg 31536000 600 4 0 ['test'] ['graphite'] -graphite_rollup plain ^one_min avg 7776000 300 4 0 ['test'] ['graphite'] -graphite_rollup plain ^one_min avg 0 60 4 0 ['test'] ['graphite'] -graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 31536000 600 5 0 ['test'] ['graphite'] -graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 7776000 300 5 0 ['test'] ['graphite'] -graphite_rollup tagged [\\\\?&]retention=one_min(&.*)?$ avg 0 60 5 0 ['test'] ['graphite'] -graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 31536000 14400 6 0 ['test'] ['graphite'] -graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 5184000 3600 6 0 ['test'] ['graphite'] -graphite_rollup tagged [\\\\?&]retention=five_min(&.*)?$ avg 0 300 6 0 ['test'] ['graphite'] -graphite_rollup tagged ^for_taggged avg 31536000 600 7 0 ['test'] ['graphite'] -graphite_rollup tagged ^for_taggged avg 7776000 300 7 0 ['test'] ['graphite'] -graphite_rollup tagged ^for_taggged avg 0 60 7 0 ['test'] ['graphite'] -graphite_rollup all ^ten_min\\\\. sum 31536000 28800 8 0 ['test'] ['graphite'] -graphite_rollup all ^ten_min\\\\. sum 5184000 7200 8 0 ['test'] ['graphite'] -graphite_rollup all ^ten_min\\\\. sum 0 600 8 0 ['test'] ['graphite'] - ''' - result = q('SELECT * from system.graphite_retentions') - - mismatch = csv_compare(result, expected) - assert len(mismatch) == 0, f"got\n{result}\nwant\n{expected}\ndiff\n{mismatch}\n" - - q(''' -DROP TABLE IF EXISTS test.graphite2; -CREATE TABLE test.graphite2 - (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) - ENGINE = GraphiteMergeTree('graphite_rollup') - PARTITION BY toYYYYMM(date) - ORDER BY (metric, timestamp) - SETTINGS index_granularity=8192; - ''') - expected = ''' -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] -graphite_rollup ['test','test'] ['graphite','graphite2'] - ''' - result = q(''' - SELECT - config_name, - Tables.database, - Tables.table - FROM system.graphite_retentions - ''') - assert csv_compare(result, expected), f"got\n{result}\nwant\n{expected}" - - -def test_path_dangling_pointer(graphite_table): - q(''' -DROP TABLE IF EXISTS test.graphite2; -CREATE TABLE test.graphite2 - (metric String, value Float64, timestamp UInt32, date Date, updated UInt32) - ENGINE = GraphiteMergeTree('graphite_rollup') - PARTITION BY toYYYYMM(date) - ORDER BY (metric, timestamp) - SETTINGS index_granularity=1; - ''') - - path = 'abcd' * 4000000 # 16MB - q('INSERT INTO test.graphite2 FORMAT TSV', - "{}\t0.0\t0\t2018-01-01\t100\n".format(path)) - q('INSERT INTO test.graphite2 FORMAT TSV', - "{}\t0.0\t0\t2018-01-01\t101\n".format(path)) - for version in range(10): - q('INSERT INTO test.graphite2 FORMAT TSV', - "{}\t0.0\t0\t2018-01-01\t{}\n".format(path, version)) - - while True: - q('OPTIMIZE TABLE test.graphite2 PARTITION 201801 FINAL') - parts = int(q("SELECT count() FROM system.parts " - "WHERE active AND database='test' " - "AND table='graphite2'")) - if parts == 1: - break - print(('Parts', parts)) - - assert TSV( - q("SELECT value, timestamp, date, updated FROM test.graphite2") - ) == TSV("0\t0\t2018-01-01\t101\n") - - q('DROP 
TABLE test.graphite2') - - -def test_combined_rules(graphite_table): - # 1487970000 ~ Sat 25 Feb 00:00:00 MSK 2017 - to_insert = 'INSERT INTO test.graphite VALUES ' - expected_unmerged = '' - for i in range(384): - to_insert += "('five_min.count', {v}, {t}, toDate({t}), 1), ".format( - v=1, t=1487970000 + (i * 300) - ) - to_insert += "('five_min.max', {v}, {t}, toDate({t}), 1), ".format( - v=i, t=1487970000 + (i * 300) - ) - expected_unmerged += ("five_min.count\t{v1}\t{t}\n" - "five_min.max\t{v2}\t{t}\n").format( - v1=1, v2=i, - t=1487970000 + (i * 300) - ) - - q(to_insert) - assert TSV(q('SELECT metric, value, timestamp FROM test.graphite' - ' ORDER BY (timestamp, metric)')) == TSV(expected_unmerged) - - q('OPTIMIZE TABLE test.graphite PARTITION 201702 FINAL') - expected_merged = ''' - five_min.count 48 1487970000 2017-02-25 1 - five_min.count 48 1487984400 2017-02-25 1 - five_min.count 48 1487998800 2017-02-25 1 - five_min.count 48 1488013200 2017-02-25 1 - five_min.count 48 1488027600 2017-02-25 1 - five_min.count 48 1488042000 2017-02-25 1 - five_min.count 48 1488056400 2017-02-26 1 - five_min.count 48 1488070800 2017-02-26 1 - five_min.max 47 1487970000 2017-02-25 1 - five_min.max 95 1487984400 2017-02-25 1 - five_min.max 143 1487998800 2017-02-25 1 - five_min.max 191 1488013200 2017-02-25 1 - five_min.max 239 1488027600 2017-02-25 1 - five_min.max 287 1488042000 2017-02-25 1 - five_min.max 335 1488056400 2017-02-26 1 - five_min.max 383 1488070800 2017-02-26 1 - ''' - assert TSV(q('SELECT * FROM test.graphite' - ' ORDER BY (metric, timestamp)')) == TSV(expected_merged) diff --git a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain deleted file mode 100644 index 0f10d11ed05..00000000000 --- a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.plain +++ /dev/null @@ -1,84 +0,0 @@ -one_min.x 0 1111110600 2017-02-02 100 -one_min.x 3 1111111200 2017-02-02 97 -one_min.x 6 1111111800 2017-02-02 94 -one_min.x 9 1111112400 2017-02-02 91 -one_min.x 12 1111113000 2017-02-02 88 -one_min.x 15 1111113600 2017-02-02 85 -one_min.x 18 1111114200 2017-02-02 82 -one_min.x 21 1111114800 2017-02-02 79 -one_min.x 24 1111115400 2017-02-02 76 -one_min.x 27 1111116000 2017-02-02 73 -one_min.x 30 1111116600 2017-02-02 70 -one_min.x 33 1111117200 2017-02-02 67 -one_min.x 36 1111117800 2017-02-02 64 -one_min.x 39 1111118400 2017-02-02 61 -one_min.x 42 1111119000 2017-02-02 58 -one_min.x 45 1111119600 2017-02-02 55 -one_min.x 48 1111120200 2017-02-02 52 -one_min.x 0 1111110600 2017-02-02 100 -one_min.x 3 1111111200 2017-02-02 97 -one_min.x 6 1111111800 2017-02-02 94 -one_min.x 9 1111112400 2017-02-02 91 -one_min.x 12 1111113000 2017-02-02 88 -one_min.x 15 1111113600 2017-02-02 85 -one_min.x 18 1111114200 2017-02-02 82 -one_min.x 21 1111114800 2017-02-02 79 -one_min.x 24 1111115400 2017-02-02 76 -one_min.x 27 1111116000 2017-02-02 73 -one_min.x 30 1111116600 2017-02-02 70 -one_min.x 33 1111117200 2017-02-02 67 -one_min.x 36 1111117800 2017-02-02 64 -one_min.x 39 1111118400 2017-02-02 61 -one_min.x 42 1111119000 2017-02-02 58 -one_min.x 45 1111119600 2017-02-02 55 -one_min.x 48 1111120200 2017-02-02 52 -one_min.y 0 1111110600 2017-02-02 100 -one_min.y 1 1111111200 2017-02-02 99 -one_min.y 2 1111111800 2017-02-02 98 -one_min.y 3 1111112400 2017-02-02 97 -one_min.y 4 1111113000 2017-02-02 96 -one_min.y 5 1111113600 2017-02-02 95 
-one_min.y 6 1111114200 2017-02-02 94 -one_min.y 7 1111114800 2017-02-02 93 -one_min.y 8 1111115400 2017-02-02 92 -one_min.y 9 1111116000 2017-02-02 91 -one_min.y 10 1111116600 2017-02-02 90 -one_min.y 11 1111117200 2017-02-02 89 -one_min.y 12 1111117800 2017-02-02 88 -one_min.y 13 1111118400 2017-02-02 87 -one_min.y 14 1111119000 2017-02-02 86 -one_min.y 15 1111119600 2017-02-02 85 -one_min.y 16 1111120200 2017-02-02 84 -one_min.y 17 1111120800 2017-02-02 83 -one_min.y 18 1111121400 2017-02-02 82 -one_min.y 19 1111122000 2017-02-02 81 -one_min.y 20 1111122600 2017-02-02 80 -one_min.y 21 1111123200 2017-02-02 79 -one_min.y 22 1111123800 2017-02-02 78 -one_min.y 23 1111124400 2017-02-02 77 -one_min.y 24 1111125000 2017-02-02 76 -one_min.y 25 1111125600 2017-02-02 75 -one_min.y 26 1111126200 2017-02-02 74 -one_min.y 27 1111126800 2017-02-02 73 -one_min.y 28 1111127400 2017-02-02 72 -one_min.y 29 1111128000 2017-02-02 71 -one_min.y 30 1111128600 2017-02-02 70 -one_min.y 31 1111129200 2017-02-02 69 -one_min.y 32 1111129800 2017-02-02 68 -one_min.y 33 1111130400 2017-02-02 67 -one_min.y 34 1111131000 2017-02-02 66 -one_min.y 35 1111131600 2017-02-02 65 -one_min.y 36 1111132200 2017-02-02 64 -one_min.y 37 1111132800 2017-02-02 63 -one_min.y 38 1111133400 2017-02-02 62 -one_min.y 39 1111134000 2017-02-02 61 -one_min.y 40 1111134600 2017-02-02 60 -one_min.y 41 1111135200 2017-02-02 59 -one_min.y 42 1111135800 2017-02-02 58 -one_min.y 43 1111136400 2017-02-02 57 -one_min.y 44 1111137000 2017-02-02 56 -one_min.y 45 1111137600 2017-02-02 55 -one_min.y 46 1111138200 2017-02-02 54 -one_min.y 47 1111138800 2017-02-02 53 -one_min.y 48 1111139400 2017-02-02 52 -one_min.y 49 1111140000 2017-02-02 51 diff --git a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged b/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged deleted file mode 100644 index e2c63ab3b22..00000000000 --- a/tests/integration/test_graphite_merge_tree_typed/test_multiple_paths_and_versions.reference.tagged +++ /dev/null @@ -1,84 +0,0 @@ -x?retention=one_min 0 1111110600 2017-02-02 100 -x?retention=one_min 3 1111111200 2017-02-02 97 -x?retention=one_min 6 1111111800 2017-02-02 94 -x?retention=one_min 9 1111112400 2017-02-02 91 -x?retention=one_min 12 1111113000 2017-02-02 88 -x?retention=one_min 15 1111113600 2017-02-02 85 -x?retention=one_min 18 1111114200 2017-02-02 82 -x?retention=one_min 21 1111114800 2017-02-02 79 -x?retention=one_min 24 1111115400 2017-02-02 76 -x?retention=one_min 27 1111116000 2017-02-02 73 -x?retention=one_min 30 1111116600 2017-02-02 70 -x?retention=one_min 33 1111117200 2017-02-02 67 -x?retention=one_min 36 1111117800 2017-02-02 64 -x?retention=one_min 39 1111118400 2017-02-02 61 -x?retention=one_min 42 1111119000 2017-02-02 58 -x?retention=one_min 45 1111119600 2017-02-02 55 -x?retention=one_min 48 1111120200 2017-02-02 52 -x?retention=one_min 0 1111110600 2017-02-02 100 -x?retention=one_min 3 1111111200 2017-02-02 97 -x?retention=one_min 6 1111111800 2017-02-02 94 -x?retention=one_min 9 1111112400 2017-02-02 91 -x?retention=one_min 12 1111113000 2017-02-02 88 -x?retention=one_min 15 1111113600 2017-02-02 85 -x?retention=one_min 18 1111114200 2017-02-02 82 -x?retention=one_min 21 1111114800 2017-02-02 79 -x?retention=one_min 24 1111115400 2017-02-02 76 -x?retention=one_min 27 1111116000 2017-02-02 73 -x?retention=one_min 30 1111116600 2017-02-02 70 -x?retention=one_min 33 1111117200 2017-02-02 67 -x?retention=one_min 
36 1111117800 2017-02-02 64 -x?retention=one_min 39 1111118400 2017-02-02 61 -x?retention=one_min 42 1111119000 2017-02-02 58 -x?retention=one_min 45 1111119600 2017-02-02 55 -x?retention=one_min 48 1111120200 2017-02-02 52 -y?retention=one_min 0 1111110600 2017-02-02 100 -y?retention=one_min 1 1111111200 2017-02-02 99 -y?retention=one_min 2 1111111800 2017-02-02 98 -y?retention=one_min 3 1111112400 2017-02-02 97 -y?retention=one_min 4 1111113000 2017-02-02 96 -y?retention=one_min 5 1111113600 2017-02-02 95 -y?retention=one_min 6 1111114200 2017-02-02 94 -y?retention=one_min 7 1111114800 2017-02-02 93 -y?retention=one_min 8 1111115400 2017-02-02 92 -y?retention=one_min 9 1111116000 2017-02-02 91 -y?retention=one_min 10 1111116600 2017-02-02 90 -y?retention=one_min 11 1111117200 2017-02-02 89 -y?retention=one_min 12 1111117800 2017-02-02 88 -y?retention=one_min 13 1111118400 2017-02-02 87 -y?retention=one_min 14 1111119000 2017-02-02 86 -y?retention=one_min 15 1111119600 2017-02-02 85 -y?retention=one_min 16 1111120200 2017-02-02 84 -y?retention=one_min 17 1111120800 2017-02-02 83 -y?retention=one_min 18 1111121400 2017-02-02 82 -y?retention=one_min 19 1111122000 2017-02-02 81 -y?retention=one_min 20 1111122600 2017-02-02 80 -y?retention=one_min 21 1111123200 2017-02-02 79 -y?retention=one_min 22 1111123800 2017-02-02 78 -y?retention=one_min 23 1111124400 2017-02-02 77 -y?retention=one_min 24 1111125000 2017-02-02 76 -y?retention=one_min 25 1111125600 2017-02-02 75 -y?retention=one_min 26 1111126200 2017-02-02 74 -y?retention=one_min 27 1111126800 2017-02-02 73 -y?retention=one_min 28 1111127400 2017-02-02 72 -y?retention=one_min 29 1111128000 2017-02-02 71 -y?retention=one_min 30 1111128600 2017-02-02 70 -y?retention=one_min 31 1111129200 2017-02-02 69 -y?retention=one_min 32 1111129800 2017-02-02 68 -y?retention=one_min 33 1111130400 2017-02-02 67 -y?retention=one_min 34 1111131000 2017-02-02 66 -y?retention=one_min 35 1111131600 2017-02-02 65 -y?retention=one_min 36 1111132200 2017-02-02 64 -y?retention=one_min 37 1111132800 2017-02-02 63 -y?retention=one_min 38 1111133400 2017-02-02 62 -y?retention=one_min 39 1111134000 2017-02-02 61 -y?retention=one_min 40 1111134600 2017-02-02 60 -y?retention=one_min 41 1111135200 2017-02-02 59 -y?retention=one_min 42 1111135800 2017-02-02 58 -y?retention=one_min 43 1111136400 2017-02-02 57 -y?retention=one_min 44 1111137000 2017-02-02 56 -y?retention=one_min 45 1111137600 2017-02-02 55 -y?retention=one_min 46 1111138200 2017-02-02 54 -y?retention=one_min 47 1111138800 2017-02-02 53 -y?retention=one_min 48 1111139400 2017-02-02 52 -y?retention=one_min 49 1111140000 2017-02-02 51 diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index a2e56fa0f1d..2b391cd292e 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -21,7 +21,7 @@ CREATE TABLE system.events\n(\n `event` String,\n `value` UInt64,\n `de CREATE TABLE system.formats\n(\n `name` String,\n `is_input` UInt8,\n `is_output` UInt8\n)\nENGINE = SystemFormats()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.functions\n(\n `name` String,\n `is_aggregate` UInt8,\n `case_insensitive` UInt8,\n `alias_to` String,\n `create_query` String,\n `origin` Enum8(\'System\' = 0, \'SQLUserDefined\' = 1, \'ExecutableUserDefined\' = 2)\n)\nENGINE = SystemFunctions()\nCOMMENT \'SYSTEM TABLE is built on 
the fly.\' CREATE TABLE system.grants\n(\n `user_name` Nullable(String),\n `role_name` Nullable(String),\n `access_type` Enum8(\'SQLITE\' = -128, \'ODBC\' = -127, \'JDBC\' = -126, \'HDFS\' = -125, \'S3\' = -124, \'SOURCES\' = -123, \'ALL\' = -122, \'NONE\' = -121, \'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM\' = 116, \'dictGet\' = 117, \'addressToLine\' = 118, \'addressToSymbol\' = 119, \'demangle\' = 120, 
\'INTROSPECTION\' = 121, \'FILE\' = 122, \'URL\' = 123, \'REMOTE\' = 124, \'MONGO\' = 125, \'MYSQL\' = 126, \'POSTGRES\' = 127),\n `database` Nullable(String),\n `table` Nullable(String),\n `column` Nullable(String),\n `is_partial_revoke` UInt8,\n `grant_option` UInt8\n)\nENGINE = SystemGrants()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `rule_type` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.graphite_retentions\n(\n `config_name` String,\n `regexp` String,\n `function` String,\n `age` UInt64,\n `precision` UInt64,\n `priority` UInt16,\n `is_default` UInt8,\n `Tables.database` Array(String),\n `Tables.table` Array(String)\n)\nENGINE = SystemGraphite()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.licenses\n(\n `library_name` String,\n `license_type` String,\n `license_path` String,\n `license_text` String\n)\nENGINE = SystemLicenses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.macros\n(\n `macro` String,\n `substitution` String\n)\nENGINE = SystemMacros()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.merge_tree_settings\n(\n `name` String,\n `value` String,\n `changed` UInt8,\n `description` String,\n `type` String\n)\nENGINE = SystemMergeTreeSettings()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index a930e7db3fc..8309b6bcb53 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -32,7 +32,6 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (wal-dump) add_subdirectory (check-mysql-binlog) add_subdirectory (keeper-bench) - add_subdirectory (graphite-rollup) if (USE_NURAFT) add_subdirectory (keeper-data-dumper) diff --git a/utils/graphite-rollup/CMakeLists.txt b/utils/graphite-rollup/CMakeLists.txt deleted file mode 100644 index 3cc0d3e756f..00000000000 --- a/utils/graphite-rollup/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -add_executable(graphite-rollup-bench graphite-rollup-bench.cpp) -target_link_libraries( - graphite-rollup-bench - PRIVATE - clickhouse_storages_system - clickhouse_aggregate_functions - clickhouse_common_config - dbms -) -target_include_directories( - graphite-rollup-bench - SYSTEM PRIVATE - ${ClickHouse_SOURCE_DIR}/src ${CMAKE_BINARY_DIR}/src - ${ClickHouse_SOURCE_DIR}/base ${ClickHouse_SOURCE_DIR}/base/pcg-random - ${CMAKE_BINARY_DIR}/src/Core/include - ${POCO_INCLUDE_DIR} - ${ClickHouse_SOURCE_DIR}/contrib/double-conversion ${ClickHouse_SOURCE_DIR}/contrib/dragonbox/include - ${ClickHouse_SOURCE_DIR}/contrib/fmtlib/include - ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include - ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2_st -) - -target_compile_definitions(graphite-rollup-bench PRIVATE RULES_DIR="${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/utils/graphite-rollup/graphite-rollup-bench.cpp b/utils/graphite-rollup/graphite-rollup-bench.cpp deleted file mode 100644 index dabe0353b0f..00000000000 --- a/utils/graphite-rollup/graphite-rollup-bench.cpp +++ /dev/null @@ -1,147 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include - -using namespace DB; - -static SharedContextHolder shared_context 
= Context::createShared(); - -std::vector loadMetrics(const std::string & metrics_file) -{ - std::vector metrics; - - FILE * stream; - char * line = nullptr; - size_t len = 0; - ssize_t nread; - - stream = fopen(metrics_file.c_str(), "r"); - if (stream == nullptr) - { - throw std::runtime_error(strerror(errno)); - } - - while ((nread = getline(&line, &len, stream)) != -1) - { - size_t l = strlen(line); - if (l > 0) - { - if (line[l - 1] == '\n') - { - line[l - 1] = '\0'; - l--; - } - if (l > 0) - { - metrics.push_back(StringRef(strdup(line), l)); - } - } - } - free(line); - if (ferror(stream)) - { - fclose(stream); - throw std::runtime_error(strerror(errno)); - } - - fclose(stream); - - return metrics; -} - -ConfigProcessor::LoadedConfig loadConfiguration(const std::string & config_path) -{ - ConfigProcessor config_processor(config_path, true, true); - ConfigProcessor::LoadedConfig config = config_processor.loadConfig(false); - return config; -} - -void bench(const std::string & config_path, const std::string & metrics_file, size_t n, bool verbose) -{ - auto config = loadConfiguration(config_path); - - auto context = Context::createGlobal(shared_context.get()); - context->setConfig(config.configuration.get()); - - Graphite::Params params; - setGraphitePatternsFromConfig(context, "graphite_rollup", params); - - std::vector metrics = loadMetrics(metrics_file); - - std::vector durations(metrics.size()); - size_t j, i; - for (j = 0; j < n; j++) - { - for (i = 0; i < metrics.size(); i++) - { - auto start = std::chrono::high_resolution_clock::now(); - - auto rule = DB::Graphite::selectPatternForPath(params, metrics[i]); - (void)rule; - - auto end = std::chrono::high_resolution_clock::now(); - double duration = (duration_cast>(end - start)).count() * 1E9; - durations[i] += duration; - - if (j == 0 && verbose) - { - std::cout << metrics[i].data << ": rule with regexp '" << rule.second->regexp_str << "' found\n"; - } - } - } - - for (i = 0; i < metrics.size(); i++) - { - std::cout << metrics[i].data << " " << durations[i] / n << " ns\n"; - free(const_cast(static_cast(metrics[i].data))); - } -} - -int main(int argc, char ** argv) -{ - registerAggregateFunctions(); - - std::string config_file, metrics_file; - - using namespace std::literals; - - std::string config_default = RULES_DIR + "/rollup.xml"s; - std::string metrics_default = RULES_DIR + "/metrics.txt"s; - - namespace po = boost::program_options; - po::variables_map vm; - - po::options_description desc; - desc.add_options()("help,h", "produce help")( - "config,c", po::value()->default_value(config_default), "XML config with rollup rules")( - "metrics,m", po::value()->default_value(metrics_default), "metrcis files (one metric per line) for run benchmark")( - "verbose,V", po::bool_switch()->default_value(false), "verbose output (print found rule)"); - - po::parsed_options parsed = po::command_line_parser(argc, argv).options(desc).run(); - po::store(parsed, vm); - po::notify(vm); - - if (vm.count("help")) - { - std::cout << desc << '\n'; - exit(1); - } - - bench(vm["config"].as(), vm["metrics"].as(), 10000, vm["verbose"].as()); - - return 0; -} diff --git a/utils/graphite-rollup/metrics.txt b/utils/graphite-rollup/metrics.txt deleted file mode 100644 index 199c3791310..00000000000 --- a/utils/graphite-rollup/metrics.txt +++ /dev/null @@ -1,11 +0,0 @@ -test.sum -sum?env=test&tag=Fake3 -test.max -max?env=test&tag=Fake4 -test.min -min?env=test&tag=Fake5 -fake5?env=test&tag=Fake5 -test.p95 -p95?env=test&tag=FakeNo -default 
-default?env=test&tag=FakeNo diff --git a/utils/graphite-rollup/rollup-tag-list.xml b/utils/graphite-rollup/rollup-tag-list.xml deleted file mode 100644 index ef28f2089ad..00000000000 --- a/utils/graphite-rollup/rollup-tag-list.xml +++ /dev/null @@ -1,167 +0,0 @@ - - - - plain - \.sum$ - sum - - 0 - 60 - - - 86400 - 3600 - - - - tagged - ^((.*)|.)sum\? - sum - - 0 - 60 - - - 86400 - 3600 - - - - plain - \.max$ - max - - 0 - 60 - - - 86400 - 3600 - - - - tagged - ^((.*)|.)max\? - max - - 0 - 60 - - - 86400 - 3600 - - - - plain - \.min$ - min - - 0 - 60 - - - 86400 - 3600 - - - - tagged - ^((.*)|.)min\? - min - - 0 - 60 - - - 86400 - 3600 - - - - plain - \.fake1\..*\.Fake1\. - sum - - - tag_list - fake1;tag=Fake1 - sum - - - plain - \.fake2\..*\.Fake2\. - sum - - - tag_list - fake2;tag=Fake2 - sum - - - plain - \.fake3\..*\.Fake3\. - sum - - - tag_list - fake3;tag=Fake3 - sum - - - plain - \.fake4\..*\.Fake4\. - sum - - - tag_list - fake4;tag=Fake4 - sum - - - plain - \.fake5\..*\.Fake5\. - sum - - - tag_list - fake5;tag=Fake5 - sum - - - plain - \.fake6\..*\.Fake6\. - sum - - - tag_list - fake6;tag=Fake6 - sum - - - plain - \.fake7\..*\.Fake7\. - sum - - - tag_list - fake7;tag=Fake7 - sum - - - avg - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - diff --git a/utils/graphite-rollup/rollup-typed.xml b/utils/graphite-rollup/rollup-typed.xml deleted file mode 100644 index 0b27d43ece9..00000000000 --- a/utils/graphite-rollup/rollup-typed.xml +++ /dev/null @@ -1,167 +0,0 @@ - - - - plain - \.sum$ - sum - - 0 - 60 - - - 86400 - 3600 - - - - tagged - ^((.*)|.)sum\? - sum - - 0 - 60 - - - 86400 - 3600 - - - - plain - \.max$ - max - - 0 - 60 - - - 86400 - 3600 - - - - tagged - ^((.*)|.)max\? - max - - 0 - 60 - - - 86400 - 3600 - - - - plain - \.min$ - min - - 0 - 60 - - - 86400 - 3600 - - - - tagged - ^((.*)|.)min\? - min - - 0 - 60 - - - 86400 - 3600 - - - - plain - \.fake1\..*\.Fake1\. - sum - - - tagged - - sum - - - plain - \.fake2\..*\.Fake2\. - sum - - - tagged - - sum - - - plain - \.fake3\..*\.Fake3\. - sum - - - tagged - - sum - - - plain - \.fake4\..*\.Fake4\. - sum - - - tagged - - sum - - - plain - \.fake5\..*\.Fake5\. - sum - - - tagged - - sum - - - plain - \.fake6\..*\.Fake6\. - sum - - - tagged - - sum - - - plain - \.fake7\..*\.Fake7\. - sum - - - tagged - - sum - - - avg - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - diff --git a/utils/graphite-rollup/rollup.xml b/utils/graphite-rollup/rollup.xml deleted file mode 100644 index 641b0130509..00000000000 --- a/utils/graphite-rollup/rollup.xml +++ /dev/null @@ -1,147 +0,0 @@ - - - - \.sum$ - sum - - 0 - 60 - - - 86400 - 3600 - - - - ^((.*)|.)sum\? - sum - - 0 - 60 - - - 86400 - 3600 - - - - \.max$ - max - - 0 - 60 - - - 86400 - 3600 - - - - ^((.*)|.)max\? - max - - 0 - 60 - - - 86400 - 3600 - - - - \.min$ - min - - 0 - 60 - - - 86400 - 3600 - - - - ^((.*)|.)min\? - min - - 0 - 60 - - - 86400 - 3600 - - - - \.fake1\..*\.Fake1\. - sum - - - - sum - - - \.fake2\..*\.Fake2\. - sum - - - - sum - - - \.fake3\..*\.Fake3\. - sum - - - - sum - - - \.fake4\..*\.Fake4\. - sum - - - - sum - - - \.fake5\..*\.Fake5\. - sum - - - - sum - - - \.fake6\..*\.Fake6\. - sum - - - - sum - - - \.fake7\..*\.Fake7\. - sum - - - - sum - - - avg - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - From e90f047ba0df2c901e826117bb1e8b74457ccf08 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Wed, 8 Dec 2021 11:06:10 -0400 Subject: [PATCH 171/262] Doc. 
merge_tree_clear_old_temporary_directories_interval_seconds merge_tree_clear_old_parts_interval_seconds were moved to merge_tree settings --- .../settings/merge-tree-settings.md | 21 +++++++++++++++++++ docs/en/operations/settings/settings.md | 20 ------------------ .../settings/merge-tree-settings.md | 20 ++++++++++++++++++ docs/ru/operations/settings/settings.md | 20 ------------------ 4 files changed, 41 insertions(+), 40 deletions(-) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 0fd1e54955c..af75d130ed3 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -356,3 +356,24 @@ Possible values: - 1 — Parts are detached. Default value: `0`. + +## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} + +Sets the interval in seconds for ClickHouse to execute the cleanup of old temporary directories. + +Possible values: + +- Any positive integer. + +Default value: `60` seconds. + +## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} + +Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, WALs, and mutations. + +Possible values: + +- Any positive integer. + +Default value: `1` second. + diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index fa4cc41e8ff..30d129d9b29 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -885,26 +885,6 @@ Possible values: Default value: 2013265920. -## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} - -Sets the interval in seconds for ClickHouse to execute the cleanup of old temporary directories. - -Possible values: - -- Any positive integer. - -Default value: `60` seconds. - -## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} - -Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, WALs, and mutations. - -Possible values: - -- Any positive integer. - -Default value: `1` second. - ## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io} The minimum data volume required for using direct I/O access to the storage disk. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index 31cc229c6aa..e30539498b3 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -355,3 +355,23 @@ Eсли суммарное число активных кусков во все - 1 — куски данных открепляются. Значение по умолчанию: `0`. + +## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} + +Задает интервал в секундах для удаления старых временных каталогов на сервере ClickHouse. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: `60` секунд. + +## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} + +Задает интервал в секундах для удаления старых кусков данных, журналов предзаписи (WAL) и мутаций на сервере ClickHouse. + +Возможные значения: + +- Положительное целое число. + +Значение по умолчанию: `1` секунда. 
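The two settings documented above are now table-level MergeTree settings rather than query-level settings. As a minimal sketch of where they now live (the table name and interval values below are hypothetical and not taken from this patch, and the assumption is only that the usual `SETTINGS` clause of `CREATE TABLE` accepts merge-tree settings, as it does for the others), they can be set per table, or server-wide in the `merge_tree` section of the server configuration:

``` sql
-- Hypothetical example: adjust both cleanup intervals for a single table.
-- The values are illustrative; the defaults are 60 s and 1 s as documented above.
CREATE TABLE cleanup_demo
(
    key UInt64,
    value String
)
ENGINE = MergeTree
ORDER BY key
SETTINGS
    merge_tree_clear_old_temporary_directories_interval_seconds = 120,
    merge_tree_clear_old_parts_interval_seconds = 5;
```
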
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index f9717b0fb27..1b4da512c9f 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -807,26 +807,6 @@ ClickHouse может парсить только базовый формат `Y Значение по умолчанию: 2013265920. -## merge_tree_clear_old_temporary_directories_interval_seconds {#setting-merge-tree-clear-old-temporary-directories-interval-seconds} - -Задает интервал в секундах для удаления старых временных каталогов на сервере ClickHouse. - -Возможные значения: - -- Положительное целое число. - -Значение по умолчанию: `60` секунд. - -## merge_tree_clear_old_parts_interval_seconds {#setting-merge-tree-clear-old-parts-interval-seconds} - -Задает интервал в секундах для удаления старых кусков данных, журналов предзаписи (WAL) и мутаций на сервере ClickHouse . - -Возможные значения: - -- Положительное целое число. - -Значение по умолчанию: `1` секунда. - ## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io} Минимальный объём данных, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. From 68b7111694818a999205e3f7f1a8829906e66f4e Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 8 Dec 2021 18:17:39 +0000 Subject: [PATCH 172/262] Update poco --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index 258b9ba6cd2..520a90e02e3 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 258b9ba6cd245ff88e9346f75c43464c403f329d +Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1 From 5fad4f62047023c308d46e6170e90b5f4a009da7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 8 Dec 2021 21:23:00 +0300 Subject: [PATCH 173/262] clickhouse-local: fix CREATE DATABASE with Atomic engine Before it fails to create due to "metadata" directory had not been created, since metadata_path is different for Atomic database, see InterpreterCreateQuery.cpp. --- src/Databases/DatabaseAtomic.cpp | 1 + .../0_stateless/02135_local_create_db.reference | 0 tests/queries/0_stateless/02135_local_create_db.sh | 12 ++++++++++++ 3 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/02135_local_create_db.reference create mode 100755 tests/queries/0_stateless/02135_local_create_db.sh diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index c898d5ee943..cb0c1cdae95 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -43,6 +43,7 @@ DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, c , db_uuid(uuid) { assert(db_uuid != UUIDHelpers::Nil); + fs::create_directories(fs::path(getContext()->getPath()) / "metadata"); fs::create_directories(path_to_table_symlinks); tryCreateMetadataSymlink(); } diff --git a/tests/queries/0_stateless/02135_local_create_db.reference b/tests/queries/0_stateless/02135_local_create_db.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02135_local_create_db.sh b/tests/queries/0_stateless/02135_local_create_db.sh new file mode 100755 index 00000000000..2a0105e554e --- /dev/null +++ b/tests/queries/0_stateless/02135_local_create_db.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +for Engine in Atomic Ordinary; do + $CLICKHOUSE_LOCAL --query """ + CREATE DATABASE foo_$Engine Engine=$Engine; + DROP DATABASE foo_$Engine; + """ +done From 417bd12629fc53faf28dd3862641462babb3593e Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 2 Dec 2021 17:19:09 +0300 Subject: [PATCH 174/262] Fix 'APPLY lambda' parsing --- src/Parsers/ExpressionElementParsers.cpp | 10 ++++++++-- .../02128_apply_lambda_parsing.reference | 10 ++++++++++ .../0_stateless/02128_apply_lambda_parsing.sql | 13 +++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02128_apply_lambda_parsing.reference create mode 100644 tests/queries/0_stateless/02128_apply_lambda_parsing.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e9ec7b43a21..e598698daf8 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -35,7 +34,6 @@ #include #include -#include #include "ASTColumnsMatcher.h" #include @@ -48,6 +46,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int SYNTAX_ERROR; extern const int LOGICAL_ERROR; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int TYPE_MISMATCH; } @@ -1935,7 +1935,13 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e { if (const auto * func = lambda->as(); func && func->name == "lambda") { + if (func->arguments->children.size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); + const auto * lambda_args_tuple = func->arguments->children.at(0)->as(); + if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") + throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); + const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children; if (lambda_arg_asts.size() != 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "APPLY column transformer can only accept lambda with one argument"); diff --git a/tests/queries/0_stateless/02128_apply_lambda_parsing.reference b/tests/queries/0_stateless/02128_apply_lambda_parsing.reference new file mode 100644 index 00000000000..120eec989de --- /dev/null +++ b/tests/queries/0_stateless/02128_apply_lambda_parsing.reference @@ -0,0 +1,10 @@ +1 +1 +1 +1 +1 +1 +2 +3 +4 +5 diff --git a/tests/queries/0_stateless/02128_apply_lambda_parsing.sql b/tests/queries/0_stateless/02128_apply_lambda_parsing.sql new file mode 100644 index 00000000000..51cec494d78 --- /dev/null +++ b/tests/queries/0_stateless/02128_apply_lambda_parsing.sql @@ -0,0 +1,13 @@ +WITH * APPLY lambda(e); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * APPLY lambda(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * APPLY lambda(1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * APPLY lambda(x); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * APPLY lambda(range(1)); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * APPLY lambda(range(x)); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT * APPLY lambda(1, 2); -- { clientError TYPE_MISMATCH } +SELECT * APPLY lambda(x, y); -- { clientError TYPE_MISMATCH } +SELECT * APPLY lambda((x, y), 2); -- { clientError BAD_ARGUMENTS } +SELECT * APPLY lambda((x, y), x + y); -- { clientError BAD_ARGUMENTS } +SELECT * APPLY lambda(tuple(1), 1); -- { clientError BAD_ARGUMENTS } 
+SELECT * APPLY lambda(tuple(x), 1) FROM numbers(5); +SELECT * APPLY lambda(tuple(x), x + 1) FROM numbers(5); From 7ba60aa770770e9bfc49f3a7824149dade0ff9f4 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 6 Dec 2021 14:03:26 +0300 Subject: [PATCH 175/262] Change error codes to SYNTAX_ERROR --- src/Parsers/ExpressionElementParsers.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e598698daf8..584c2a32afd 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -46,8 +46,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int SYNTAX_ERROR; extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int TYPE_MISMATCH; } @@ -1936,20 +1934,20 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e if (const auto * func = lambda->as(); func && func->name == "lambda") { if (func->arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); + throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda requires two arguments"); const auto * lambda_args_tuple = func->arguments->children.at(0)->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); + throw Exception(ErrorCodes::SYNTAX_ERROR, "First argument of lambda must be a tuple"); const ASTs & lambda_arg_asts = lambda_args_tuple->arguments->children; if (lambda_arg_asts.size() != 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "APPLY column transformer can only accept lambda with one argument"); + throw Exception(ErrorCodes::SYNTAX_ERROR, "APPLY column transformer can only accept lambda with one argument"); if (auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[0]); opt_arg_name) lambda_arg = *opt_arg_name; else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "lambda argument declarations must be identifiers"); + throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda argument declarations must be identifiers"); } else { From fc382afacb4a2f0bfe63e544484725d503489b60 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 6 Dec 2021 14:04:19 +0300 Subject: [PATCH 176/262] Update test --- .../02128_apply_lambda_parsing.sql | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02128_apply_lambda_parsing.sql b/tests/queries/0_stateless/02128_apply_lambda_parsing.sql index 51cec494d78..5fc809ca75d 100644 --- a/tests/queries/0_stateless/02128_apply_lambda_parsing.sql +++ b/tests/queries/0_stateless/02128_apply_lambda_parsing.sql @@ -1,13 +1,13 @@ -WITH * APPLY lambda(e); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * APPLY lambda(); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * APPLY lambda(1); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * APPLY lambda(x); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * APPLY lambda(range(1)); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * APPLY lambda(range(x)); -- { clientError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } -SELECT * APPLY lambda(1, 2); -- { clientError TYPE_MISMATCH } -SELECT * APPLY lambda(x, y); -- { clientError TYPE_MISMATCH } -SELECT * APPLY lambda((x, y), 2); -- { 
clientError BAD_ARGUMENTS } -SELECT * APPLY lambda((x, y), x + y); -- { clientError BAD_ARGUMENTS } -SELECT * APPLY lambda(tuple(1), 1); -- { clientError BAD_ARGUMENTS } +WITH * APPLY lambda(e); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(1); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(x); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(range(1)); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(range(x)); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(1, 2); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(x, y); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda((x, y), 2); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda((x, y), x + y); -- { clientError SYNTAX_ERROR } +SELECT * APPLY lambda(tuple(1), 1); -- { clientError SYNTAX_ERROR } SELECT * APPLY lambda(tuple(x), 1) FROM numbers(5); SELECT * APPLY lambda(tuple(x), x + 1) FROM numbers(5); From 81d45dbf661e4b3456803933d8f32f657f1fa714 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 7 Dec 2021 19:09:27 +0300 Subject: [PATCH 177/262] Fix possible crash in fuzzer --- programs/client/Client.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 383b9bb5e52..b6214d66628 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -705,6 +705,12 @@ bool Client::processWithFuzzing(const String & full_query) throw; } + if (!orig_ast) + { + // Can't continue after a parsing error + return true; + } + // `USE db` should not be executed // since this will break every query after `DROP db` if (orig_ast->as()) @@ -712,12 +718,6 @@ bool Client::processWithFuzzing(const String & full_query) return true; } - if (!orig_ast) - { - // Can't continue after a parsing error - return true; - } - // Don't repeat: // - INSERT -- Because the tables may grow too big. 
// - CREATE -- Because first we run the unmodified query, it will succeed, From 51bb37715f7134d0a15d40183f5b13e1ddc95a26 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 8 Dec 2021 21:28:50 +0300 Subject: [PATCH 178/262] Fix fuzzer --- docker/test/fuzzer/run-fuzzer.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 351b4a3c541..bafbd415e24 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -189,6 +189,7 @@ continue --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ + --testmode --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ From b6c1d1e72edd4e8163b648f2d7c552479b74f160 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 8 Dec 2021 21:09:11 +0000 Subject: [PATCH 179/262] Use latest version for tests --- docker/test/integration/runner/compose/docker_compose_mongo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index e794966bd08..a342a0f55e5 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -1,7 +1,7 @@ version: '2.3' services: mongo1: - image: mongo:3.6 + image: mongo:latest restart: always environment: MONGO_INITDB_ROOT_USERNAME: root From 5e26dcbfa716af3dbd77841818eaec0090db5775 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Dec 2021 00:27:47 +0300 Subject: [PATCH 180/262] Auto version update to [21.12.1.9017] [54457] --- cmake/autogenerated_versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index f13110d7179..31fef1250da 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -6,7 +6,7 @@ SET(VERSION_REVISION 54457) SET(VERSION_MAJOR 21) SET(VERSION_MINOR 12) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 503a418dedf0011e9040c3a1b6913e0b5488be4c) -SET(VERSION_DESCRIBE v21.12.1.1-prestable) -SET(VERSION_STRING 21.12.1.1) +SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a) +SET(VERSION_DESCRIBE v21.12.1.9017-prestable) +SET(VERSION_STRING 21.12.1.9017) # end of autochange From f2ccddb7be65179154d9b80a962c060828934529 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Thu, 9 Dec 2021 00:33:01 +0300 Subject: [PATCH 181/262] Auto version update to [21.13.1.1] [54458] --- cmake/autogenerated_versions.txt | 8 +- debian/changelog | 4 +- docker/client/Dockerfile | 2 +- docker/server/Dockerfile | 2 +- docker/test/Dockerfile | 2 +- .../StorageSystemContributors.generated.cpp | 941 +++++++++--------- 6 files changed, 499 insertions(+), 460 deletions(-) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 31fef1250da..8e7c061088a 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54457) +SET(VERSION_REVISION 54458) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 12) +SET(VERSION_MINOR 13) SET(VERSION_PATCH 1) SET(VERSION_GITHASH 4cc45c1e15912ee300bca7cc8b8da2b888a70e2a) -SET(VERSION_DESCRIBE v21.12.1.9017-prestable) -SET(VERSION_STRING 21.12.1.9017) +SET(VERSION_DESCRIBE v21.13.1.1-prestable) +SET(VERSION_STRING 21.13.1.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index a2709485e44..3c1be00d664 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.12.1.1) unstable; urgency=low +clickhouse (21.13.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Tue, 02 Nov 2021 00:56:42 +0300 + -- clickhouse-release Thu, 09 Dec 2021 00:32:58 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index e84cb601c0f..9ce06939a85 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.12.1.* +ARG version=21.13.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 96e7e73af33..bfdf65cd56c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -5,7 +5,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.12.1.* +ARG version=21.13.1.* ARG gosu_ver=1.10 # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 76967da9f9a..c24c013646f 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=21.12.1.* +ARG version=21.13.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index c33fa6cad44..87bd266af96 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -1,48 +1,41 @@ // autogenerated by ./StorageSystemContributors.sh const char * auto_contributors[] { - "박현우", "0xflotus", "20018712", "243f6a88 85a308d3", "243f6a8885a308d313198a2e037", "3ldar-nasyrov", "821008736@qq.com", - "abdrakhmanov", - "abel-wang", - "abyss7", - "achimbab", - "achulkov2", - "adevyatova", - "ageraab", + "ANDREI STAROVEROV", "Ahmed Dardery", "Aimiyoo", - "akazz", "Akazz", - "akonyaev", - "akuzm", "Alain BERRIER", "Albert Kidrachev", "Alberto", - "Aleksandra (Ася)", "Aleksandr Karo", - "Aleksandrov Vladimir", "Aleksandr Shalimov", - "alekseik1", + "Aleksandra (Ася)", + "Aleksandrov Vladimir", "Aleksei Levushkin", "Aleksei Semiglazov", - "Aleksey", "Aleksey Akulovich", - "alesapin", + "Aleksey", + "Alex Bocharov", + "Alex Cao", + "Alex Karo", + "Alex Krash", + "Alex Ryndin", + "Alex Zatelepin", "Alex", "Alexander Avdonkin", "Alexander Bezpiatov", "Alexander Burmak", "Alexander Chashnikov", "Alexander Ermolaev", - "Alexander Gololobov", "Alexander GQ Gerasiov", + "Alexander Gololobov", "Alexander Kazakov", - "alexander kozhikhov", "Alexander Kozhikhov", "Alexander Krasheninnikov", 
"Alexander Kuranoff", @@ -59,63 +52,46 @@ const char * auto_contributors[] { "Alexander Sapin", "Alexander Tokmakov", "Alexander Tretiakov", - "Alexandra", - "Alexandra Latysheva", - "Alexandre Snarskii", "Alexandr Kondratev", "Alexandr Krasheninnikov", "Alexandr Orlov", - "Alex Bocharov", + "Alexandra Latysheva", + "Alexandra", + "Alexandre Snarskii", "Alexei Averchenko", - "Alexey", "Alexey Arno", "Alexey Boykov", "Alexey Dushechkin", "Alexey Elymanov", "Alexey Ilyukhov", - "alexey-milovidov", "Alexey Milovidov", "Alexey Tronov", "Alexey Vasiliev", "Alexey Zatelepin", - "Alex Karo", - "Alex Krash", - "alex.lvxin", - "Alex Ryndin", + "Alexey", "Alexsey Shestakov", - "alex-zaitsev", - "Alex Zatelepin", - "alfredlu", + "Ali Demirci", "Aliaksandr Pliutau", "Aliaksandr Shylau", - "Ali Demirci", "Alina Terekhova", - "amesaru", "Amesaru", "Amir Vaza", "Amos Bird", - "amoschen", - "amudong", + "Amr Alaa", "Amy Krishnevsky", + "AnaUvarova", "Anastasiya Rodigina", "Anastasiya Tsarkova", "Anatoly Pugachev", - "ana-uvarova", - "AnaUvarova", "Andr0901", - "andrc1901", "Andreas Hunkeler", "AndreevDm", "Andrei Bodrov", "Andrei Ch", "Andrei Chulkov", - "andrei-karpliuk", "Andrei Nekrashevich", - "ANDREI STAROVEROV", "Andrew Grigorev", "Andrew Onyshchuk", - "andrewsg", - "Andrey", "Andrey Chulkov", "Andrey Dudin", "Andrey Kadochnikov", @@ -124,20 +100,16 @@ const char * auto_contributors[] { "Andrey M", "Andrey Mironov", "Andrey Skobtsov", + "Andrey Torsunov", "Andrey Urusov", "Andrey Z", + "Andrey", "Andy Liang", "Andy Yang", "Anmol Arora", - "Anna", "Anna Shakhova", - "anneji", - "anneji-dev", - "annvsh", - "anrodigina", + "Anna", "Anthony N. Simon", - "antikvist", - "anton", "Anton Ivashkin", "Anton Kobzev", "Anton Kvasha", @@ -149,151 +121,94 @@ const char * auto_contributors[] { "Anton Tikhonov", "Anton Yuzhaninov", "Anton Zhabolenko", - "ap11", - "a.palagashvili", - "aprudaev", "Ariel Robaldo", "Arsen Hakobyan", "Arslan G", "ArtCorp", "Artem Andreenko", - "Artemeey", "Artem Gavrilov", "Artem Hnilov", - "Artemkin Pavel", "Artem Konovalov", "Artem Streltsov", "Artem Zuikov", + "Artemeey", + "Artemkin Pavel", "Arthur Petukhovsky", "Arthur Tokarchuk", "Arthur Wong", - "artpaul", - "Artur", "Artur Beglaryan", "Artur Filatenkov", + "Artur", "AsiaKorushkina", - "asiana21", - "atereh", "Atri Sharma", - "avasiliev", - "avogar", "Avogar", - "avsharapov", - "awesomeleo", "Azat Khuzhin", + "BSD_Conqueror", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", "Baudouin Giard", "BayoNet", - "bbkas", - "benamazing", - "benbiti", "Benjamin Naecker", "Bertrand Junqua", - "bgranvea", "Bharat Nallan", - "bharatnc", "Big Elephant", "Bill", "BiteTheDDDDt", "BlahGeek", - "blazerer", - "bluebirddm", - "bobrovskij artemij", - "Bogdan", "Bogdan Voronin", + "Bogdan", "BohuTANG", "Bolinov", "BoloniniD", - "booknouse", "Boris Granveaud", "Bowen Masco", - "bo zeng", "Braulio Valdivielso", "Brett Hoerner", - "BSD_Conqueror", - "bseng", "Bulat Gaifullin", "Carbyn", - "caspian", "Caspian", - "cekc", - "centos7", - "champtar", - "chang.chen", - "changvvb", "Chao Ma", "Chao Wang", - "chasingegg", - "chengy8934", - "chenjian", - "chenqi", - "chenxing-xc", - "chenxing.xc", "Chen Yufei", - "chertus", "Chienlung Cheung", - "chou.fan", "Christian", - "christophe.kalenzaga", "Ciprian Hacman", "Clement Rodriguez", - "Clément Rodriguez", "ClickHouse Admin", - "cn-ds", + "Clément Rodriguez", "Cody Baker", "Colum", - "comunodi", "Constantin S. 
Pan", - "coraxster", + "Constantine Peresypkin", "CurtizJ", - "damozhaeva", + "DIAOZHAFENG", "Daniel Bershatsky", "Daniel Dao", "Daniel Qin", "Danila Kutenin", - "dankondr", "Dao Minh Thuc", - "daoready", "Daria Mozhaeva", "Dario", - "Darío", "DarkWanderer", - "dasmfm", - "davydovska", - "decaseal", + "Darío", "Denis Burlaka", "Denis Glazachev", "Denis Krivak", "Denis Zhuravlev", "Denny Crane", - "dependabot[bot]", - "dependabot-preview[bot]", "Derek Perkins", - "detailyang", - "dfenelonov", - "dgrr", - "DIAOZHAFENG", - "dimarub2000", "Ding Xiang Fei", - "dinosaur", - "divanorama", - "dkxiaohei", - "dmi-feo", "Dmitriev Mikhail", - "dmitrii", "Dmitrii Kovalkov", "Dmitrii Raev", - "dmitriiut", - "Dmitriy", + "Dmitriy Dorofeev", "Dmitriy Lushnikov", - "Dmitry", + "Dmitriy", "Dmitry Belyavtsev", "Dmitry Bilunov", "Dmitry Galuza", "Dmitry Krylov", - "dmitry kuzmin", "Dmitry Luhtionov", "Dmitry Moskowski", "Dmitry Muzyka", @@ -302,148 +217,89 @@ const char * auto_contributors[] { "Dmitry Rubashkin", "Dmitry S..ky / skype: dvska-at-skype", "Dmitry Ukolov", + "Dmitry", "Doge", "Dongdong Yang", "DoomzD", "Dr. Strange Looker", "DuckSoft", - "d.v.semenov", - "eaxdev", - "eejoin", - "egatov", "Egor O'Sten", "Egor Savin", "Ekaterina", - "elBroom", "Eldar Zaitov", "Elena Baskakova", - "elenaspb2019", - "elevankoff", "Elghazal Ahmed", "Elizaveta Mironyuk", - "emakarov", - "emhlbmc", - "emironyuk", + "Elykov Alexandr", "Emmanuel Donin de Rosière", - "Eric", "Eric Daniel", + "Eric", "Erixonich", - "ermaotech", "Ernest Poletaev", "Eugene Klimov", "Eugene Konkov", "Evgenia Sudarikova", - "Evgeniia Sudarikova", "Evgenii Pravda", + "Evgeniia Sudarikova", "Evgeniy Gatov", "Evgeniy Udodov", "Evgeny Konkov", "Evgeny Markov", - "evtan", + "Evgeny", "Ewout", - "exprmntr", - "ezhaka", - "f1yegor", - "Fabiano Francesconi", + "FArthur-cmd", "Fabian Stäber", + "Fabiano Francesconi", "Fadi Hadzh", "Fan()", - "fancno", - "FArthur-cmd", - "fastio", - "favstovol", "FawnD2", "Federico Ceratto", "FeehanG", - "feihengye", - "felixoid", - "felixxdu", - "feng lv", - "fenglv", - "fessmage", "FgoDt", - "fibersel", "Filatenkov Artur", - "filimonov", - "filipe", "Filipe Caixeta", - "flow", "Flowyi", - "flynn", - "foxxmary", "Francisco Barón", - "frank", - "franklee", + "Frank Chen", "Frank Zhao", - "fredchenbj", "Fruit of Eden", - "Fullstop000", - "fuqi", - "Fuwang Hu", - "fuwhu", "Fu Zhe", - "fuzhe1989", - "fuzzERot", + "Fullstop000", + "Fuwang Hu", "Gagan Arneja", "Gao Qiang", - "g-arslan", "Gary Dotzler", + "George G", "George", "George3d6", - "George G", "Georgy Ginzburg", "Gervasio Varela", - "ggerogery", - "giordyb", "Gleb Kanterov", "Gleb Novikov", "Gleb-Tretyakov", - "glockbender", - "glushkovds", "Gregory", - "Grigory", "Grigory Buteyko", "Grigory Pervakov", + "Grigory", "Guillaume Tassery", - "guoleiyi", "Guo Wei (William)", - "gyuton", "Haavard Kvaalen", "Habibullah Oladepo", "Hamoon", - "hao.he", "Hasitha Kanchana", "Hasnat", - "hchen9", - "hcz", - "heng zhao", - "hermano", - "hexiaoting", - "hhell", "Hiroaki Nakamura", - "hotid", - "huangzhaowei", "HuFuwang", "Hui Wang", - "hustnn", - "huzhichengdd", - "ice1x", - "idfer", - "igomac", - "igor", - "Igor", "Igor Hatarist", - "igor.lapko", "Igor Mineev", "Igor Strykhar", - "Igr", + "Igor", "Igr Mineev", - "ikarishinjieva", + "Igr", "Ikko Ashimine", - "ikopylov", "Ildar Musin", "Ildus Kurbangaliev", - "Ilya", "Ilya Breev", "Ilya Golshtein", "Ilya Khomutov", @@ -454,78 +310,56 @@ const char * auto_contributors[] { "Ilya Shipitsin", "Ilya Skrypitsa", "Ilya Yatsishin", + 
"Ilya", "ImgBotApp", - "imgbot[bot]", - "ip", - "Islam Israfilov", "Islam Israfilov (Islam93)", - "it1804", - "Ivan", + "Islam Israfilov", "Ivan A. Torgashov", "Ivan Babrou", "Ivan Blinkov", "Ivan He", - "ivan-kush", "Ivan Kush", "Ivan Kushnarenko", "Ivan Lezhankin", "Ivan Milov", "Ivan Remen", "Ivan Starkov", - "ivanzhukov", "Ivan Zhukov", - "ivoleg", + "Ivan", "Jack Song", "JackyWoo", "Jacob Hayes", - "jakalletti", "Jakub Kuklis", "JaosnHsieh", - "jasine", - "Jason", "Jason Keirstead", - "jasperzhu", - "javartisan", - "javi", - "javi santana", + "Jason", "Javi Santana", "Javi santana bot", "Jean Baptiste Favre", "Jeffrey Dang", - "jennyma", - "jetgm", "Jiading Guo", "Jiang Tao", - "jianmei zhang", - "jkuklis", - "João Figueiredo", "Jochen Schalanda", - "John", "John Hummel", "John Skopis", + "John", "Jonatas Freitas", + "João Figueiredo", "Julian Zhou", - "jyz0309", "Kang Liu", "Karl Pietrzak", - "karnevil13", - "keenwolf", "Keiji Yoshida", "Ken Chen", "Ken MacInnis", "Kevin Chiang", "Kevin Michel", - "kevin wan", "Kiran", "Kirill Danshin", "Kirill Ershov", - "kirillikoff", "Kirill Malev", "Kirill Shvakov", - "kmeaw", "Koblikov Mihail", "KochetovNicolai", - "kolsys", "Konstantin Grabar", "Konstantin Ilchenko", "Konstantin Lebedev", @@ -534,73 +368,36 @@ const char * auto_contributors[] { "Konstantin Rudenskii", "Korenevskiy Denis", "Korviakov Andrey", - "koshachy", "Kostiantyn Storozhuk", "Kozlov Ivan", - "kreuzerkrieg", "Kruglov Pavel", - "ks1322", "Kseniia Sumarokova", - "kshvakov", - "kssenii", "Ky Li", - "l", - "l1tsolaiki", - "lalex", + "LB", "Latysheva Alexandra", - "laurieliyang", - "lehasm", - "Léo Ercolanelli", "Leonardo Cecchi", "Leopold Schabel", - "leozhang", "Lev Borodin", - "levie", - "levushkin aleksej", - "levysh", "Lewinma", - "lhuang0928", - "lhuang09287750", - "liang.huang", - "liangqian", - "libenwang", - "lichengxiang", - "linceyou", - "listar", - "litao91", - "liu-bov", "Liu Cong", "LiuCong", - "liuyangkuan", "LiuYangkuan", - "liuyimin", - "liyang", - "lomberts", - "long2ice", "Lopatin Konstantin", "Loud_Scream", - "lthaooo", - "ltybc-coder", - "luc1ph3r", "Lucid Dreams", "Luis Bosque", - "lulichao", "Lv Feng", + "Léo Ercolanelli", "M0r64n", - "madianjun", "MagiaGroz", - "maiha", - "Maksim", + "Maks Skorokhod", "Maksim Fedotov", "Maksim Kita", - "Maks Skorokhod", - "malkfilipp", + "Maksim", "Malte", - "manmitya", - "maqroll", "Marat IDRISOV", - "Marek Vavruša", "Marek Vavrusa", + "Marek Vavruša", "Marek Vavruša", "Mariano Benítez Mulet", "Mark Andreev", @@ -609,21 +406,18 @@ const char * auto_contributors[] { "Maroun Maroun", "Marquitos", "Marsel Arduanov", - "Martijn Bakker", "Marti Raudsepp", + "Martijn Bakker", "Marvin Taschenberger", "Masha", - "mastertheknife", "Matthew Peveler", "Matwey V. 
Kornilov", - "Mátyás Jani", - "Max", "Max Akhmedov", "Max Bruce", - "maxim", + "Max Vetrov", + "Max", + "MaxWk", "Maxim Akhmedov", - "MaximAL", - "maxim-babenko", "Maxim Babenko", "Maxim Fedotov", "Maxim Fridental", @@ -634,141 +428,100 @@ const char * auto_contributors[] { "Maxim Serebryakov", "Maxim Smirnov", "Maxim Ulanovskiy", - "maxkuzn", - "maxulan", - "Max Vetrov", - "MaxWk", + "MaximAL", "Mc.Spring", - "mehanizm", "MeiK", - "melin", - "memo", "Memo", - "meo", - "meoww-bot", - "mergify[bot]", "Metehan Çetinkaya", "Metikov Vadim", - "mf5137", - "mfridental", "Michael Furmur", "Michael Kolupaev", "Michael Monashev", "Michael Razuvaev", "Michael Smitasin", "Michal Lisowski", - "michon470", "MicrochipQ", "Miguel Fernández", - "miha-g", "Mihail Fandyushin", - "mikael", "Mikahil Nacharov", - "Mike", "Mike F", "Mike Kot", - "mikepop7", - "Mikhail", + "Mike", "Mikhail Andreev", "Mikhail Cheshkov", "Mikhail Fandyushin", "Mikhail Filimonov", - "Mikhail f. Shiryaev", "Mikhail Gaidamaka", "Mikhail Korotov", "Mikhail Malafeev", "Mikhail Nacharov", "Mikhail Salosin", "Mikhail Surin", + "Mikhail f. Shiryaev", + "Mikhail", "MikuSugar", "Milad Arabi", - "millb", "Misko Lee", - "mnkonkova", - "mo-avatar", "Mohamad Fadhil", "Mohammad Hossein Sekhavat", - "morty", - "moscas", "Mostafa Dahab", "MovElb", "Mr.General", "Murat Kabilov", - "muzzlerator", - "m-ves", - "mwish", "MyroTk", - "myrrc", - "nagorny", + "Mátyás Jani", + "NIKITA MIKHAILOV", "Narek Galstyan", - "nauta", - "nautaa", + "Natasha Murashkina", + "NeZeD [Mac Pro]", "Neeke Gao", - "neng.liu", "Neng Liu", "NengLiu", - "never lee", - "NeZeD [Mac Pro]", - "nicelulu", - "Nickita", "Nickita Taranov", + "Nickita", "Nickolay Yastrebov", - "nickzhwang", - "Nicolae Vartolomei", "Nico Mandery", "Nico Piderman", + "Nicolae Vartolomei", "Nik", "Nikhil Nadig", "Nikhil Raman", "Nikita Lapkov", "Nikita Mikhailov", - "NIKITA MIKHAILOV", "Nikita Mikhalev", - "nikitamikhaylov", "Nikita Mikhaylov", "Nikita Orlov", "Nikita Tikhomirov", "Nikita Vasilev", "Nikolai Kochetov", "Nikolai Sorokin", - "Nikolay", "Nikolay Degterinsky", "Nikolay Kirsh", "Nikolay Semyachkin", "Nikolay Shcheglov", "Nikolay Vasiliev", "Nikolay Volosatov", + "Nikolay", "Niu Zhaojie", - "nonexistence", - "ns-vasilev", - "nvartolomei", - "oandrew", - "objatie_groba", - "ocadaruma", "Odin Hultgren Van Der Horst", - "ogorbacheva", "Okada Haruki", "Oleg Ershov", "Oleg Favstov", "Oleg Komarov", - "olegkv", "Oleg Matrokhin", "Oleg Obleukhov", "Oleg Strokachuk", "Olga Khvostikova", - "olgarev", "Olga Revyakina", "OmarBazaraa", - "Onehr7", "OnePiece", - "orantius", + "Onehr7", "Orivej Desh", "Oskar Wojciski", "OuO", - "palasonicq", + "PHO", "Paramtamtam", "Patrick Zippenfenig", - "Pavel", "Pavel Cheremushkin", "Pavel Kartaviy", "Pavel Kartavyy", @@ -778,87 +531,59 @@ const char * auto_contributors[] { "Pavel Medvedev", "Pavel Patrin", "Pavel Yakunin", + "Pavel", "Pavlo Bashynskiy", "Pawel Rog", - "pawelsz-rb", - "pdv-ru", + "Peignon Melvyn", "Peng Jian", "Persiyanov Dmitriy Andreevich", "Pervakov Grigorii", "Pervakov Grigory", - "peshkurov", - "philip.han", "Philippe Ombredanne", - "PHO", - "pingyu", - "potya", "Potya", "Pradeep Chhetri", - "presto53", - "proller", - "pufit", - "pyos", "Pysaoke", - "qianlixiang", - "qianmoQ", - "quid", "Quid37", - "quoctan132", - "r1j1k", "Rafael David Tinoco", - "rainbowsysu", "Ramazan Polat", - "Raúl Marín", "Ravengg", + "Raúl Marín", "Realist007", - "redclusive", "RedClusive", "RegulusZ", "Reilee", "Reto Kromer", "Ri", - "ritaank", - "robert", + "Rich Raposa", 
"Robert Hodges", - "robot-clickhouse", - "robot-metrika-test", - "rodrigargar", "Rohit Agarwal", "Romain Neutron", - "roman", "Roman Bug", + "Roman Chyrva", "Roman Lipovsky", "Roman Nikolaev", "Roman Nozdrin", "Roman Peshkurov", "Roman Tsisyk", - "romanzhukov", "Roman Zhukov", - "root", - "roverxu", - "ruct", - "Ruslan", + "Roman", "Ruslan Savchenko", + "Ruslan", "Russ Frank", "Ruzal Ibragimov", - "ryzuo", + "S.M.A. Djawadi", "Sabyanin Maxim", "SaltTan", "Sami Kerola", "Samuel Chou", - "santaux", - "satanson", "Saulius Valatka", - "sdk2", "Sean Haynes", - "Sébastien", - "Sébastien Launay", - "serebrserg", + "Serg Kulakov", + "Serge Rider", "Sergei Bocharov", "Sergei Semin", "Sergei Shtykov", "Sergei Tsetlin (rekub)", - "Serge Rider", "Sergey Demurin", "Sergey Elantsev", "Sergey Fedorov", @@ -872,144 +597,101 @@ const char * auto_contributors[] { "Sergey Zaikin", "Sergi Almacellas Abellana", "Sergi Vladykin", - "Serg Kulakov", - "sev7e0", "SevaCode", - "sevirov", "Seyed Mehrshad Hosseini", - "sfod", - "shangshujie", - "shedx", "Sherry Wang", "Shoh Jahon", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", "Simon Podlipsky", - "Šimon Podlipský", - "simon-says", "Sina", "Sjoerd Mulder", "Slach", - "S.M.A. Djawadi", "Snow", "Sofia Antipushina", - "songenjie", - "spff", - "spongedc", - "spyros87", "Stanislav Pavlovichev", "Stas Kelvich", "Stas Pavlovichev", - "stavrolia", "Stefan Thies", - "Stepan", "Stepan Herold", - "stepenhu", + "Stepan", "Steve-金勇", "Stig Bakken", "Storozhuk Kostiantyn", "Stupnikov Andrey", - "su-houzhen", - "sundy", - "sundy-li", - "sundyli", "SuperBot", - "svladykin", + "SuperDJY", + "Sébastien Launay", + "Sébastien", "TAC", + "TCeason", "Tagir Kuskarov", - "tai", - "taichong", "Tai White", - "taiyang-li", "Taleh Zaliyev", "Tangaev", - "tao jiang", - "Tatiana", "Tatiana Kirillova", - "tavplubix", - "TCeason", - "Teja", + "Tatiana", "Teja Srivastasa", + "Teja", "Tema Novikov", - "templarzq", "Tentoshka", - "terrylin", "The-Alchemist", - "Thomas Berdy", "Thom O'Connor", - "tianzhou", + "Thomas Berdy", "Tiaonmmn", - "tiger.yan", "Tigran Khudaverdyan", - "tison", + "Timur Magomedov", "TiunovNN", "Tobias Adamson", "Tobias Lins", "Tom Bombadil", - "topvisor", + "Tom Risse", + "Tomáš Hromada", "Tsarkova Anastasia", "TszkitLo40", - "turbo jason", - "tyrionhuang", - "ubuntu", "Ubuntu", "Ubus", "UnamedRus", - "unegare", - "unknown", - "urgordeadbeef", "V", - "Vadim", - "VadimPE", + "VDimir", "Vadim Plakhtinskiy", "Vadim Skipin", "Vadim Volodin", + "Vadim", + "VadimPE", "Val", "Valera Ryaboshapko", - "Vasilyev Nikita", "Vasily Kozhukhovskiy", "Vasily Morozov", "Vasily Nemkov", "Vasily Okunev", "Vasily Vasilkov", - "vdimir", - "VDimir", - "velom", + "Vasilyev Nikita", "Veloman Yunkan", "Veniamin Gvozdikov", "Veselkov Konstantin", - "vesslanjin", - "vgocoder", "Viachaslau Boben", - "vic", - "vicdashkov", - "vicgao", - "Victor", "Victor Tarnavsky", + "Victor", "Viktor Taranenko", - "vinity", "Vitalii S", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", "Vitaliy Lyudvichenko", "Vitaliy Zakaznikov", - "Vitaly", "Vitaly Baranov", "Vitaly Orlov", "Vitaly Samigullin", "Vitaly Stoyan", - "vitstn", - "vivarum", + "Vitaly", "Vivien Maisonneuve", "Vlad Arkhipov", - "Vladimir", "Vladimir Bunchuk", "Vladimir C", "Vladimir Ch", "Vladimir Chebotarev", - "vladimir golovchenko", "Vladimir Golovchenko", "Vladimir Goncharov", "Vladimir Klimontovich", @@ -1017,81 +699,437 @@ const char * auto_contributors[] { "Vladimir Kopysov", "Vladimir Kozbin", "Vladimir Smirnov", + "Vladimir", 
"Vladislav Rassokhin", "Vladislav Smirnov", "Vojtech Splichal", "Volodymyr Kuznetsov", "Vsevolod Orlov", - "vxider", "Vxider", "Vyacheslav Alipov", - "vzakaznikov", - "wangchao", "Wang Fenjin", "WangZengrui", - "weeds085490", "Weiqing Xu", "William Shallum", "Winter Zhang", - "wzl", "Xianda Ke", "Xiang Zhou", - "xiedeyantu", - "xPoSx", - "Yağızcan Değirmenci", - "yang", + "Y Lu", "Yangkuan Liu", - "yangshuai", "Yatsishin Ilya", - "yeer", + "Yağızcan Değirmenci", "Yegor Andreenko", "Yegor Levankov", - "ygrek", - "yhgcn", - "Yiğit Konur", - "yiguolei", "Yingchun Lai", "Yingfan Chen", + "Yiğit Konur", + "Yohann Jardin", + "Yuntao Wu", + "Yuri Dyachenko", + "Yurii Vlasenko", + "Yuriy Baranov", + "Yuriy Chernyshov", + "Yuriy Korzhenevskiy", + "Yuriy", + "Yury Karpovich", + "Yury Stankevich", + "ZhiYong Wang", + "Zhichang Yu", + "Zhichun Wu", + "Zhipeng", + "Zijie Lu", + "Zoran Pandovski", + "a.palagashvili", + "abdrakhmanov", + "abel-wang", + "abyss7", + "achimbab", + "achulkov2", + "adevyatova", + "ageraab", + "akazz", + "akonyaev", + "akuzm", + "alekseik1", + "alesapin", + "alex-zaitsev", + "alex.lvxin", + "alexander kozhikhov", + "alexey-milovidov", + "alfredlu", + "amesaru", + "amoschen", + "amudong", + "ana-uvarova", + "andrc1901", + "andrei-karpliuk", + "andrewsg", + "anneji", + "anneji-dev", + "annvsh", + "anrodigina", + "antikvist", + "anton", + "ap11", + "aprudaev", + "artpaul", + "asiana21", + "atereh", + "avasiliev", + "avogar", + "avsharapov", + "awesomeleo", + "bbkas", + "benamazing", + "benbiti", + "bgranvea", + "bharatnc", + "blazerer", + "bluebirddm", + "bo zeng", + "bobrovskij artemij", + "booknouse", + "bseng", + "caspian", + "cekc", + "centos7", + "cfcz48", + "cgp", + "champtar", + "chang.chen", + "changvvb", + "chasingegg", + "chengy8934", + "chenjian", + "chenqi", + "chenxing-xc", + "chenxing.xc", + "chertus", + "chou.fan", + "christophe.kalenzaga", + "cms", + "cmsxbc", + "cn-ds", + "comunodi", + "congbaoyangrou", + "coraxster", + "d.v.semenov", + "damozhaeva", + "dankondr", + "daoready", + "dasmfm", + "davydovska", + "decaseal", + "dependabot-preview[bot]", + "dependabot[bot]", + "detailyang", + "dfenelonov", + "dgrr", + "dimarub2000", + "dinosaur", + "divanorama", + "dkxiaohei", + "dmi-feo", + "dmitrii", + "dmitriiut", + "dmitry kuzmin", + "dongyifeng", + "eaxdev", + "eejoin", + "egatov", + "elBroom", + "elenaspb2019", + "elevankoff", + "emakarov", + "emhlbmc", + "emironyuk", + "ermaotech", + "evtan", + "exprmntr", + "ezhaka", + "f1yegor", + "fancno", + "fastio", + "favstovol", + "feihengye", + "felixoid", + "felixxdu", + "feng lv", + "fenglv", + "fessmage", + "fibersel", + "filimonov", + "filipe", + "flow", + "flynn", + "foxxmary", + "frank chen", + "frank", + "franklee", + "fredchenbj", + "freedomDR", + "fuqi", + "fuwhu", + "fuzhe1989", + "fuzzERot", + "g-arslan", + "ggerogery", + "giordyb", + "glockbender", + "glushkovds", + "guoleiyi", + "gyuton", + "hao.he", + "hchen9", + "hcz", + "heng zhao", + "hermano", + "hexiaoting", + "hhell", + "hotid", + "huangzhaowei", + "hustnn", + "huzhichengdd", + "ice1x", + "idfer", + "igomac", + "igor", + "igor.lapko", + "ikarishinjieva", + "ikopylov", + "imgbot[bot]", + "ip", + "it1804", + "ivan-kush", + "ivanzhukov", + "ivoleg", + "jakalletti", + "jasine", + "jasperzhu", + "javartisan", + "javi santana", + "javi", + "jennyma", + "jetgm", + "jianmei zhang", + "jkuklis", + "jus1096", + "jyz0309", + "karnevil13", + "keenwolf", + "kevin wan", + "khamadiev", + "kirillikoff", + "kmeaw", + "kolsys", + "koshachy", + "kreuzerkrieg", + 
"ks1322", + "kshvakov", + "kssenii", + "l", + "l1tsolaiki", + "lalex", + "laurieliyang", + "lehasm", + "leosunli", + "leozhang", + "levie", + "levushkin aleksej", + "levysh", + "lhuang0928", + "lhuang09287750", + "liang.huang", + "liangqian", + "libenwang", + "lichengxiang", + "linceyou", + "listar", + "litao91", + "liu-bov", + "liuyangkuan", + "liuyimin", + "liyang", + "liyang830", + "lomberts", + "loneylee", + "long2ice", + "lthaooo", + "ltybc-coder", + "luc1ph3r", + "lulichao", + "m-ves", + "madianjun", + "maiha", + "malkfilipp", + "manmitya", + "maqroll", + "mastertheknife", + "maxim", + "maxim-babenko", + "maxkuzn", + "maxulan", + "mehanizm", + "melin", + "memo", + "meo", + "meoww-bot", + "mergify[bot]", + "mf5137", + "mfridental", + "michael1589", + "michon470", + "miha-g", + "mikael", + "mikepop7", + "millb", + "mnkonkova", + "mo-avatar", + "morty", + "moscas", + "msaf1980", + "muzzlerator", + "mwish", + "myrrc", + "nagorny", + "nauta", + "nautaa", + "neng.liu", + "never lee", + "nicelulu", + "nickzhwang", + "nikitamikhaylov", + "nonexistence", + "ns-vasilev", + "nvartolomei", + "oandrew", + "objatie_groba", + "ocadaruma", + "ogorbacheva", + "olegkv", + "olgarev", + "orantius", + "p0ny", + "palasonicq", + "pawelsz-rb", + "pdv-ru", + "peshkurov", + "philip.han", + "pingyu", + "potya", + "presto53", + "proller", + "pufit", + "pyos", + "qianlixiang", + "qianmoQ", + "qieqieplus", + "quid", + "quoctan132", + "r1j1k", + "rainbowsysu", + "redclusive", + "ritaank", + "robert", + "robot-clickhouse", + "robot-metrika-test", + "rodrigargar", + "roman", + "romanzhukov", + "root", + "roverxu", + "ruct", + "ryzuo", + "santaux", + "satanson", + "sdk2", + "serebrserg", + "sev7e0", + "sevirov", + "sfod", + "shangshujie", + "shedx", + "simon-says", + "songenjie", + "spff", + "spongedc", + "spume", + "spyros87", + "stavrolia", + "stepenhu", + "su-houzhen", + "sundy", + "sundy-li", + "sundyli", + "sunlisheng", + "svladykin", + "tai", + "taichong", + "taiyang-li", + "tao jiang", + "tavplubix", + "templarzq", + "terrylin", + "tianzhou", + "tiger.yan", + "tison", + "topvisor", + "turbo jason", + "tyrionhuang", + "ubuntu", + "unegare", + "unknown", + "urgordeadbeef", + "vdimir", + "velom", + "vesslanjin", + "vgocoder", + "vic", + "vicdashkov", + "vicgao", + "vinity", + "vitstn", + "vivarum", + "vladimir golovchenko", + "vxider", + "vzakaznikov", + "wangchao", + "weeds085490", + "wzl", + "xPoSx", + "xiedeyantu", + "yandd", + "yang", + "yangshuai", + "yeer", + "ygrek", + "yhgcn", + "yiguolei", "yingjinghan", "ylchou", - "Y Lu", - "Yohann Jardin", "yonesko", "yuchuansun", "yuefoo", "yulu86", "yuluxu", - "Yuntao Wu", - "Yuri Dyachenko", - "Yurii Vlasenko", - "Yuriy", - "Yuriy Baranov", - "Yuriy Chernyshov", - "Yuriy Korzhenevskiy", - "Yury Karpovich", - "Yury Stankevich", "ywill3", "zamulla", "zhang2014", + "zhanghuajie", "zhanglistar", "zhangshengyu", "zhangxiao018", "zhangxiao871", "zhen ni", - "Zhichang Yu", - "Zhichun Wu", - "Zhipeng", - "ZhiYong Wang", "zhongyuankai", "zhukai", - "Zijie Lu", "zlx19950903", - "Zoran Pandovski", "zvonand", "zvrr", "zvvr", "zxc111", "zzsmdfj", + "Šimon Podlipský", "Артем Стрельцов", "Владислав Тихонов", "Георгий Кондратьев", @@ -1121,4 +1159,5 @@ const char * auto_contributors[] { "靳阳", "黄朝晖", "黄璞", + "박현우", nullptr}; From e52851d0eda2b3e23757600099883aa7f3247c7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 9 Dec 2021 01:15:35 +0300 Subject: [PATCH 182/262] Add a test #16171 --- ...where_where_lowcardinality_replacing.reference | 2 ++ 
...al_prewhere_where_lowcardinality_replacing.sql | 15 +++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference create mode 100644 tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql diff --git a/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference new file mode 100644 index 00000000000..0b7680a594f --- /dev/null +++ b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.reference @@ -0,0 +1,2 @@ +LowCardinality(String) +LowCardinality(String) diff --git a/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql new file mode 100644 index 00000000000..a801fe08614 --- /dev/null +++ b/tests/queries/0_stateless/02133_final_prewhere_where_lowcardinality_replacing.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS errors_local; + +CREATE TABLE errors_local (level LowCardinality(String)) ENGINE=ReplacingMergeTree ORDER BY level settings min_bytes_for_wide_part = '10000000'; +insert into errors_local select toString(number) from numbers(10000); + +SELECT toTypeName(level) FROM errors_local FINAL PREWHERE isNotNull(level) WHERE isNotNull(level) LIMIT 1; + +DROP TABLE errors_local; + +CREATE TABLE errors_local(level LowCardinality(String)) ENGINE=ReplacingMergeTree ORDER BY level; +insert into errors_local select toString(number) from numbers(10000); + +SELECT toTypeName(level) FROM errors_local FINAL PREWHERE isNotNull(level) WHERE isNotNull(level) LIMIT 1; + +DROP TABLE errors_local; From 45527768b2dcf669ce061f3cb77a00c8e54c32c5 Mon Sep 17 00:00:00 2001 From: vxider Date: Thu, 9 Dec 2021 02:34:37 +0000 Subject: [PATCH 183/262] small update for window view doc --- docs/zh/sql-reference/functions/window-view-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/sql-reference/functions/window-view-functions.md b/docs/zh/sql-reference/functions/window-view-functions.md index 86425bc78e2..a8afac9a85d 100644 --- a/docs/zh/sql-reference/functions/window-view-functions.md +++ b/docs/zh/sql-reference/functions/window-view-functions.md @@ -53,7 +53,7 @@ hop(time_attr, hop_interval, window_interval [, timezone]) **参数** - `time_attr` - [DateTime](../../sql-reference/data-types/datetime.md)类型的时间数据。 -- `hop_interval` - Hop interval in [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的滑动间隔,需要大于0。 +- `hop_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的滑动间隔,需要大于0。 - `window_interval` - [Interval](../../sql-reference/data-types/special-data-types/interval.md)类型的窗口大小,需要大于0。 - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) 类型的时区(可选参数)。 From b9861c45e1eb6ce5ab4e5f71354884d51ee1fbce Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Thu, 9 Dec 2021 15:17:24 +0800 Subject: [PATCH 184/262] terminate build when linker path not found --- cmake/tools.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/tools.cmake b/cmake/tools.cmake index eb3624f3b3b..69a37304f58 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -91,6 +91,9 @@ endif () if (LINKER_NAME) if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR 
CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0)) find_program (LLD_PATH NAMES ${LINKER_NAME}) + if (NOT LLD_PATH) + message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") + endif () set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") else () From fad3158bf7a50503f88cfc83c5a46e372acfc2f3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 10:55:51 +0300 Subject: [PATCH 185/262] Fix images names in integration tests --- tests/ci/integration_test_check.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 4a60d825687..723e81d63cb 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -25,15 +25,15 @@ from tee_popen import TeePopen DOWNLOAD_RETRIES_COUNT = 5 IMAGES = [ - "yandex/clickhouse-integration-tests-runner", - "yandex/clickhouse-mysql-golang-client", - "yandex/clickhouse-mysql-java-client", - "yandex/clickhouse-mysql-js-client", - "yandex/clickhouse-mysql-php-client", - "yandex/clickhouse-postgresql-java-client", - "yandex/clickhouse-integration-test", - "yandex/clickhouse-kerberos-kdc", - "yandex/clickhouse-integration-helper", + "clickhouse/integration-tests-runner", + "clickhouse/mysql-golang-client", + "clickhouse/mysql-java-client", + "clickhouse/mysql-js-client", + "clickhouse/mysql-php-client", + "clickhouse/postgresql-java-client", + "clickhouse/integration-test", + "clickhouse/kerberos-kdc", + "clickhouse/integration-helper", ] def get_json_params_dict(check_name, pr_info, docker_images): From 8618368d944ba883a75f6df3f0ff4ca3d43e0e4d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 10:55:51 +0300 Subject: [PATCH 186/262] Fix images names in integration tests --- tests/ci/integration_test_check.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index de18bbeb7d9..ce426ce3579 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -23,15 +23,15 @@ from stopwatch import Stopwatch DOWNLOAD_RETRIES_COUNT = 5 IMAGES = [ - "yandex/clickhouse-integration-tests-runner", - "yandex/clickhouse-mysql-golang-client", - "yandex/clickhouse-mysql-java-client", - "yandex/clickhouse-mysql-js-client", - "yandex/clickhouse-mysql-php-client", - "yandex/clickhouse-postgresql-java-client", - "yandex/clickhouse-integration-test", - "yandex/clickhouse-kerberos-kdc", - "yandex/clickhouse-integration-helper", + "clickhouse/integration-tests-runner", + "clickhouse/mysql-golang-client", + "clickhouse/mysql-java-client", + "clickhouse/mysql-js-client", + "clickhouse/mysql-php-client", + "clickhouse/postgresql-java-client", + "clickhouse/integration-test", + "clickhouse/kerberos-kdc", + "clickhouse/integration-helper", ] def get_json_params_dict(check_name, pr_info, docker_images): From 7ea7aa8db82ed91d2d4c058cae8f1e49796b0be3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 12:17:03 +0300 Subject: [PATCH 187/262] Fix build check --- tests/ci/build_report_check.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index f807dcc7cc2..3d97a973017 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -121,6 +121,9 @@ if __name__ == "__main__": build_logs += 
build_logs_url logging.info("Totally got %s results", len(build_results)) + if len(build_results) == 0: + logging.info("No builds, failing check") + sys.exit(1) s3_helper = S3Helper('https://s3.amazonaws.com') From 1df2383d000db19dcd48f601f7eef54775296a61 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 9 Dec 2021 12:17:31 +0300 Subject: [PATCH 188/262] Update run-fuzzer.sh --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index bafbd415e24..11a0396caeb 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -189,7 +189,7 @@ continue --receive_data_timeout_ms=10000 \ --stacktrace \ --query-fuzzer-runs=1000 \ - --testmode + --testmode \ --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \ $NEW_TESTS_OPT \ > >(tail -n 100000 > fuzzer.log) \ From dbf50910168191f4e00243647a7ba1e21ab89c6a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 9 Dec 2021 13:39:28 +0300 Subject: [PATCH 189/262] Parallel reading from replicas (#29279) --- docker/test/stateful/run.sh | 8 +- programs/server/config.xml | 32 +++ src/CMakeLists.txt | 2 + src/Client/Connection.cpp | 19 ++ src/Client/Connection.h | 5 + src/Client/HedgedConnections.cpp | 6 +- src/Client/HedgedConnections.h | 11 +- src/Client/IConnections.h | 17 +- src/Client/IServerConnection.h | 7 + src/Client/LocalConnection.cpp | 5 + src/Client/LocalConnection.h | 2 + src/Client/MultiplexedConnections.cpp | 40 ++- src/Client/MultiplexedConnections.h | 6 +- src/Common/PoolBase.h | 1 - src/Core/Protocol.h | 68 ++--- src/Core/ProtocolDefines.h | 6 +- src/Core/Settings.h | 2 + src/IO/WriteHelpers.h | 1 + src/Interpreters/ClientInfo.cpp | 16 ++ src/Interpreters/ClientInfo.h | 5 + src/Interpreters/Cluster.h | 2 + .../ClusterProxy/IStreamFactory.h | 2 + .../ClusterProxy/SelectStreamFactory.cpp | 2 + src/Interpreters/Context.cpp | 16 +- src/Interpreters/Context.h | 12 +- .../QueryPlan/ReadFromMergeTree.cpp | 44 +++- src/Processors/QueryPlan/ReadFromMergeTree.h | 5 +- src/Processors/QueryPlan/ReadFromRemote.cpp | 89 ++++++- src/Processors/QueryPlan/ReadFromRemote.h | 20 +- src/QueryPipeline/RemoteQueryExecutor.cpp | 75 ++++-- src/QueryPipeline/RemoteQueryExecutor.h | 36 ++- src/Server/TCPHandler.cpp | 60 ++++- src/Server/TCPHandler.h | 4 + src/Storages/IStorage.h | 1 + src/Storages/MergeTree/IMergeTreeDataPart.cpp | 10 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/IntersectionsIndexes.h | 237 +++++++++++++++++ src/Storages/MergeTree/MarkRange.cpp | 25 ++ src/Storages/MergeTree/MarkRange.h | 6 + .../MergeTreeBaseSelectProcessor.cpp | 249 +++++++++++++++++- .../MergeTree/MergeTreeBaseSelectProcessor.h | 84 +++++- .../MergeTree/MergeTreeBlockReadUtils.cpp | 5 +- .../MergeTree/MergeTreeBlockReadUtils.h | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 15 +- .../MergeTree/MergeTreeDataSelectExecutor.h | 6 +- .../MergeTreeInOrderSelectProcessor.cpp | 5 +- .../MergeTreeInOrderSelectProcessor.h | 5 +- .../MergeTree/MergeTreeRangeReader.cpp | 5 +- .../MergeTreeReverseSelectProcessor.cpp | 5 +- .../MergeTreeReverseSelectProcessor.h | 6 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 12 +- .../MergeTree/MergeTreeSelectProcessor.h | 9 +- .../MergeTree/MergeTreeSequentialSource.cpp | 2 + .../MergeTreeThreadSelectProcessor.cpp | 71 ++++- 
.../MergeTreeThreadSelectProcessor.h | 14 +- .../ParallelReplicasReadingCoordinator.cpp | 143 ++++++++++ .../ParallelReplicasReadingCoordinator.h | 20 ++ src/Storages/MergeTree/RequestResponse.cpp | 141 ++++++++++ src/Storages/MergeTree/RequestResponse.h | 57 ++++ .../MergeTree/tests/gtest_coordinator.cpp | 240 +++++++++++++++++ src/Storages/StorageDistributed.cpp | 7 +- src/Storages/StorageMergeTree.cpp | 9 +- src/Storages/StorageReplicatedMergeTree.cpp | 16 +- src/Storages/StorageS3Cluster.cpp | 2 +- .../System/StorageSystemPartsColumns.cpp | 2 +- .../StorageSystemProjectionPartsColumns.cpp | 2 +- ...4_shard_distributed_with_many_replicas.sql | 1 - .../01870_modulo_partition_key.sql | 8 +- ...el_processing_on_replicas_part_1.reference | 110 ++++++++ ..._parallel_processing_on_replicas_part_1.sh | 98 +++++++ 71 files changed, 2085 insertions(+), 179 deletions(-) create mode 100644 src/Storages/MergeTree/IntersectionsIndexes.h create mode 100644 src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp create mode 100644 src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h create mode 100644 src/Storages/MergeTree/RequestResponse.cpp create mode 100644 src/Storages/MergeTree/RequestResponse.h create mode 100644 src/Storages/MergeTree/tests/gtest_coordinator.cpp create mode 100644 tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference create mode 100755 tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 680392df43e..f8dee0f8bc9 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -61,6 +61,7 @@ chmod 777 -R /var/lib/clickhouse clickhouse-client --query "SHOW DATABASES" clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary" + service clickhouse-server restart # Wait for server to start accepting connections @@ -109,8 +110,13 @@ function run_tests() fi set +e - clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \ + clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ + --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \ "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt + + clickhouse-test --timeout 1200 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \ + 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt + set -e } diff --git a/programs/server/config.xml b/programs/server/config.xml index 37f36aa5215..9a2a6d7729f 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -650,6 +650,38 @@ + + + false + + 127.0.0.1 + 9000 + + + 127.0.0.2 + 9000 + + + 127.0.0.3 + 9000 + + + + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 5f4ebaaa895..5e4a16cfda7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -517,6 +517,8 @@ if (USE_BZIP2) target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR}) endif() +dbms_target_link_libraries(PUBLIC consistent-hashing) + include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake") if (ENABLE_TESTS AND USE_GTEST) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index ca10160fa88..505a6514812 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -603,6 
+603,14 @@ void Connection::sendReadTaskResponse(const String & response) out->next(); } + +void Connection::sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) +{ + writeVarUInt(Protocol::Client::MergeTreeReadTaskResponse, *out); + response.serialize(*out); + out->next(); +} + void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String & name) { /// NOTE 'Throttler' is not used in this method (could use, but it's not important right now). @@ -872,6 +880,10 @@ Packet Connection::receivePacket() case Protocol::Server::ReadTaskRequest: return res; + case Protocol::Server::MergeTreeReadTaskRequest: + res.request = receivePartitionReadRequest(); + return res; + case Protocol::Server::ProfileEvents: res.block = receiveProfileEvents(); return res; @@ -1023,6 +1035,13 @@ ProfileInfo Connection::receiveProfileInfo() const return profile_info; } +PartitionReadRequest Connection::receivePartitionReadRequest() const +{ + PartitionReadRequest request; + request.deserialize(*in); + return request; +} + void Connection::throwUnexpectedPacket(UInt64 packet_type, const char * expected) const { diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 3b49760ba10..2ea5334bbd3 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -16,6 +16,8 @@ #include +#include + #include #include @@ -104,6 +106,8 @@ public: void sendData(const Block & block, const String & name/* = "" */, bool scalar/* = false */) override; + void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) override; + void sendExternalTablesData(ExternalTablesData & data) override; bool poll(size_t timeout_microseconds/* = 0 */) override; @@ -255,6 +259,7 @@ private: std::vector receiveMultistringMessage(UInt64 msg_type) const; std::unique_ptr receiveException() const; Progress receiveProgress() const; + PartitionReadRequest receivePartitionReadRequest() const; ProfileInfo receiveProfileInfo() const; void initInputBuffers(); diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index c73bea53d10..791ac4c1ef1 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -132,7 +132,7 @@ void HedgedConnections::sendQuery( const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) { std::lock_guard lock(cancel_mutex); @@ -171,7 +171,9 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - if (offset_states.size() > 1) + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + + if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) { modified_settings.parallel_replicas_count = offset_states.size(); modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset; diff --git a/src/Client/HedgedConnections.h b/src/Client/HedgedConnections.h index e39d9582cde..d64f7ea4286 100644 --- a/src/Client/HedgedConnections.h +++ b/src/Client/HedgedConnections.h @@ -86,7 +86,7 @@ public: const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) override; void sendReadTaskResponse(const String &) override @@ -94,6 +94,11 @@ public: throw Exception("sendReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR); } + 
void sendMergeTreeReadTaskResponse(PartitionReadResponse) override + { + throw Exception("sendMergeTreeReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR); + } + Packet receivePacket() override; Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) override; @@ -112,6 +117,8 @@ public: bool hasActiveConnections() const override { return active_connection_count > 0; } + void setReplicaInfo(ReplicaInfo value) override { replica_info = value; } + private: /// If we don't receive data from replica and there is no progress in query /// execution for receive_data_timeout, we are trying to get new @@ -199,6 +206,8 @@ private: bool sent_query = false; bool cancelled = false; + ReplicaInfo replica_info; + mutable std::mutex cancel_mutex; }; diff --git a/src/Client/IConnections.h b/src/Client/IConnections.h index 53267cbbb3e..8dbd58c9598 100644 --- a/src/Client/IConnections.h +++ b/src/Client/IConnections.h @@ -1,6 +1,9 @@ #pragma once +#include + #include +#include namespace DB { @@ -27,10 +30,11 @@ public: const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) = 0; virtual void sendReadTaskResponse(const String &) = 0; + virtual void sendMergeTreeReadTaskResponse(PartitionReadResponse response) = 0; /// Get packet from any replica. virtual Packet receivePacket() = 0; @@ -56,6 +60,17 @@ public: /// Get the replica addresses as a string. virtual std::string dumpAddresses() const = 0; + + struct ReplicaInfo + { + size_t all_replicas_count{0}; + size_t number_of_current_replica{0}; + }; + + /// This is needed in max_parallel_replicas case. + /// We create a RemoteQueryExecutor for each replica + virtual void setReplicaInfo(ReplicaInfo value) = 0; + /// Returns the number of replicas. virtual size_t size() const = 0; diff --git a/src/Client/IServerConnection.h b/src/Client/IServerConnection.h index 9d6b54ef32f..b7c6ae314e2 100644 --- a/src/Client/IServerConnection.h +++ b/src/Client/IServerConnection.h @@ -12,6 +12,8 @@ #include #include +#include + #include @@ -32,10 +34,13 @@ struct Packet Progress progress; ProfileInfo profile_info; std::vector part_uuids; + PartitionReadRequest request; + PartitionReadResponse response; Packet() : type(Protocol::Server::Hello) {} }; + /// Struct which represents data we are going to send for external table. struct ExternalTableData { @@ -96,6 +101,8 @@ public: /// Send all contents of external (temporary) tables. virtual void sendExternalTablesData(ExternalTablesData & data) = 0; + virtual void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) = 0; + /// Check, if has data to read. 
virtual bool poll(size_t timeout_microseconds) = 0; diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 4f476b57c27..9eaa9ce883a 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -424,6 +424,11 @@ void LocalConnection::sendExternalTablesData(ExternalTablesData &) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } +void LocalConnection::sendMergeTreeReadTaskResponse(const PartitionReadResponse &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); +} + ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress) { return std::make_unique(current_context, send_progress); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 1cc23defa6e..fbd054506e7 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -92,6 +92,8 @@ public: void sendExternalTablesData(ExternalTablesData &) override; + void sendMergeTreeReadTaskResponse(const PartitionReadResponse & response) override; + bool poll(size_t timeout_microseconds/* = 0 */) override; bool hasReadPendingData() const override; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index a27f7709555..c3000443a9c 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -1,9 +1,10 @@ #include + +#include +#include #include #include -#include -#include "Core/Protocol.h" - +#include namespace DB { @@ -110,7 +111,7 @@ void MultiplexedConnections::sendQuery( const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) { std::lock_guard lock(cancel_mutex); @@ -131,16 +132,29 @@ void MultiplexedConnections::sendQuery( modified_settings.group_by_two_level_threshold = 0; modified_settings.group_by_two_level_threshold_bytes = 0; } + + if (settings.allow_experimental_parallel_reading_from_replicas) + { + client_info.collaborate_with_initiator = true; + client_info.count_participating_replicas = replica_info.all_replicas_count; + client_info.number_of_current_replica = replica_info.number_of_current_replica; + } } + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + size_t num_replicas = replica_states.size(); if (num_replicas > 1) { - /// Use multiple replicas for parallel query processing. - modified_settings.parallel_replicas_count = num_replicas; + if (enable_sample_offset_parallel_processing) + /// Use multiple replicas for parallel query processing. 
+ modified_settings.parallel_replicas_count = num_replicas; + for (size_t i = 0; i < num_replicas; ++i) { - modified_settings.parallel_replica_offset = i; + if (enable_sample_offset_parallel_processing) + modified_settings.parallel_replica_offset = i; + replica_states[i].connection->sendQuery(timeouts, query, query_id, stage, &modified_settings, &client_info, with_pending_data); } @@ -179,6 +193,16 @@ void MultiplexedConnections::sendReadTaskResponse(const String & response) current_connection->sendReadTaskResponse(response); } + +void MultiplexedConnections::sendMergeTreeReadTaskResponse(PartitionReadResponse response) +{ + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + current_connection->sendMergeTreeReadTaskResponse(response); +} + + Packet MultiplexedConnections::receivePacket() { std::lock_guard lock(cancel_mutex); @@ -234,6 +258,7 @@ Packet MultiplexedConnections::drain() switch (packet.type) { + case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: case Protocol::Server::PartUUIDs: case Protocol::Server::Data: @@ -313,6 +338,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac switch (packet.type) { + case Protocol::Server::MergeTreeReadTaskRequest: case Protocol::Server::ReadTaskRequest: case Protocol::Server::PartUUIDs: case Protocol::Server::Data: diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index 4fb7d496b0c..e76d54218c7 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -38,10 +38,11 @@ public: const String & query, const String & query_id, UInt64 stage, - const ClientInfo & client_info, + ClientInfo & client_info, bool with_pending_data) override; void sendReadTaskResponse(const String &) override; + void sendMergeTreeReadTaskResponse(PartitionReadResponse response) override; Packet receivePacket() override; @@ -62,6 +63,7 @@ public: /// Without locking, because sendCancel() does not change the state of the replicas. bool hasActiveConnections() const override { return active_connection_count > 0; } + void setReplicaInfo(ReplicaInfo value) override { replica_info = value; } private: Packet receivePacketUnlocked(AsyncCallback async_callback, bool is_draining) override; @@ -102,6 +104,8 @@ private: bool sent_query = false; bool cancelled = false; + ReplicaInfo replica_info; + /// A mutex for the sendCancel function to execute safely /// in separate thread. mutable std::mutex cancel_mutex; diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index 3f7f340c5d1..85d4e84abca 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -163,4 +163,3 @@ protected: /** Creates a new object to put into the pool. */ virtual ObjectPtr allocObject() = 0; }; - diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index fb18e1135a5..08c675eb421 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -64,24 +64,26 @@ namespace Protocol { enum Enum { - Hello = 0, /// Name, version, revision. - Data = 1, /// A block of data (compressed or not). - Exception = 2, /// The exception during query execution. - Progress = 3, /// Query execution progress: rows read, bytes read. - Pong = 4, /// Ping response - EndOfStream = 5, /// All packets were transmitted - ProfileInfo = 6, /// Packet with profiling info. - Totals = 7, /// A block with totals (compressed or not). - Extremes = 8, /// A block with minimums and maximums (compressed or not). - TablesStatusResponse = 9, /// A response to TablesStatus request. 
- Log = 10, /// System logs of the query execution - TableColumns = 11, /// Columns' description for default values calculation - PartUUIDs = 12, /// List of unique parts ids. - ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed - /// This is such an inverted logic, where server sends requests - /// And client returns back response - ProfileEvents = 14, /// Packet with profile events from server. - MAX = ProfileEvents, + Hello = 0, /// Name, version, revision. + Data = 1, /// A block of data (compressed or not). + Exception = 2, /// The exception during query execution. + Progress = 3, /// Query execution progress: rows read, bytes read. + Pong = 4, /// Ping response + EndOfStream = 5, /// All packets were transmitted + ProfileInfo = 6, /// Packet with profiling info. + Totals = 7, /// A block with totals (compressed or not). + Extremes = 8, /// A block with minimums and maximums (compressed or not). + TablesStatusResponse = 9, /// A response to TablesStatus request. + Log = 10, /// System logs of the query execution + TableColumns = 11, /// Columns' description for default values calculation + PartUUIDs = 12, /// List of unique parts ids. + ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed + /// This is such an inverted logic, where server sends requests + /// And client returns back response + ProfileEvents = 14, /// Packet with profile events from server. + MergeTreeReadTaskRequest = 15, /// Request from a MergeTree replica to a coordinator + MAX = MergeTreeReadTaskRequest, + }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -106,6 +108,7 @@ namespace Protocol "PartUUIDs", "ReadTaskRequest", "ProfileEvents", + "MergeTreeReadTaskRequest", }; return packet <= MAX ? data[packet] @@ -130,20 +133,20 @@ namespace Protocol { enum Enum { - Hello = 0, /// Name, version, revision, default DB - Query = 1, /// Query id, query settings, stage up to which the query must be executed, - /// whether the compression must be used, - /// query text (without data for INSERTs). - Data = 2, /// A block of data (compressed or not). - Cancel = 3, /// Cancel the query execution. - Ping = 4, /// Check that connection to the server is alive. - TablesStatusRequest = 5, /// Check status of tables on the server. - KeepAlive = 6, /// Keep the connection alive - Scalar = 7, /// A block of data (compressed or not). - IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing - ReadTaskResponse = 9, /// TODO: - - MAX = ReadTaskResponse, + Hello = 0, /// Name, version, revision, default DB + Query = 1, /// Query id, query settings, stage up to which the query must be executed, + /// whether the compression must be used, + /// query text (without data for INSERTs). + Data = 2, /// A block of data (compressed or not). + Cancel = 3, /// Cancel the query execution. + Ping = 4, /// Check that connection to the server is alive. + TablesStatusRequest = 5, /// Check status of tables on the server. + KeepAlive = 6, /// Keep the connection alive + Scalar = 7, /// A block of data (compressed or not). 
+ IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing + ReadTaskResponse = 9, /// A filename to read from s3 (used in s3Cluster) + MergeTreeReadTaskResponse = 10, /// Coordinator's decision with a modified set of mark ranges allowed to read + MAX = MergeTreeReadTaskResponse, }; inline const char * toString(UInt64 packet) @@ -159,6 +162,7 @@ namespace Protocol "Scalar", "IgnoredPartUUIDs", "ReadTaskResponse", + "MergeTreeReadTaskResponse" }; return packet <= MAX ? data[packet] diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index ac0fba384b8..36820788b91 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -31,6 +31,9 @@ #define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1 +#define DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION 1 +#define DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS 54453 + /// Minimum revision supporting interserver secret. #define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 @@ -48,6 +51,7 @@ /// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION, /// later is just a number for server version (one number instead of commit SHA) /// for simplicity (sometimes it may be more convenient in some use cases). -#define DBMS_TCP_PROTOCOL_VERSION 54452 + +#define DBMS_TCP_PROTOCOL_VERSION 54453 #define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4e0e50cc521..47b01655c26 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -126,6 +126,8 @@ class IColumn; M(UInt64, parallel_replicas_count, 0, "", 0) \ M(UInt64, parallel_replica_offset, 0, "", 0) \ \ + M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ + \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. 
Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \ diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 9cdc77df957..5498e1c90f3 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -118,6 +118,7 @@ inline void writeStringBinary(const std::string_view & s, WriteBuffer & buf) writeStringBinary(StringRef{s}, buf); } + template void writeVectorBinary(const std::vector & v, WriteBuffer & buf) { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 79956aaebed..827e7d27409 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -89,6 +89,13 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) writeBinary(uint8_t(0), out); } } + + if (server_protocol_revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS) + { + writeVarUInt(static_cast(collaborate_with_initiator), out); + writeVarUInt(count_participating_replicas, out); + writeVarUInt(number_of_current_replica, out); + } } @@ -170,6 +177,15 @@ void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) readBinary(client_trace_context.trace_flags, in); } } + + if (client_protocol_revision >= DBMS_MIN_REVISION_WITH_PARALLEL_REPLICAS) + { + UInt64 value; + readVarUInt(value, in); + collaborate_with_initiator = static_cast(value); + readVarUInt(count_participating_replicas, in); + readVarUInt(number_of_current_replica, in); + } } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index d42c34f07e2..3ce740c6436 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -108,6 +108,11 @@ public: bool is_replicated_database_internal = false; + /// For parallel processing on replicas + bool collaborate_with_initiator{false}; + UInt64 count_participating_replicas{0}; + UInt64 number_of_current_replica{0}; + bool empty() const { return query_kind == QueryKind::NO_QUERY; } /** Serialization and deserialization. diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index ec78abf574c..a64e17264b1 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -184,6 +184,8 @@ public: bool isLocal() const { return !local_addresses.empty(); } bool hasRemoteConnections() const { return local_addresses.size() != per_replica_pools.size(); } size_t getLocalNodeCount() const { return local_addresses.size(); } + size_t getRemoteNodeCount() const { return per_replica_pools.size() - local_addresses.size(); } + size_t getAllNodeCount() const { return per_replica_pools.size(); } bool hasInternalReplication() const { return has_internal_replication; } /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; diff --git a/src/Interpreters/ClusterProxy/IStreamFactory.h b/src/Interpreters/ClusterProxy/IStreamFactory.h index 6360aee2f55..483ce9dcab9 100644 --- a/src/Interpreters/ClusterProxy/IStreamFactory.h +++ b/src/Interpreters/ClusterProxy/IStreamFactory.h @@ -37,7 +37,9 @@ public: Block header; size_t shard_num = 0; + size_t num_replicas = 0; ConnectionPoolWithFailoverPtr pool; + ConnectionPoolPtrs per_replica_pools; /// If we connect to replicas lazily. 
/// (When there is a local replica with big delay). diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index b644f2936d9..a47874c475a 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -117,7 +117,9 @@ void SelectStreamFactory::createForShard( .query = modified_query_ast, .header = header, .shard_num = shard_info.shard_num, + .num_replicas = shard_info.getAllNodeCount(), .pool = shard_info.pool, + .per_replica_pools = shard_info.per_replica_pools, .lazy = lazy, .local_delay = local_delay, }); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9b2721cd15d..db1d6a37877 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2962,7 +2962,7 @@ PartUUIDsPtr Context::getPartUUIDs() const ReadTaskCallback Context::getReadTaskCallback() const { if (!next_task_callback.has_value()) - throw Exception(fmt::format("Next task callback is not set for query {}", getInitialQueryId()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Next task callback is not set for query {}", getInitialQueryId()); return next_task_callback.value(); } @@ -2972,6 +2972,20 @@ void Context::setReadTaskCallback(ReadTaskCallback && callback) next_task_callback = callback; } + +MergeTreeReadTaskCallback Context::getMergeTreeReadTaskCallback() const +{ + if (!merge_tree_read_task_callback.has_value()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Next task callback for is not set for query {}", getInitialQueryId()); + + return merge_tree_read_task_callback.value(); +} + +void Context::setMergeTreeReadTaskCallback(MergeTreeReadTaskCallback && callback) +{ + merge_tree_read_task_callback = callback; +} + PartUUIDsPtr Context::getIgnoredPartUUIDs() const { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 5948cc7f7a7..823bc028c15 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -14,6 +14,7 @@ #include #include #include +#include #include "config_core.h" @@ -148,6 +149,8 @@ using InputBlocksReader = std::function; /// Used in distributed task processing using ReadTaskCallback = std::function; +using MergeTreeReadTaskCallback = std::function(PartitionReadRequest)>; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -216,8 +219,12 @@ private: Scalars scalars; Scalars local_scalars; - /// Fields for distributed s3 function + /// Used in s3Cluster table function. With this callback, a worker node could ask an initiator + /// about next file to read from s3. std::optional next_task_callback; + /// Used in parallel reading from replicas. A replica tells about its intentions to read + /// some ranges from some part and initiator will tell the replica about whether it is accepted or denied. + std::optional merge_tree_read_task_callback; /// Record entities accessed by current query, and store this information in system.query_log. 
struct QueryAccessInfo @@ -865,6 +872,9 @@ public: ReadTaskCallback getReadTaskCallback() const; void setReadTaskCallback(ReadTaskCallback && callback); + MergeTreeReadTaskCallback getMergeTreeReadTaskCallback() const; + void setMergeTreeReadTaskCallback(MergeTreeReadTaskCallback && callback); + /// Background executors related methods void initializeBackgroundExecutorsIfNeeded(); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 67ca6d3d8e0..eddbbb9138c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -74,7 +74,8 @@ ReadFromMergeTree::ReadFromMergeTree( bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, Poco::Logger * log_, - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_) + MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, + bool enable_parallel_reading) : ISourceStep(DataStream{.header = MergeTreeBaseSelectProcessor::transformHeader( metadata_snapshot_->getSampleBlockForColumns(real_column_names_, data_.getVirtuals(), data_.getStorageID()), getPrewhereInfo(query_info_), @@ -107,6 +108,9 @@ ReadFromMergeTree::ReadFromMergeTree( auto type = std::make_shared(); output_stream->header.insert({type->createColumn(), type, "_sample_factor"}); } + + if (enable_parallel_reading) + read_task_callback = context->getMergeTreeReadTaskCallback(); } Pipe ReadFromMergeTree::readFromPool( @@ -127,6 +131,7 @@ Pipe ReadFromMergeTree::readFromPool( } const auto & settings = context->getSettingsRef(); + const auto & client_info = context->getClientInfo(); MergeTreeReadPool::BackoffSettings backoff_settings(settings); auto pool = std::make_shared( @@ -147,17 +152,30 @@ Pipe ReadFromMergeTree::readFromPool( for (size_t i = 0; i < max_streams; ++i) { + std::optional extension; + if (read_task_callback) + { + extension = ParallelReadingExtension + { + .callback = read_task_callback.value(), + .count_participating_replicas = client_info.count_participating_replicas, + .number_of_current_replica = client_info.number_of_current_replica, + .colums_to_read = required_columns + }; + } + auto source = std::make_shared( i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, data, metadata_snapshot, use_uncompressed_cache, - prewhere_info, actions_settings, reader_settings, virt_column_names); + prewhere_info, actions_settings, reader_settings, virt_column_names, std::move(extension)); - if (i == 0) - { - /// Set the approximate number of rows for the first source only + /// Set the approximate number of rows for the first source only + /// In case of parallel processing on replicas do not set approximate rows at all. 
+ /// Because the value will be identical on every replicas and will be accounted + /// multiple times (settings.max_parallel_replicas times more) + if (i == 0 && !client_info.collaborate_with_initiator) source->addTotalRowsApprox(total_rows); - } pipes.emplace_back(std::move(source)); } @@ -172,10 +190,22 @@ ProcessorPtr ReadFromMergeTree::createSource( bool use_uncompressed_cache, bool has_limit_below_one_block) { + const auto & client_info = context->getClientInfo(); + std::optional extension; + if (read_task_callback) + { + extension = ParallelReadingExtension + { + .callback = read_task_callback.value(), + .count_participating_replicas = client_info.count_participating_replicas, + .number_of_current_replica = client_info.number_of_current_replica, + .colums_to_read = required_columns + }; + } return std::make_shared( data, metadata_snapshot, part.data_part, max_block_size, preferred_block_size_bytes, preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info, - actions_settings, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block); + actions_settings, reader_settings, virt_column_names, part.part_index_in_query, has_limit_below_one_block, std::move(extension)); } Pipe ReadFromMergeTree::readInOrder( diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 46b62467ae0..0bdfa66bcc7 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -97,7 +97,8 @@ public: bool sample_factor_column_queried_, std::shared_ptr max_block_numbers_to_read_, Poco::Logger * log_, - MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_ + MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr_, + bool enable_parallel_reading ); String getName() const override { return "ReadFromMergeTree"; } @@ -184,6 +185,8 @@ private: MergeTreeDataSelectAnalysisResultPtr selectRangesToRead(MergeTreeData::DataPartsVector parts) const; ReadFromMergeTree::AnalysisResult getAnalysisResult() const; MergeTreeDataSelectAnalysisResultPtr analyzed_result_ptr; + + std::optional read_task_callback; }; struct MergeTreeDataSelectAnalysisResult diff --git a/src/Processors/QueryPlan/ReadFromRemote.cpp b/src/Processors/QueryPlan/ReadFromRemote.cpp index 399e7d01839..8fcec03d746 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.cpp +++ b/src/Processors/QueryPlan/ReadFromRemote.cpp @@ -12,6 +12,8 @@ #include #include #include +#include +#include namespace DB { @@ -112,7 +114,10 @@ ReadFromRemote::ReadFromRemote( { } -void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard) +void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info) { bool add_agg_info = stage == QueryProcessingStage::WithMergeableState; bool add_totals = false; @@ -125,7 +130,10 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto } auto lazily_create_stream = [ - pool = shard.pool, shard_num = shard.shard_num, shard_count = shard_count, query = shard.query, header = shard.header, + replica_info = replica_info, + pool = pool ? 
pool : shard.pool, + coordinator = coordinator, + shard_num = shard.shard_num, shard_count = shard_count, query = shard.query, header = shard.header, context = context, throttler = throttler, main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, @@ -161,9 +169,12 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto max_remote_delay = std::max(try_result.staleness, max_remote_delay); } - if (try_results.empty() || local_delay < max_remote_delay) + /// We disable this branch in case of parallel reading from replicas, because createLocalPlan will call + /// InterpreterSelectQuery directly and it will be too ugly to pass ParallelReplicasCoordinator or some callback there. + if (!context->getClientInfo().collaborate_with_initiator && (try_results.empty() || local_delay < max_remote_delay)) { auto plan = createLocalPlan(query, header, context, stage, shard_num, shard_count); + return QueryPipelineBuilder::getPipe(std::move(*plan->buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)))); @@ -180,7 +191,8 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard_num), std::make_shared(), "_shard_num"}}; auto remote_query_executor = std::make_shared( - pool, std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage); + pool, std::move(connections), query_string, header, context, throttler, scalars, external_tables, stage, + RemoteQueryExecutor::Extension{.parallel_reading_coordinator = std::move(coordinator), .replica_info = replica_info}); return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read); } @@ -191,7 +203,10 @@ void ReadFromRemote::addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFacto addConvertingActions(pipes.back(), output_stream->header); } -void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard) +void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info) { bool add_agg_info = stage == QueryProcessingStage::WithMergeableState; bool add_totals = false; @@ -207,11 +222,20 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory:: scalars["_shard_num"] = Block{{DataTypeUInt32().createColumnConst(1, shard.shard_num), std::make_shared(), "_shard_num"}}; - auto remote_query_executor = std::make_shared( - shard.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage); + + std::shared_ptr remote_query_executor; + + remote_query_executor = std::make_shared( + pool ? pool : shard.pool, query_string, shard.header, context, throttler, scalars, external_tables, stage, + RemoteQueryExecutor::Extension{.parallel_reading_coordinator = std::move(coordinator), .replica_info = std::move(replica_info)}); + remote_query_executor->setLogger(log); - remote_query_executor->setPoolMode(PoolMode::GET_MANY); + /// In case of parallel reading from replicas we have a connection pool per replica. + /// Setting PoolMode will make no sense. 
+ if (!pool) + remote_query_executor->setPoolMode(PoolMode::GET_MANY); + if (!table_func_ptr) remote_query_executor->setMainTable(main_table); @@ -223,12 +247,51 @@ void ReadFromRemote::addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory:: void ReadFromRemote::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) { Pipes pipes; - for (const auto & shard : shards) + + const auto & settings = context->getSettingsRef(); + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + + /// We have to create a pipe for each replica + /// FIXME: The second condition is only for tests to work, because hedged connections enabled by default. + if (settings.max_parallel_replicas > 1 && !enable_sample_offset_parallel_processing && !context->getSettingsRef().use_hedged_requests) { - if (shard.lazy) - addLazyPipe(pipes, shard); - else - addPipe(pipes, shard); + const Settings & current_settings = context->getSettingsRef(); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); + + for (const auto & shard : shards) + { + auto coordinator = std::make_shared(); + + for (size_t replica_num = 0; replica_num < shard.num_replicas; ++replica_num) + { + IConnections::ReplicaInfo replica_info + { + .all_replicas_count = shard.num_replicas, + .number_of_current_replica = replica_num + }; + + auto pool = shard.per_replica_pools[replica_num]; + auto pool_with_failover = std::make_shared( + ConnectionPoolPtrs{pool}, current_settings.load_balancing); + + if (shard.lazy) + addLazyPipe(pipes, shard, coordinator, pool_with_failover, replica_info); + else + addPipe(pipes, shard, coordinator, pool_with_failover, replica_info); + } + } + } + else + { + for (const auto & shard : shards) + { + auto coordinator = std::make_shared(); + + if (shard.lazy) + addLazyPipe(pipes, shard, /*coordinator=*/nullptr, /*pool*/{}, /*replica_info*/std::nullopt); + else + addPipe(pipes, shard, /*coordinator=*/nullptr, /*pool*/{}, /*replica_info*/std::nullopt); + } } auto pipe = Pipe::unitePipes(std::move(pipes)); diff --git a/src/Processors/QueryPlan/ReadFromRemote.h b/src/Processors/QueryPlan/ReadFromRemote.h index f963164dd3f..f361be93b5a 100644 --- a/src/Processors/QueryPlan/ReadFromRemote.h +++ b/src/Processors/QueryPlan/ReadFromRemote.h @@ -1,9 +1,11 @@ #pragma once #include #include +#include #include #include #include +#include namespace DB { @@ -37,6 +39,12 @@ public: void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; private: + enum class Mode + { + PerReplica, + PerShard + }; + ClusterProxy::IStreamFactory::Shards shards; QueryProcessingStage::Enum stage; @@ -52,8 +60,16 @@ private: Poco::Logger * log; UInt32 shard_count; - void addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard); - void addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard); + void addLazyPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info); + void addPipe(Pipes & pipes, const ClusterProxy::IStreamFactory::Shard & shard, + std::shared_ptr coordinator, + std::shared_ptr pool, + std::optional replica_info); + + void addPipeForReplica(); }; } diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index ada16a1f201..653d9a2bbf8 100644 --- 
a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -7,6 +7,7 @@ #include #include #include "Core/Protocol.h" +#include "IO/ReadHelpers.h" #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include namespace CurrentMetrics @@ -42,21 +44,26 @@ namespace ErrorCodes RemoteQueryExecutor::RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) : header(header_), query(query_), context(context_), scalars(scalars_) - , external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_) + , external_tables(external_tables_), stage(stage_) + , task_iterator(extension_ ? extension_->task_iterator : nullptr) + , parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr) {} RemoteQueryExecutor::RemoteQueryExecutor( Connection & connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) - : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) + : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) { - create_connections = [this, &connection, throttler]() + create_connections = [this, &connection, throttler, extension_]() { - return std::make_shared(connection, context->getSettingsRef(), throttler); + auto res = std::make_shared(connection, context->getSettingsRef(), throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -64,12 +71,15 @@ RemoteQueryExecutor::RemoteQueryExecutor( std::shared_ptr connection_ptr, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) - : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) + : RemoteQueryExecutor(query_, header_, context_, scalars_, external_tables_, stage_, extension_) { - create_connections = [this, connection_ptr, throttler]() + create_connections = [this, connection_ptr, throttler, extension_]() { - return std::make_shared(connection_ptr, context->getSettingsRef(), throttler); + auto res = std::make_shared(connection_ptr, context->getSettingsRef(), throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -78,12 +88,18 @@ RemoteQueryExecutor::RemoteQueryExecutor( std::vector && connections_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) : header(header_), query(query_), context(context_) - , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_) + , scalars(scalars_), 
external_tables(external_tables_), stage(stage_) + , task_iterator(extension_ ? extension_->task_iterator : nullptr) + , parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr) + , pool(pool_) { - create_connections = [this, connections_, throttler]() mutable { - return std::make_shared(std::move(connections_), context->getSettingsRef(), throttler); + create_connections = [this, connections_, throttler, extension_]() mutable { + auto res = std::make_shared(std::move(connections_), context->getSettingsRef(), throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -91,11 +107,14 @@ RemoteQueryExecutor::RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + QueryProcessingStage::Enum stage_, std::optional extension_) : header(header_), query(query_), context(context_) - , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_), pool(pool_) + , scalars(scalars_), external_tables(external_tables_), stage(stage_) + , task_iterator(extension_ ? extension_->task_iterator : nullptr) + , parallel_reading_coordinator(extension_ ? extension_->parallel_reading_coordinator : nullptr) + , pool(pool_) { - create_connections = [this, throttler]()->std::shared_ptr + create_connections = [this, throttler, extension_]()->std::shared_ptr { const Settings & current_settings = context->getSettingsRef(); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); @@ -107,7 +126,10 @@ RemoteQueryExecutor::RemoteQueryExecutor( if (main_table) table_to_check = std::make_shared(main_table.getQualifiedName()); - return std::make_shared(pool, context, timeouts, throttler, pool_mode, table_to_check); + auto res = std::make_shared(pool, context, timeouts, throttler, pool_mode, table_to_check); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; } #endif @@ -122,7 +144,10 @@ RemoteQueryExecutor::RemoteQueryExecutor( else connection_entries = pool->getMany(timeouts, ¤t_settings, pool_mode); - return std::make_shared(std::move(connection_entries), current_settings, throttler); + auto res = std::make_shared(std::move(connection_entries), current_settings, throttler); + if (extension_ && extension_->replica_info) + res->setReplicaInfo(*extension_->replica_info); + return res; }; } @@ -344,6 +369,9 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) { switch (packet.type) { + case Protocol::Server::MergeTreeReadTaskRequest: + processMergeTreeReadTaskRequest(packet.request); + break; case Protocol::Server::ReadTaskRequest: processReadTaskRequest(); break; @@ -440,6 +468,15 @@ void RemoteQueryExecutor::processReadTaskRequest() connections->sendReadTaskResponse(response); } +void RemoteQueryExecutor::processMergeTreeReadTaskRequest(PartitionReadRequest request) +{ + if (!parallel_reading_coordinator) + throw Exception("Coordinator for parallel reading from replicas is not initialized", ErrorCodes::LOGICAL_ERROR); + + auto response = parallel_reading_coordinator->handleRequest(std::move(request)); + connections->sendMergeTreeReadTaskResponse(response); +} + void RemoteQueryExecutor::finish(std::unique_ptr * read_context) { /** If one of: diff --git 
a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index b7a2509ea97..d5603fd2281 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -7,7 +9,7 @@ #include #include #include -#include +#include namespace DB @@ -35,20 +37,33 @@ class RemoteQueryExecutor public: using ReadContext = RemoteQueryExecutorReadContext; + /// We can provide additional logic for RemoteQueryExecutor + /// For example for s3Cluster table function we provide an Iterator over tasks to do. + /// Nodes involved into the query send request for a new task and we answer them using this object. + /// In case of parallel reading from replicas we provide a Coordinator object + /// Every replica will tell us about parts and mark ranges it wants to read and coordinator will + /// decide whether to deny or to accept that request. + struct Extension + { + std::shared_ptr task_iterator{nullptr}; + std::shared_ptr parallel_reading_coordinator; + std::optional replica_info; + }; + /// Takes already set connection. /// We don't own connection, thus we have to drain it synchronously. RemoteQueryExecutor( Connection & connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); /// Takes already set connection. RemoteQueryExecutor( std::shared_ptr connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); /// Accepts several connections already taken from pool. RemoteQueryExecutor( @@ -56,14 +71,14 @@ public: std::vector && connections_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); /// Takes a pool and gets one or several connections from it. 
RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::optional extension_ = std::nullopt); ~RemoteQueryExecutor(); @@ -115,7 +130,7 @@ private: RemoteQueryExecutor( const String & query_, const Block & header_, ContextPtr context_, const Scalars & scalars_, const Tables & external_tables_, - QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_); + QueryProcessingStage::Enum stage_, std::optional extension_); Block header; Block totals; @@ -136,6 +151,13 @@ private: /// Initiator identifier for distributed task processing std::shared_ptr task_iterator; + std::shared_ptr parallel_reading_coordinator; + + /// This is needed only for parallel reading from replicas, because + /// we create a RemoteQueryExecutor per replica and have to store additional info + /// about the number of the current replica or the count of replicas at all. + IConnections::ReplicaInfo replica_info; + std::function()> create_connections; /// Hold a shared reference to the connection pool so that asynchronous connection draining will /// work safely. Make sure it's the first member so that we don't destruct it too early. @@ -203,6 +225,8 @@ private: void processReadTaskRequest(); + void processMergeTreeReadTaskRequest(PartitionReadRequest request); + /// Cancell query and restart it with info about duplicated UUIDs /// only for `allow_experimental_query_deduplication`. std::variant restartQueryWithoutDuplicatedUUIDs(std::unique_ptr * read_context = nullptr); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 32154054cc0..cdf1838c06b 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -310,10 +310,25 @@ void TCPHandler::runImpl() query_context->setReadTaskCallback([this]() -> String { std::lock_guard lock(task_callback_mutex); + + if (state.is_cancelled) + return {}; + sendReadTaskRequestAssumeLocked(); return receiveReadTaskResponseAssumeLocked(); }); + query_context->setMergeTreeReadTaskCallback([this](PartitionReadRequest request) -> std::optional + { + std::lock_guard lock(task_callback_mutex); + + if (state.is_cancelled) + return std::nullopt; + + sendMergeTreeReadTaskRequstAssumeLocked(std::move(request)); + return receivePartitionMergeTreeReadTaskResponseAssumeLocked(); + }); + /// Processing Query state.io = executeQuery(state.query, query_context, false, state.stage); @@ -663,10 +678,13 @@ void TCPHandler::processOrdinaryQueryWithProcessors() Block block; while (executor.pull(block, interactive_delay / 1000)) { - std::lock_guard lock(task_callback_mutex); + std::unique_lock lock(task_callback_mutex); if (isQueryCancelled()) { + /// Several callback like callback for parallel reading could be called from inside the pipeline + /// and we have to unlock the mutex from our side to prevent deadlock. + lock.unlock(); /// A packet was received requesting to stop execution of the request. 
executor.cancel(); break; @@ -786,6 +804,15 @@ void TCPHandler::sendReadTaskRequestAssumeLocked() out->next(); } + +void TCPHandler::sendMergeTreeReadTaskRequstAssumeLocked(PartitionReadRequest request) +{ + writeVarUInt(Protocol::Server::MergeTreeReadTaskRequest, *out); + request.serialize(*out); + out->next(); +} + + void TCPHandler::sendProfileInfo(const ProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); @@ -1297,6 +1324,35 @@ String TCPHandler::receiveReadTaskResponseAssumeLocked() } +std::optional TCPHandler::receivePartitionMergeTreeReadTaskResponseAssumeLocked() +{ + UInt64 packet_type = 0; + readVarUInt(packet_type, *in); + if (packet_type != Protocol::Client::MergeTreeReadTaskResponse) + { + if (packet_type == Protocol::Client::Cancel) + { + state.is_cancelled = true; + /// For testing connection collector. + if (sleep_in_receive_cancel.totalMilliseconds()) + { + std::chrono::milliseconds ms(sleep_in_receive_cancel.totalMilliseconds()); + std::this_thread::sleep_for(ms); + } + return std::nullopt; + } + else + { + throw Exception(fmt::format("Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + } + } + PartitionReadResponse response; + response.deserialize(*in); + return response; +} + + void TCPHandler::receiveClusterNameAndSalt() { readStringBinary(cluster, *in); @@ -1697,7 +1753,7 @@ bool TCPHandler::isQueryCancelled() return true; default: - throw NetException("Unknown packet from client", ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + throw NetException("Unknown packet from client " + toString(packet_type), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); } } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 3cf3346cd72..4a340e328ed 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -15,6 +15,8 @@ #include #include +#include + #include "IServer.h" #include "base/types.h" @@ -201,6 +203,7 @@ private: void receiveQuery(); void receiveIgnoredPartUUIDs(); String receiveReadTaskResponseAssumeLocked(); + std::optional receivePartitionMergeTreeReadTaskResponseAssumeLocked(); bool receiveData(bool scalar); bool readDataNext(); void readData(); @@ -233,6 +236,7 @@ private: void sendEndOfStream(); void sendPartUUIDs(); void sendReadTaskRequestAssumeLocked(); + void sendMergeTreeReadTaskRequstAssumeLocked(PartitionReadRequest request); void sendProfileInfo(const ProfileInfo & info); void sendTotals(const Block & totals); void sendExtremes(const Block & extremes); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index cf7b075a204..8432e5c48d1 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -20,6 +20,7 @@ #include #include +#include namespace DB diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 1255bf975e6..21dbedbb6ac 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -546,7 +546,7 @@ String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageM if (!hasColumnFiles(column)) continue; - const auto size = getColumnSize(column_name, *column_type).data_compressed; + const auto size = getColumnSize(column_name).data_compressed; if (size < minimum_size) { minimum_size = size; @@ -747,7 +747,7 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const for (const auto & part_column : columns) { /// It was compressed with default codec and it's not empty - auto 
column_size = getColumnSize(part_column.name, *part_column.type); + auto column_size = getColumnSize(part_column.name); if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name)) { auto serialization = IDataType::getSerialization(part_column, @@ -885,7 +885,7 @@ void IMergeTreeDataPart::loadRowsCount() /// Most trivial types if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes()) { - auto size = getColumnSize(column.name, *column.type); + auto size = getColumnSize(column.name); if (size.data_uncompressed == 0) continue; @@ -933,7 +933,7 @@ void IMergeTreeDataPart::loadRowsCount() if (!column_col->isFixedAndContiguous() || column_col->lowCardinality()) continue; - size_t column_size = getColumnSize(column.name, *column.type).data_uncompressed; + size_t column_size = getColumnSize(column.name).data_uncompressed; if (!column_size) continue; @@ -1490,7 +1490,7 @@ void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() } } -ColumnSize IMergeTreeDataPart::getColumnSize(const String & column_name, const IDataType & /* type */) const +ColumnSize IMergeTreeDataPart::getColumnSize(const String & column_name) const { /// For some types of parts columns_size maybe not calculated auto it = columns_sizes.find(column_name); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 1467b0ef03f..3515da20fa9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -103,7 +103,7 @@ public: /// NOTE: Returns zeros if column files are not found in checksums. /// Otherwise return information about column size on disk. - ColumnSize getColumnSize(const String & column_name, const IDataType & /* type */) const; + ColumnSize getColumnSize(const String & column_name) const; /// NOTE: Returns zeros if secondary indexes are not found in checksums. /// Otherwise return information about secondary index size on disk. diff --git a/src/Storages/MergeTree/IntersectionsIndexes.h b/src/Storages/MergeTree/IntersectionsIndexes.h new file mode 100644 index 00000000000..68ccbc4a0b1 --- /dev/null +++ b/src/Storages/MergeTree/IntersectionsIndexes.h @@ -0,0 +1,237 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/// A boundary of a segment (left or right) +struct PartToRead +{ + PartBlockRange range; + struct PartAndProjectionNames + { + String part; + String projection; + bool operator<(const PartAndProjectionNames & rhs) const + { + if (part == rhs.part) + return projection < rhs.projection; + return part < rhs.part; + } + bool operator==(const PartAndProjectionNames & rhs) const + { + return part == rhs.part && projection == rhs.projection; + } + }; + + PartAndProjectionNames name; + + bool operator==(const PartToRead & rhs) const + { + return range == rhs.range && name == rhs.name; + } + + bool operator<(const PartToRead & rhs) const + { + /// We allow only consecutive non-intersecting ranges + const bool intersection = + (range.begin <= rhs.range.begin && rhs.range.begin < range.end) || + (rhs.range.begin <= range.begin && range.begin <= rhs.range.end); + if (intersection) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got intersecting parts. First [{}, {}]. 
Second [{}, {}]", + range.begin, range.end, rhs.range.begin, rhs.range.end); + return range.begin < rhs.range.begin && range.end <= rhs.range.begin; + } +}; + +/// MergeTreeDataPart is described as a segment (min block and max block) +/// During request handling we have to know how many intersection +/// current part has with already saved parts in our state. +struct PartSegments +{ + enum class IntersectionResult + { + NO_INTERSECTION, + EXACTLY_ONE_INTERSECTION, + REJECT + }; + + void addPart(PartToRead part) { segments.insert(std::move(part)); } + + IntersectionResult getIntersectionResult(PartToRead part) + { + bool intersected_before = false; + for (const auto & segment: segments) + { + auto are_intersect = [](auto & x, auto & y) + { + /// <= is important here, because we are working with segments [a, b] + if ((x.begin <= y.begin) && (y.begin <= x.end)) + return true; + if ((y.begin <= x.begin) && (x.begin <= y.end)) + return true; + return false; + }; + + if (are_intersect(segment.range, part.range)) + { + /// We have two or possibly more intersections + if (intersected_before) + return IntersectionResult::REJECT; + + /// We have intersection with part with different name + /// or with different min or max block + /// It could happens if we have merged part on one replica + /// but not on another. + if (segment != part) + return IntersectionResult::REJECT; + + /// We allow only the intersection with the same part as we have + intersected_before = true; + } + } + + return intersected_before ? IntersectionResult::EXACTLY_ONE_INTERSECTION : IntersectionResult::NO_INTERSECTION; + } + + using OrderedSegments = std::set; + OrderedSegments segments; +}; + +/// This is used only in parallel reading from replicas +/// This struct is an ordered set of half intervals and it is responsible for +/// giving an inversion of that intervals (e.g. [a, b) => {[-inf, a), [b, +inf)}) +/// or giving an intersection of two sets of intervals +/// This is needed, because MarkRange is actually a half-opened interval +/// and during the query execution we receive some kind of request from every replica +/// to read some ranges from a specific part. +/// We have to avoid the situation, where some range is read twice. +/// This struct helps us to do it using only two operations (intersection and inversion) +/// over a set of half opened intervals. 
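// Illustrative sketch (standalone and deliberately simplified; it is not the
// HalfIntervals struct defined below, and the bounded space [0, 10) is an assumption
// made only for the example). A worked example of the inversion described above: the
// complement of the half-open intervals {[1, 3), [5, 7)} inside [0, 10) is
// {[0, 1), [3, 5), [7, 10)}, and "subtract the granted ranges" is simply "intersect
// with the negation".
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

using Interval = std::pair<size_t, size_t>;     // half-open [begin, end)

std::vector<Interval> negateWithin(const std::vector<Interval> & sorted_disjoint, size_t space_end)
{
    std::vector<Interval> result;
    size_t cursor = 0;
    for (const auto & [begin, end] : sorted_disjoint)
    {
        if (cursor < begin)
            result.emplace_back(cursor, begin); // gap before the next granted interval
        cursor = end;
    }
    if (cursor < space_end)
        result.emplace_back(cursor, space_end); // tail after the last granted interval
    return result;
}

int main()
{
    std::vector<Interval> granted{{1, 3}, {5, 7}};
    auto remaining = negateWithin(granted, 10);
    assert((remaining == std::vector<Interval>{{0, 1}, {3, 5}, {7, 10}}));
}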
+struct HalfIntervals +{ + static HalfIntervals initializeWithEntireSpace() + { + auto left_inf = std::numeric_limits::min(); + auto right_inf = std::numeric_limits::max(); + return HalfIntervals{{{left_inf, right_inf}}}; + } + + static HalfIntervals initializeFromMarkRanges(MarkRanges ranges) + { + OrderedRanges new_intervals; + for (const auto & range : ranges) + new_intervals.insert(range); + + return HalfIntervals{std::move(new_intervals)}; + } + + MarkRanges convertToMarkRangesFinal() + { + MarkRanges result; + std::move(intervals.begin(), intervals.end(), std::back_inserter(result)); + return result; + } + + HalfIntervals & intersect(const HalfIntervals & rhs) + { + /** + * first [ ) [ ) [ ) [ ) [ ) + * second [ ) [ ) [ ) [ ) + */ + OrderedRanges intersected; + + const auto & first_intervals = intervals; + auto first = first_intervals.begin(); + const auto & second_intervals = rhs.intervals; + auto second = second_intervals.begin(); + + while (first != first_intervals.end() && second != second_intervals.end()) + { + auto curr_intersection = MarkRange{ + std::max(second->begin, first->begin), + std::min(second->end, first->end) + }; + + /// Insert only if segments are intersect + if (curr_intersection.begin < curr_intersection.end) + intersected.insert(std::move(curr_intersection)); + + if (first->end <= second->end) + ++first; + else + ++second; + } + + std::swap(intersected, intervals); + + return *this; + } + + HalfIntervals & negate() + { + auto left_inf = std::numeric_limits::min(); + auto right_inf = std::numeric_limits::max(); + + if (intervals.empty()) + { + intervals.insert(MarkRange{left_inf, right_inf}); + return *this; + } + + OrderedRanges new_ranges; + + /// Possibly add (-inf; begin) + if (auto begin = intervals.begin()->begin; begin != left_inf) + new_ranges.insert(MarkRange{left_inf, begin}); + + auto prev = intervals.begin(); + for (auto it = std::next(intervals.begin()); it != intervals.end(); ++it) + { + if (prev->end != it->begin) + new_ranges.insert(MarkRange{prev->end, it->begin}); + prev = it; + } + + /// Try to add (end; +inf) + if (auto end = intervals.rbegin()->end; end != right_inf) + new_ranges.insert(MarkRange{end, right_inf}); + + std::swap(new_ranges, intervals); + + return *this; + } + + bool operator==(const HalfIntervals & rhs) const + { + return intervals == rhs.intervals; + } + + using OrderedRanges = std::set; + OrderedRanges intervals; +}; + + +[[ maybe_unused ]] static std::ostream & operator<< (std::ostream & out, const HalfIntervals & ranges) +{ + for (const auto & range: ranges.intervals) + out << fmt::format("({}, {}) ", range.begin, range.end); + return out; +} + +/// This is needed for tests where we don't need to modify objects +[[ maybe_unused ]] static HalfIntervals getIntersection(const HalfIntervals & first, const HalfIntervals & second) +{ + auto result = first; + result.intersect(second); + return result; +} + +} diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index 7f097cd7106..343c4ecaf22 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -3,6 +3,31 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +bool MarkRange::operator==(const MarkRange & rhs) const +{ + return begin == rhs.begin && end == rhs.end; +} + +bool MarkRange::operator<(const MarkRange & rhs) const +{ + /// We allow only consecutive non-intersecting ranges + /// Here we check whether a beginning of one range lies inside another range + /// (ranges 
are intersect) + const bool is_intersection = (begin <= rhs.begin && rhs.begin < end) || + (rhs.begin <= begin && begin < rhs.end); + + if (is_intersection) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Intersecting mark ranges are not allowed, it is a bug! First range ({}, {}), second range ({}, {})", begin, end, rhs.begin, rhs.end); + + return begin < rhs.begin && end <= rhs.begin; +} + size_t getLastMark(const MarkRanges & ranges) { size_t current_task_last_mark = 0; diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index b46913db30c..4f32be6ab14 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -2,7 +2,9 @@ #include #include +#include +#include namespace DB { @@ -18,6 +20,10 @@ struct MarkRange MarkRange() = default; MarkRange(const size_t begin_, const size_t end_) : begin{begin_}, end{end_} {} + + bool operator==(const MarkRange & rhs) const; + + bool operator<(const MarkRange & rhs) const; }; using MarkRanges = std::deque; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 2f46543b03c..fbc818a7de9 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,6 +13,8 @@ #include +#include + namespace DB { @@ -33,7 +36,8 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( UInt64 preferred_max_column_in_block_size_bytes_, const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, - const Names & virt_column_names_) + const Names & virt_column_names_, + std::optional extension_) : SourceWithProgress(transformHeader(std::move(header), prewhere_info_, storage_.getPartitionValueType(), virt_column_names_)) , storage(storage_) , metadata_snapshot(metadata_snapshot_) @@ -45,6 +49,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( , use_uncompressed_cache(use_uncompressed_cache_) , virt_column_names(virt_column_names_) , partition_value_type(storage.getPartitionValueType()) + , extension(extension_) { header_without_virtual_columns = getPort().getHeader(); @@ -71,6 +76,91 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( } +bool MergeTreeBaseSelectProcessor::getNewTask() +{ + /// No parallel reading feature + if (!extension.has_value()) + { + if (getNewTaskImpl()) + { + finalizeNewTask(); + return true; + } + return false; + } + return getNewTaskParallelReading(); +} + + +bool MergeTreeBaseSelectProcessor::getNewTaskParallelReading() +{ + if (getTaskFromBuffer()) + return true; + + if (no_more_tasks) + return getDelayedTasks(); + + while (true) + { + /// The end of execution. No task. 
+ if (!getNewTaskImpl()) + { + no_more_tasks = true; + return getDelayedTasks(); + } + + splitCurrentTaskRangesAndFillBuffer(); + + if (getTaskFromBuffer()) + return true; + } +} + + +bool MergeTreeBaseSelectProcessor::getTaskFromBuffer() +{ + while (!buffered_ranges.empty()) + { + auto ranges = std::move(buffered_ranges.front()); + buffered_ranges.pop_front(); + + assert(!ranges.empty()); + + auto res = performRequestToCoordinator(ranges, /*delayed=*/false); + + if (Status::Accepted == res) + return true; + + if (Status::Cancelled == res) + break; + } + return false; +} + + +bool MergeTreeBaseSelectProcessor::getDelayedTasks() +{ + while (!delayed_tasks.empty()) + { + task = std::move(delayed_tasks.front()); + delayed_tasks.pop_front(); + + assert(!task->mark_ranges.empty()); + + auto res = performRequestToCoordinator(task->mark_ranges, /*delayed=*/true); + + if (Status::Accepted == res) + return true; + + if (Status::Cancelled == res) + break; + } + + finish(); + return false; +} + + Chunk MergeTreeBaseSelectProcessor::generate() { while (!isCancelled()) @@ -479,6 +569,163 @@ std::unique_ptr MergeTreeBaseSelectProcessor::getSi data_part, Names(complete_column_names.begin(), complete_column_names.end()), sample_block); } + +MergeTreeBaseSelectProcessor::Status MergeTreeBaseSelectProcessor::performRequestToCoordinator(MarkRanges requested_ranges, bool delayed) +{ + String partition_id = task->data_part->info.partition_id; + String part_name; + String projection_name; + + if (task->data_part->isProjectionPart()) + { + part_name = task->data_part->getParentPart()->name; + projection_name = task->data_part->name; + } + else + { + part_name = task->data_part->name; + projection_name = ""; + } + + PartBlockRange block_range + { + .begin = task->data_part->info.min_block, + .end = task->data_part->info.max_block + }; + + PartitionReadRequest request + { + .partition_id = std::move(partition_id), + .part_name = std::move(part_name), + .projection_name = std::move(projection_name), + .block_range = std::move(block_range), + .mark_ranges = std::move(requested_ranges) + }; + + /// Constistent hashing won't work with reading in order, because at the end of the execution + /// we could possibly seek back + if (!delayed && canUseConsistentHashingForParallelReading()) + { + const auto hash = request.getConsistentHash(extension->count_participating_replicas); + if (hash != extension->number_of_current_replica) + { + auto delayed_task = std::make_unique(*task); // Create a copy + delayed_task->mark_ranges = std::move(request.mark_ranges); + delayed_tasks.emplace_back(std::move(delayed_task)); + return Status::Denied; + } + } + + auto optional_response = extension.value().callback(std::move(request)); + + if (!optional_response.has_value()) + return Status::Cancelled; + + auto response = optional_response.value(); + + task->mark_ranges = std::move(response.mark_ranges); + + if (response.denied || task->mark_ranges.empty()) + return Status::Denied; + + finalizeNewTask(); + + return Status::Accepted; +} + + +size_t MergeTreeBaseSelectProcessor::estimateMaxBatchSizeForHugeRanges() +{ + /// This is an empirical number and it is so, + /// because we have an adaptive granularity by default. + const size_t average_granule_size_bytes = 8UL * 1024 * 1024 * 10; // 10 MiB + + /// We want to have one RTT per one gigabyte of data read from disk + /// this could be configurable. 
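// Illustrative sketch (standalone; ToyColumnSize and the 1 GiB / 10 MiB constants are
// assumptions for the example, not the values used below). The estimation reduces to:
// average bytes per mark = uncompressed column size / number of marks, summed over the
// columns being read, and the batch size in marks is target bytes per request divided
// by that average, with a fallback when column sizes are unknown (e.g. compact parts).
#include <cstddef>
#include <vector>

struct ToyColumnSize { size_t data_uncompressed = 0; size_t marks = 0; };

size_t estimateBatchSizeInMarks(const std::vector<ToyColumnSize> & columns,
                                size_t target_bytes_per_request,
                                size_t fallback_bytes_per_mark)
{
    size_t bytes_per_mark = 0;
    for (const auto & column : columns)
        if (column.marks != 0)
            bytes_per_mark += column.data_uncompressed / column.marks;

    if (bytes_per_mark == 0)                    // sizes not known: use the fallback
        bytes_per_mark = fallback_bytes_per_mark;

    return target_bytes_per_request / bytes_per_mark;
}

int main()
{
    // Two columns with roughly 64 KiB and 16 KiB of uncompressed data per mark.
    std::vector<ToyColumnSize> columns{{size_t{64} << 20, 1024}, {size_t{16} << 20, 1024}};
    size_t batch = estimateBatchSizeInMarks(columns, size_t{1} << 30, size_t{10} << 20);
    // 1 GiB per request / 80 KiB per mark ~= 13107 marks per batch.
    return batch == 13107 ? 0 : 1;
}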
+ const size_t max_size_for_one_request = 8UL * 1024 * 1024 * 1024; // 1 GiB + + size_t sum_average_marks_size = 0; + /// getColumnSize is not fully implemented for compact parts + if (task->data_part->getType() == IMergeTreeDataPart::Type::COMPACT) + { + sum_average_marks_size = average_granule_size_bytes; + } + else + { + for (const auto & name : extension->colums_to_read) + { + auto size = task->data_part->getColumnSize(name); + + assert(size.marks != 0); + sum_average_marks_size += size.data_uncompressed / size.marks; + } + } + + if (sum_average_marks_size == 0) + sum_average_marks_size = average_granule_size_bytes; // 10 MiB + + LOG_TEST(log, "Reading from {} part, average mark size is {}", + task->data_part->getTypeName(), sum_average_marks_size); + + return max_size_for_one_request / sum_average_marks_size; +} + +void MergeTreeBaseSelectProcessor::splitCurrentTaskRangesAndFillBuffer() +{ + const size_t max_batch_size = estimateMaxBatchSizeForHugeRanges(); + + size_t current_batch_size = 0; + buffered_ranges.emplace_back(); + + for (const auto & range : task->mark_ranges) + { + auto expand_if_needed = [&] + { + if (current_batch_size > max_batch_size) + { + buffered_ranges.emplace_back(); + current_batch_size = 0; + } + }; + + expand_if_needed(); + + if (range.end - range.begin < max_batch_size) + { + buffered_ranges.back().push_back(range); + current_batch_size += range.end - range.begin; + continue; + } + + auto current_begin = range.begin; + auto current_end = range.begin + max_batch_size; + + while (current_end < range.end) + { + auto current_range = MarkRange{current_begin, current_end}; + buffered_ranges.back().push_back(current_range); + current_batch_size += current_end - current_begin; + + current_begin = current_end; + current_end = current_end + max_batch_size; + + expand_if_needed(); + } + + if (range.end - current_begin > 0) + { + auto current_range = MarkRange{current_begin, range.end}; + buffered_ranges.back().push_back(current_range); + current_batch_size += range.end - current_begin; + + expand_if_needed(); + } + } + + if (buffered_ranges.back().empty()) + buffered_ranges.pop_back(); +} + MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default; } diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index d102e4f07a4..c462c34aa83 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -15,6 +16,18 @@ class UncompressedCache; class MarkCache; struct PrewhereExprInfo; + +struct ParallelReadingExtension +{ + MergeTreeReadTaskCallback callback; + size_t count_participating_replicas{0}; + size_t number_of_current_replica{0}; + /// This is needed to estimate the number of bytes + /// between a pair of marks to perform one request + /// over the network for a 1Gb of data. 
+ Names colums_to_read; +}; + /// Base class for MergeTreeThreadSelectProcessor and MergeTreeSelectProcessor class MergeTreeBaseSelectProcessor : public SourceWithProgress { @@ -30,7 +43,8 @@ public: UInt64 preferred_max_column_in_block_size_bytes_, const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, - const Names & virt_column_names_ = {}); + const Names & virt_column_names_ = {}, + std::optional extension = {}); ~MergeTreeBaseSelectProcessor() override; @@ -43,10 +57,22 @@ public: const Block & sample_block); protected: + Chunk generate() final; - /// Creates new this->task, and initializes readers. - virtual bool getNewTask() = 0; + /// Creates new this->task and return a flag whether it was successful or not + virtual bool getNewTaskImpl() = 0; + /// Creates new readers for a task it is needed. These methods are separate, because + /// in case of parallel reading from replicas the whole task could be denied by a coodinator + /// or it could modified somehow. + virtual void finalizeNewTask() = 0; + + size_t estimateMaxBatchSizeForHugeRanges(); + + virtual bool canUseConsistentHashingForParallelReading() { return false; } + + /// Closes readers and unlock part locks + virtual void finish() = 0; virtual Chunk readFromPart(); @@ -82,14 +108,62 @@ protected: /// This header is used for chunks from readFromPart(). Block header_without_virtual_columns; - std::unique_ptr task; - std::shared_ptr owned_uncompressed_cache; std::shared_ptr owned_mark_cache; using MergeTreeReaderPtr = std::unique_ptr; MergeTreeReaderPtr reader; MergeTreeReaderPtr pre_reader; + + MergeTreeReadTaskPtr task; + + std::optional extension; + bool no_more_tasks{false}; + std::deque delayed_tasks; + std::deque buffered_ranges; + +private: + Poco::Logger * log = &Poco::Logger::get("MergeTreeBaseSelectProcessor"); + + enum class Status + { + Accepted, + Cancelled, + Denied + }; + + /// Calls getNewTaskImpl() to get new task, then performs a request to a coordinator + /// The coordinator may modify the set of ranges to read from a part or could + /// deny the whole request. In the latter case it creates new task and retries. + /// Then it calls finalizeNewTask() to create readers for a task if it is needed. + bool getNewTask(); + bool getNewTaskParallelReading(); + + /// After PK analysis the range of marks could be extremely big + /// We divide this range to a set smaller consecutive ranges + /// Then, depending on the type of reading (concurrent, in order or in reverse order) + /// we can calculate a consistent hash function with the number of buckets equal to + /// the number of replicas involved. And after that we can throw away some ranges with + /// hash not equals to the number of the current replica. + bool getTaskFromBuffer(); + + /// But we can't throw that ranges completely, because if we have different sets of parts + /// on replicas (have merged part on one, but not on another), then such a situation is possible + /// - Coordinator allows to read from a big merged part, but this part is present only on one replica. + /// And that replica calculates consistent hash and throws away some ranges + /// - Coordinator denies other replicas to read from another parts (source parts for that big one) + /// At the end, the result of the query is wrong, because we didn't read all the data. + /// So, we have to remember parts and mark ranges with hash different then current replica number. + /// An we have to ask the coordinator about its permission to read from that "delayed" parts. 
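// Illustrative sketch (standalone; a plain modulo over std::hash stands in for the
// consistent hash computed by getConsistentHash, and ToyReadRequest is not the real
// PartitionReadRequest). It shows the decision just described: a replica keeps a task
// only when the request hashes into its own bucket, otherwise the task is delayed and
// the coordinator is asked about it later.
#include <cstddef>
#include <deque>
#include <functional>
#include <string>

struct ToyReadRequest
{
    std::string part_name;
    size_t first_mark = 0;

    size_t bucket(size_t replica_count) const
    {
        return (std::hash<std::string>{}(part_name) ^ std::hash<size_t>{}(first_mark)) % replica_count;
    }
};

int main()
{
    const size_t replica_count = 3;
    const size_t my_replica = 1;

    std::deque<ToyReadRequest> delayed;
    ToyReadRequest request{"all_1_1_0", 0};

    if (request.bucket(replica_count) != my_replica)
        delayed.push_back(request);   // not our bucket: postpone and ask the coordinator later
    return static_cast<int>(delayed.size());
}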
+ /// It won't work with reading in order or reading in reverse order, because we can possibly seek back. + bool getDelayedTasks(); + + /// It will form a request a request to coordinator and + /// then reinitialize the mark ranges of this->task object + Status performRequestToCoordinator(MarkRanges requested_ranges, bool delayed); + + void splitCurrentTaskRangesAndFillBuffer(); + }; } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 88f3052e833..07d51d25700 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -128,8 +128,6 @@ MergeTreeReadTask::MergeTreeReadTask( { } -MergeTreeReadTask::~MergeTreeReadTask() = default; - MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor( const MergeTreeData::DataPartPtr & data_part_, const Names & columns, const Block & sample_block) @@ -175,8 +173,7 @@ void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const C ColumnInfo info; info.name = column_name; /// If column isn't fixed and doesn't have checksum, than take first - ColumnSize column_size = data_part->getColumnSize( - column_name, *column_with_type_and_name.type); + ColumnSize column_size = data_part->getColumnSize(column_name); info.bytes_per_row_global = column_size.data_uncompressed ? column_size.data_uncompressed / number_of_rows_in_part diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 2dfe6fcf06d..b931a13c027 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -14,7 +14,7 @@ struct MergeTreeReadTask; struct MergeTreeBlockSizePredictor; using MergeTreeReadTaskPtr = std::unique_ptr; -using MergeTreeBlockSizePredictorPtr = std::unique_ptr; +using MergeTreeBlockSizePredictorPtr = std::shared_ptr; /** If some of the requested columns are not in the part, @@ -59,8 +59,6 @@ struct MergeTreeReadTask const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, const NamesAndTypesList & pre_columns_, const bool remove_prewhere_column_, const bool should_reorder_, MergeTreeBlockSizePredictorPtr && size_predictor_); - - virtual ~MergeTreeReadTask(); }; struct MergeTreeReadTaskColumns diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3cf7023053f..e58472e572b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3163,7 +3163,7 @@ void MergeTreeData::addPartContributionToColumnAndSecondaryIndexSizes(const Data for (const auto & column : part->getColumns()) { ColumnSize & total_column_size = column_sizes[column.name]; - ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); + ColumnSize part_column_size = part->getColumnSize(column.name); total_column_size.add(part_column_size); } @@ -3181,7 +3181,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D for (const auto & column : part->getColumns()) { ColumnSize & total_column_size = column_sizes[column.name]; - ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); + ColumnSize part_column_size = part->getColumnSize(column.name); auto log_subtract = [&](size_t & from, size_t value, const char * field) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 
036e7d89c5a..cdedd37e14a 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -124,7 +124,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( const UInt64 max_block_size, const unsigned num_streams, QueryProcessingStage::Enum processed_stage, - std::shared_ptr max_block_numbers_to_read) const + std::shared_ptr max_block_numbers_to_read, + bool enable_parallel_reading) const { if (query_info.merge_tree_empty_result) return std::make_unique(); @@ -142,7 +143,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( max_block_size, num_streams, max_block_numbers_to_read, - query_info.merge_tree_select_result_ptr); + query_info.merge_tree_select_result_ptr, + enable_parallel_reading); if (plan->isInitialized() && settings.allow_experimental_projection_optimization && settings.force_optimize_projection && !metadata_snapshot->projections.empty()) @@ -184,7 +186,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( max_block_size, num_streams, max_block_numbers_to_read, - query_info.projection->merge_tree_projection_select_result_ptr); + query_info.projection->merge_tree_projection_select_result_ptr, + enable_parallel_reading); } if (projection_plan->isInitialized()) @@ -1210,7 +1213,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( const UInt64 max_block_size, const unsigned num_streams, std::shared_ptr max_block_numbers_to_read, - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr) const + MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr, + bool enable_parallel_reading) const { /// If merge_tree_select_result_ptr != nullptr, we use analyzed result so parts will always be empty. if (merge_tree_select_result_ptr) @@ -1243,7 +1247,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts( sample_factor_column_queried, max_block_numbers_to_read, log, - merge_tree_select_result_ptr + merge_tree_select_result_ptr, + enable_parallel_reading ); QueryPlanPtr plan = std::make_unique(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index f19d145fc93..3dde324ce22 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -34,7 +34,8 @@ public: UInt64 max_block_size, unsigned num_streams, QueryProcessingStage::Enum processed_stage, - std::shared_ptr max_block_numbers_to_read = nullptr) const; + std::shared_ptr max_block_numbers_to_read = nullptr, + bool enable_parallel_reading = false) const; /// The same as read, but with specified set of parts. QueryPlanPtr readFromParts( @@ -47,7 +48,8 @@ public: UInt64 max_block_size, unsigned num_streams, std::shared_ptr max_block_numbers_to_read = nullptr, - MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr) const; + MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr = nullptr, + bool enable_parallel_reading = false) const; /// Get an estimation for the number of marks we are going to read. /// Reads nothing. Secondary indexes are not used. 
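// Illustrative sketch (standalone; hypothetical names, not the MergeTreeDataSelectExecutor
// API). It shows the plumbing pattern used above: enable_parallel_reading is added as a
// defaulted trailing parameter and simply forwarded down the call chain, so existing call
// sites keep compiling while the storages that collaborate with an initiator can opt in.
#include <memory>

struct ToyPlan { bool parallel = false; };

std::unique_ptr<ToyPlan> readFromParts(bool enable_parallel_reading = false)
{
    auto plan = std::make_unique<ToyPlan>();
    plan->parallel = enable_parallel_reading;
    return plan;
}

std::unique_ptr<ToyPlan> read(bool enable_parallel_reading = false)
{
    return readFromParts(enable_parallel_reading);   // just forward the flag
}

int main()
{
    auto legacy = read();          // old call sites: parallel reading stays off
    auto parallel = read(true);    // a replica collaborating with the initiator
    return (legacy->parallel ? 1 : 0) + (parallel->parallel ? 0 : 1);
}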
diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp index 48a9d62d872..961106af51b 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.cpp @@ -8,14 +8,11 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } -bool MergeTreeInOrderSelectProcessor::getNewTask() +bool MergeTreeInOrderSelectProcessor::getNewTaskImpl() try { if (all_mark_ranges.empty()) - { - finish(); return false; - } if (!reader) initializeReaders(); diff --git a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h index ecf648b0291..467292d88bb 100644 --- a/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeInOrderSelectProcessor.h @@ -12,7 +12,7 @@ class MergeTreeInOrderSelectProcessor final : public MergeTreeSelectProcessor { public: template - MergeTreeInOrderSelectProcessor(Args &&... args) + explicit MergeTreeInOrderSelectProcessor(Args &&... args) : MergeTreeSelectProcessor{std::forward(args)...} { LOG_DEBUG(log, "Reading {} ranges in order from part {}, approx. {} rows starting from {}", @@ -23,7 +23,8 @@ public: String getName() const override { return "MergeTreeInOrder"; } private: - bool getNewTask() override; + bool getNewTaskImpl() override; + void finalizeNewTask() override {} Poco::Logger * log = &Poco::Logger::get("MergeTreeInOrderSelectProcessor"); }; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 124f13b14a8..8481cee0f86 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -14,6 +14,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } @@ -185,7 +186,7 @@ MergeTreeRangeReader::Stream::Stream( void MergeTreeRangeReader::Stream::checkNotFinished() const { if (isFinished()) - throw Exception("Cannot read out of marks range.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot read out of marks range.", ErrorCodes::BAD_ARGUMENTS); } void MergeTreeRangeReader::Stream::checkEnoughSpaceInCurrentGranule(size_t num_rows) const @@ -290,7 +291,7 @@ void MergeTreeRangeReader::ReadResult::adjustLastGranule() size_t num_rows_to_subtract = total_rows_per_granule - num_read_rows; if (rows_per_granule.empty()) - throw Exception("Can't adjust last granule because no granules were added.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Can't adjust last granule because no granules were added", ErrorCodes::LOGICAL_ERROR); if (num_rows_to_subtract > rows_per_granule.back()) throw Exception(ErrorCodes::LOGICAL_ERROR, diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 16ce9823ebb..6c4059d64d0 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -8,14 +8,11 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } -bool MergeTreeReverseSelectProcessor::getNewTask() +bool MergeTreeReverseSelectProcessor::getNewTaskImpl() try { if (chunks.empty() && all_mark_ranges.empty()) - { - finish(); return false; - } /// We have some blocks to return in buffer. /// Return true to continue reading, but actually don't create a task. 
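// Illustrative sketch (standalone; hypothetical classes, not the MergeTree processors).
// It shows the shape of the refactoring above: the base class owns the non-virtual
// getNewTask() driver, while the derived processors only provide getNewTaskImpl() /
// finalizeNewTask() / finish(). That way the coordinator round-trip can be inserted
// between "pick a task" and "create readers" without touching every subclass.
#include <utility>

struct ToyBaseSelectProcessor
{
    virtual ~ToyBaseSelectProcessor() = default;

    bool getNewTask()                        // the only entry point callers use
    {
        if (!getNewTaskImpl())
            return false;
        // ...with parallel reading, a request to the coordinator would go here,
        // and the task could be trimmed, delayed or denied before finalizing...
        finalizeNewTask();                   // create readers for the accepted task
        return true;
    }

protected:
    virtual bool getNewTaskImpl() = 0;
    virtual void finalizeNewTask() = 0;
    virtual void finish() = 0;               // closes readers / releases part locks
};

struct ToyInOrderSelectProcessor final : ToyBaseSelectProcessor
{
    bool has_ranges = true;
    bool getNewTaskImpl() override { return std::exchange(has_ranges, false); }
    void finalizeNewTask() override {}
    void finish() override {}
};

int main()
{
    ToyInOrderSelectProcessor processor;
    return processor.getNewTask() && !processor.getNewTask() ? 0 : 1;
}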
diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h index 18ab51c03a0..395f5d5cd2a 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h @@ -13,7 +13,7 @@ class MergeTreeReverseSelectProcessor final : public MergeTreeSelectProcessor { public: template - MergeTreeReverseSelectProcessor(Args &&... args) + explicit MergeTreeReverseSelectProcessor(Args &&... args) : MergeTreeSelectProcessor{std::forward(args)...} { LOG_DEBUG(log, "Reading {} ranges in reverse order from part {}, approx. {} rows starting from {}", @@ -24,7 +24,9 @@ public: String getName() const override { return "MergeTreeReverse"; } private: - bool getNewTask() override; + bool getNewTaskImpl() override; + void finalizeNewTask() override {} + Chunk readFromPart() override; Chunks chunks; diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 203ce7a57d2..2d4d3617cee 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -22,12 +22,13 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( const MergeTreeReaderSettings & reader_settings_, const Names & virt_column_names_, size_t part_index_in_query_, - bool has_limit_below_one_block_) + bool has_limit_below_one_block_, + std::optional extension_) : MergeTreeBaseSelectProcessor{ metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()), storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, - reader_settings_, use_uncompressed_cache_, virt_column_names_}, + reader_settings_, use_uncompressed_cache_, virt_column_names_, extension_}, required_columns{std::move(required_columns_)}, data_part{owned_data_part_}, sample_block(metadata_snapshot_->getSampleBlock()), @@ -36,7 +37,11 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( has_limit_below_one_block(has_limit_below_one_block_), total_rows(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges)) { - addTotalRowsApprox(total_rows); + /// Actually it means that parallel reading from replicas enabled + /// and we have to collaborate with initiator. + /// In this case we won't set approximate rows, because it will be accounted multiple times + if (!extension_.has_value()) + addTotalRowsApprox(total_rows); ordered_names = header_without_virtual_columns.getNames(); } @@ -64,6 +69,7 @@ void MergeTreeSelectProcessor::initializeReaders() } + void MergeTreeSelectProcessor::finish() { /** Close the files (before destroying the object). 
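// Illustrative sketch (standalone; hypothetical names). It mirrors the guard added
// above: when several replicas read the same part in parallel, each builds its own
// select processor over that part, so only processors without the parallel-reading
// extension contribute the part's row count to the approximate progress total;
// otherwise the initiator would see the total multiplied by the number of replicas.
#include <cstddef>
#include <optional>

struct ToyParallelExtension {};

struct ToyProgress { size_t total_rows_approx = 0; };

void registerProcessor(ToyProgress & progress, size_t part_rows,
                       const std::optional<ToyParallelExtension> & extension)
{
    if (!extension)
        progress.total_rows_approx += part_rows;   // single-replica reading: count once
}

int main()
{
    ToyProgress progress;
    // Three replicas each create a processor for the same 1000-row part.
    for (int replica = 0; replica < 3; ++replica)
        registerProcessor(progress, 1000, ToyParallelExtension{});
    return progress.total_rows_approx == 0 ? 0 : 1;   // the estimate is not triple-counted
}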
diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index f9b19f9f692..2ecdc3b59a8 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -31,17 +31,16 @@ public: const MergeTreeReaderSettings & reader_settings, const Names & virt_column_names = {}, size_t part_index_in_query_ = 0, - bool has_limit_below_one_block_ = false); + bool has_limit_below_one_block_ = false, + std::optional extension_ = {}); ~MergeTreeSelectProcessor() override; - /// Closes readers and unlock part locks - void finish(); - protected: /// Defer initialization from constructor, because it may be heavy - /// and it's better to do it lazily in `getNewTask`, which is executing in parallel. + /// and it's better to do it lazily in `getNewTaskImpl`, which is executing in parallel. void initializeReaders(); + void finish() override final; /// Used by Task Names required_columns; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index df8d6a7c127..687458ee681 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -36,6 +36,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( data_part->getMarksCount(), data_part->name, data_part->rows_count); } + /// Note, that we don't check setting collaborate_with_coordinator presence, because this source + /// is only used in background merges. addTotalRowsApprox(data_part->rows_count); /// Add columns because we don't want to read empty blocks diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 6a8ef860c87..6a44da06f1f 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -7,6 +7,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( const size_t thread_, @@ -21,12 +25,13 @@ MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, - const Names & virt_column_names_) + const Names & virt_column_names_, + std::optional extension_) : MergeTreeBaseSelectProcessor{ pool_->getHeader(), storage_, metadata_snapshot_, prewhere_info_, std::move(actions_settings), max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, - reader_settings_, use_uncompressed_cache_, virt_column_names_}, + reader_settings_, use_uncompressed_cache_, virt_column_names_, extension_}, thread{thread_}, pool{pool_} { @@ -39,28 +44,61 @@ MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( min_marks_to_read = (min_marks_to_read_ * fixed_index_granularity + max_block_size_rows - 1) / max_block_size_rows * max_block_size_rows / fixed_index_granularity; } + else if (extension.has_value()) + { + /// Parallel reading from replicas is enabled. 
+ /// We try to estimate the average number of bytes in a granule + /// to make one request over the network per one gigabyte of data + /// Actually we will ask MergeTreeReadPool to provide us heavier tasks to read + /// because the most part of each task will be postponed + /// (due to using consistent hash for better cache affinity) + const size_t amount_of_read_bytes_per_one_request = 1024 * 1024 * 1024; // 1GiB + /// In case of reading from compact parts (for which we can't estimate the average size of marks) + /// we will use this value + const size_t empirical_size_of_mark = 1024 * 1024 * 10; // 10 MiB + + if (extension->colums_to_read.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "A set of column to read is empty. It is a bug"); + + size_t sum_average_marks_size = 0; + auto column_sizes = storage.getColumnSizes(); + for (const auto & name : extension->colums_to_read) + { + auto it = column_sizes.find(name); + if (it == column_sizes.end()) + continue; + auto size = it->second; + + if (size.data_compressed == 0 || size.data_uncompressed == 0 || size.marks == 0) + continue; + + sum_average_marks_size += size.data_uncompressed / size.marks; + } + + if (sum_average_marks_size == 0) + sum_average_marks_size = empirical_size_of_mark * extension->colums_to_read.size(); + + min_marks_to_read = extension->count_participating_replicas * amount_of_read_bytes_per_one_request / sum_average_marks_size; + } else + { min_marks_to_read = min_marks_to_read_; + } + ordered_names = getPort().getHeader().getNames(); } /// Requests read task from MergeTreeReadPool and signals whether it got one -bool MergeTreeThreadSelectProcessor::getNewTask() +bool MergeTreeThreadSelectProcessor::getNewTaskImpl() { task = pool->getTask(min_marks_to_read, thread, ordered_names); + return static_cast(task); +} - if (!task) - { - /** Close the files (before destroying the object). - * When many sources are created, but simultaneously reading only a few of them, - * buffers don't waste memory. - */ - reader.reset(); - pre_reader.reset(); - return false; - } +void MergeTreeThreadSelectProcessor::finalizeNewTask() +{ const std::string part_name = task->data_part->isProjectionPart() ? task->data_part->getParentPart()->name : task->data_part->name; /// Allows pool to reduce number of threads in case of too slow reads. @@ -99,8 +137,13 @@ bool MergeTreeThreadSelectProcessor::getNewTask() } last_readed_part_name = part_name; +} - return true; + +void MergeTreeThreadSelectProcessor::finish() +{ + reader.reset(); + pre_reader.reset(); } diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h index d17b15c3635..110c4fa34e6 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h @@ -11,7 +11,7 @@ class MergeTreeReadPool; /** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked * to perform. 
*/ -class MergeTreeThreadSelectProcessor : public MergeTreeBaseSelectProcessor +class MergeTreeThreadSelectProcessor final : public MergeTreeBaseSelectProcessor { public: MergeTreeThreadSelectProcessor( @@ -27,8 +27,8 @@ public: const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, - - const Names & virt_column_names_); + const Names & virt_column_names_, + std::optional extension_); String getName() const override { return "MergeTreeThread"; } @@ -36,7 +36,13 @@ public: protected: /// Requests read task from MergeTreeReadPool and signals whether it got one - bool getNewTask() override; + bool getNewTaskImpl() override; + + void finalizeNewTask() override; + + void finish() override; + + bool canUseConsistentHashingForParallelReading() override { return true; } private: /// "thread" index (there are N threads and each thread is assigned index in interval [0..N-1]) diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp new file mode 100644 index 00000000000..80f438a46db --- /dev/null +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.cpp @@ -0,0 +1,143 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include +#include +#include "IO/WriteBufferFromString.h" +#include +#include + +namespace DB +{ + +class ParallelReplicasReadingCoordinator::Impl +{ +public: + using PartitionReadRequestPtr = std::unique_ptr; + using PartToMarkRanges = std::map; + + struct PartitionReading + { + PartSegments part_ranges; + PartToMarkRanges mark_ranges_in_part; + }; + + using PartitionToBlockRanges = std::map; + PartitionToBlockRanges partitions; + + std::mutex mutex; + + PartitionReadResponse handleRequest(PartitionReadRequest request); +}; + + +PartitionReadResponse ParallelReplicasReadingCoordinator::Impl::handleRequest(PartitionReadRequest request) +{ + AtomicStopwatch watch; + std::lock_guard lock(mutex); + + auto partition_it = partitions.find(request.partition_id); + + SCOPE_EXIT({ + LOG_TRACE(&Poco::Logger::get("ParallelReplicasReadingCoordinator"), "Time for handling request: {}ns", watch.elapsed()); + }); + + PartToRead::PartAndProjectionNames part_and_projection + { + .part = request.part_name, + .projection = request.projection_name + }; + + /// We are the first who wants to process parts in partition + if (partition_it == partitions.end()) + { + PartitionReading partition_reading; + + PartToRead part_to_read; + part_to_read.range = request.block_range; + part_to_read.name = part_and_projection; + + partition_reading.part_ranges.addPart(std::move(part_to_read)); + + /// As this query is first in partition, we will accept all ranges from it. + /// We need just to update our state. 
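// Illustrative sketch (standalone and deliberately simplified: it tracks individual
// marks in a std::set instead of half-open interval sets, and ToyCoordinator is not
// the coordinator's real type). It shows the bookkeeping idea of handleRequest: grant
// a replica only the portion of its requested range that nobody has taken yet, and
// record the grant so a later overlapping request from another replica is trimmed or
// denied entirely.
#include <cassert>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <vector>

struct ToyCoordinator
{
    std::map<std::string, std::set<size_t>> taken_marks;   // part name -> granted marks

    // Returns the subset of [begin, end) this replica is allowed to read.
    std::vector<size_t> handleRequest(const std::string & part, size_t begin, size_t end)
    {
        std::vector<size_t> granted;
        auto & taken = taken_marks[part];
        for (size_t mark = begin; mark < end; ++mark)
            if (taken.insert(mark).second)
                granted.push_back(mark);
        return granted;                    // empty means "denied, nothing left for you"
    }
};

int main()
{
    ToyCoordinator coordinator;
    assert(coordinator.handleRequest("all_1_1_0", 1, 4).size() == 3);   // first replica: marks 1..3
    assert(coordinator.handleRequest("all_1_1_0", 2, 5).size() == 1);   // second replica: only mark 4
    assert(coordinator.handleRequest("all_1_1_0", 1, 4).empty());       // everything already granted
}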
+ auto request_ranges = HalfIntervals::initializeFromMarkRanges(request.mark_ranges); + auto mark_ranges_index = HalfIntervals::initializeWithEntireSpace(); + mark_ranges_index.intersect(request_ranges.negate()); + + partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); + partitions.insert({request.partition_id, std::move(partition_reading)}); + + return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; + } + + auto & partition_reading = partition_it->second; + + PartToRead part_to_read; + part_to_read.range = request.block_range; + part_to_read.name = part_and_projection; + + auto part_intersection_res = partition_reading.part_ranges.getIntersectionResult(part_to_read); + + switch (part_intersection_res) + { + case PartSegments::IntersectionResult::REJECT: + { + return {.denied = true, .mark_ranges = {}}; + } + case PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION: + { + auto marks_it = partition_reading.mark_ranges_in_part.find(part_and_projection); + + auto & intervals_to_do = marks_it->second; + auto result = HalfIntervals::initializeFromMarkRanges(request.mark_ranges); + result.intersect(intervals_to_do); + + /// Update intervals_to_do + intervals_to_do.intersect(HalfIntervals::initializeFromMarkRanges(std::move(request.mark_ranges)).negate()); + + auto result_ranges = result.convertToMarkRangesFinal(); + const bool denied = result_ranges.empty(); + return {.denied = denied, .mark_ranges = std::move(result_ranges)}; + } + case PartSegments::IntersectionResult::NO_INTERSECTION: + { + partition_reading.part_ranges.addPart(std::move(part_to_read)); + + auto mark_ranges_index = HalfIntervals::initializeWithEntireSpace().intersect( + HalfIntervals::initializeFromMarkRanges(request.mark_ranges).negate() + ); + partition_reading.mark_ranges_in_part.insert({part_and_projection, std::move(mark_ranges_index)}); + + return {.denied = false, .mark_ranges = std::move(request.mark_ranges)}; + } + } + + __builtin_unreachable(); +} + +PartitionReadResponse ParallelReplicasReadingCoordinator::handleRequest(PartitionReadRequest request) +{ + return pimpl->handleRequest(std::move(request)); +} + +ParallelReplicasReadingCoordinator::ParallelReplicasReadingCoordinator() +{ + pimpl = std::make_unique(); +} + +ParallelReplicasReadingCoordinator::~ParallelReplicasReadingCoordinator() = default; + +} diff --git a/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h new file mode 100644 index 00000000000..af74e0fae49 --- /dev/null +++ b/src/Storages/MergeTree/ParallelReplicasReadingCoordinator.h @@ -0,0 +1,20 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ParallelReplicasReadingCoordinator +{ +public: + ParallelReplicasReadingCoordinator(); + ~ParallelReplicasReadingCoordinator(); + PartitionReadResponse handleRequest(PartitionReadRequest request); +private: + class Impl; + std::unique_ptr pimpl; +}; + +} diff --git a/src/Storages/MergeTree/RequestResponse.cpp b/src/Storages/MergeTree/RequestResponse.cpp new file mode 100644 index 00000000000..a266540b99a --- /dev/null +++ b/src/Storages/MergeTree/RequestResponse.cpp @@ -0,0 +1,141 @@ +#include + +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_PROTOCOL; +} + +static void readMarkRangesBinary(MarkRanges & ranges, ReadBuffer & buf, size_t MAX_RANGES_SIZE = DEFAULT_MAX_STRING_SIZE) +{ + size_t size = 0; + readVarUInt(size, buf); + 
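// Illustrative sketch (standalone; plain 64-bit values in a std::vector stand in for
// the var-int encoded WriteBuffer/ReadBuffer stream, so this is the framing idea only,
// not the actual wire format). A count prefix is written before the begin/end pairs,
// and the reader validates that count against an upper bound before allocating
// anything, which is the kind of guard applied right below.
#include <cstdint>
#include <stdexcept>
#include <utility>
#include <vector>

using ToyRange = std::pair<uint64_t, uint64_t>;

void writeRanges(const std::vector<ToyRange> & ranges, std::vector<uint64_t> & out)
{
    out.push_back(ranges.size());                 // count prefix
    for (const auto & [begin, end] : ranges)
    {
        out.push_back(begin);
        out.push_back(end);
    }
}

std::vector<ToyRange> readRanges(const std::vector<uint64_t> & in, uint64_t max_ranges = 1 << 20)
{
    // Never trust a size read from the network before using it for allocation.
    if (in.empty() || in.front() > max_ranges || in.size() < 1 + 2 * in.front())
        throw std::runtime_error("Too large or malformed ranges size");

    std::vector<ToyRange> ranges(in.front());
    for (uint64_t i = 0; i < ranges.size(); ++i)
        ranges[i] = {in[1 + 2 * i], in[2 + 2 * i]};
    return ranges;
}

int main()
{
    std::vector<uint64_t> wire;
    writeRanges({{1, 2}, {3, 4}}, wire);
    return readRanges(wire).size() == 2 ? 0 : 1;
}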
+ if (size > MAX_RANGES_SIZE) + throw Poco::Exception("Too large ranges size."); + + ranges.resize(size); + for (size_t i = 0; i < size; ++i) + { + readBinary(ranges[i].begin, buf); + readBinary(ranges[i].end, buf); + } +} + + +static void writeMarkRangesBinary(const MarkRanges & ranges, WriteBuffer & buf) +{ + writeVarUInt(ranges.size(), buf); + + for (const auto & [begin, end] : ranges) + { + writeBinary(begin, buf); + writeBinary(end, buf); + } +} + + +void PartitionReadRequest::serialize(WriteBuffer & out) const +{ + /// Must be the first + writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); + + writeStringBinary(partition_id, out); + writeStringBinary(part_name, out); + writeStringBinary(projection_name, out); + + writeVarInt(block_range.begin, out); + writeVarInt(block_range.end, out); + + writeMarkRangesBinary(mark_ranges, out); +} + + +void PartitionReadRequest::describe(WriteBuffer & out) const +{ + String result; + result += fmt::format("partition_id: {} \n", partition_id); + result += fmt::format("part_name: {} \n", part_name); + result += fmt::format("projection_name: {} \n", projection_name); + result += fmt::format("block_range: ({}, {}) \n", block_range.begin, block_range.end); + result += "mark_ranges: "; + for (const auto & range : mark_ranges) + result += fmt::format("({}, {}) ", range.begin, range.end); + result += '\n'; + out.write(result.c_str(), result.size()); +} + +void PartitionReadRequest::deserialize(ReadBuffer & in) +{ + UInt64 version; + readVarUInt(version, in); + if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading \ + from replicas differ. Got: {}, supported version: {}", + version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + + readStringBinary(partition_id, in); + readStringBinary(part_name, in); + readStringBinary(projection_name, in); + + readVarInt(block_range.begin, in); + readVarInt(block_range.end, in); + + readMarkRangesBinary(mark_ranges, in); +} + +UInt64 PartitionReadRequest::getConsistentHash(size_t buckets) const +{ + auto hash = SipHash(); + hash.update(partition_id); + hash.update(part_name); + hash.update(projection_name); + + hash.update(block_range.begin); + hash.update(block_range.end); + + for (const auto & range : mark_ranges) + { + hash.update(range.begin); + hash.update(range.end); + } + + return ConsistentHashing(hash.get64(), buckets); +} + + +void PartitionReadResponse::serialize(WriteBuffer & out) const +{ + /// Must be the first + writeVarUInt(DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION, out); + + writeVarUInt(static_cast(denied), out); + writeMarkRangesBinary(mark_ranges, out); +} + + +void PartitionReadResponse::deserialize(ReadBuffer & in) +{ + UInt64 version; + readVarUInt(version, in); + if (version != DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION) + throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol versions for parallel reading \ + from replicas differ. 
Got: {}, supported version: {}", + version, DBMS_PARALLEL_REPLICAS_PROTOCOL_VERSION); + + UInt64 value; + readVarUInt(value, in); + denied = static_cast(value); + readMarkRangesBinary(mark_ranges, in); +} + +} diff --git a/src/Storages/MergeTree/RequestResponse.h b/src/Storages/MergeTree/RequestResponse.h new file mode 100644 index 00000000000..85c8f7181af --- /dev/null +++ b/src/Storages/MergeTree/RequestResponse.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +#include + +#include +#include + +#include + + +namespace DB +{ + +/// Represents a segment [left; right] +struct PartBlockRange +{ + Int64 begin; + Int64 end; + + bool operator==(const PartBlockRange & rhs) const + { + return begin == rhs.begin && end == rhs.end; + } +}; + +struct PartitionReadRequest +{ + String partition_id; + String part_name; + String projection_name; + PartBlockRange block_range; + MarkRanges mark_ranges; + + void serialize(WriteBuffer & out) const; + void describe(WriteBuffer & out) const; + void deserialize(ReadBuffer & in); + + UInt64 getConsistentHash(size_t buckets) const; +}; + +struct PartitionReadResponse +{ + bool denied{false}; + MarkRanges mark_ranges{}; + + void serialize(WriteBuffer & out) const; + void deserialize(ReadBuffer & in); +}; + + +using MergeTreeReadTaskCallback = std::function(PartitionReadRequest)>; + + +} diff --git a/src/Storages/MergeTree/tests/gtest_coordinator.cpp b/src/Storages/MergeTree/tests/gtest_coordinator.cpp new file mode 100644 index 00000000000..7bcf3304c2b --- /dev/null +++ b/src/Storages/MergeTree/tests/gtest_coordinator.cpp @@ -0,0 +1,240 @@ +#include + +#include +#include +#include + +#include + +#include + +using namespace DB; + + +TEST(HalfIntervals, Simple) +{ + ASSERT_TRUE(( + HalfIntervals{{{1, 2}, {3, 4}}}.negate() == + HalfIntervals{{{0, 1}, {2, 3}, {4, 18446744073709551615UL}}} + )); + + { + auto left = HalfIntervals{{{0, 2}, {4, 6}}}.negate(); + ASSERT_TRUE(( + left == + HalfIntervals{{{2, 4}, {6, 18446744073709551615UL}}} + )); + } + + { + auto left = HalfIntervals{{{0, 2}, {4, 6}}}; + auto right = HalfIntervals{{{1, 5}}}.negate(); + auto intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{0, 1}, {5, 6}}} + )); + } + + { + auto left = HalfIntervals{{{1, 2}, {2, 3}}}; + auto right = HalfIntervals::initializeWithEntireSpace(); + auto intersection = right.intersect(left.negate()); + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{0, 1}, {3, 18446744073709551615UL}}} + )); + } + + { + auto left = HalfIntervals{{{1, 2}, {2, 3}, {3, 4}, {4, 5}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 4}}}).convertToMarkRangesFinal().size(), 3); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 5}}}).convertToMarkRangesFinal().size(), 4); + } + + { + auto left = HalfIntervals{{{1, 3}, {3, 5}, {5, 7}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{3, 5}}}).convertToMarkRangesFinal().size(), 1); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{3, 7}}}).convertToMarkRangesFinal().size(), 2); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{4, 6}}}).convertToMarkRangesFinal().size(), 2); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 7}}}).convertToMarkRangesFinal().size(), 3); + } + + { + auto left = HalfIntervals{{{1, 3}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{3, 4}}}).convertToMarkRangesFinal().size(), 0); + } + + { + auto left = HalfIntervals{{{1, 2}, {3, 4}, {5, 6}}}; + + ASSERT_EQ(getIntersection(left, HalfIntervals{{{2, 3}}}).convertToMarkRangesFinal().size(), 0); + 
ASSERT_EQ(getIntersection(left, HalfIntervals{{{4, 5}}}).convertToMarkRangesFinal().size(), 0); + ASSERT_EQ(getIntersection(left, HalfIntervals{{{1, 6}}}).convertToMarkRangesFinal().size(), 3); + } +} + +TEST(HalfIntervals, TwoRequests) +{ + auto left = HalfIntervals{{{1, 2}, {2, 3}}}; + auto right = HalfIntervals{{{2, 3}, {3, 4}}}; + auto intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{2, 3}}} + )); + + /// With negation + left = HalfIntervals{{{1, 2}, {2, 3}}}.negate(); + right = HalfIntervals{{{2, 3}, {3, 4}}}; + intersection = left.intersect(right); + + + ASSERT_TRUE(( + intersection == + HalfIntervals{{{3, 4}}} + )); +} + +TEST(HalfIntervals, SelfIntersection) +{ + auto left = HalfIntervals{{{1, 2}, {2, 3}, {4, 5}}}; + auto right = left; + auto intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == right + )); + + left = HalfIntervals{{{1, 2}, {2, 3}, {4, 5}}}; + right = left; + right.negate(); + intersection = left.intersect(right); + + ASSERT_TRUE(( + intersection == HalfIntervals{} + )); +} + + +TEST(Coordinator, Simple) +{ + PartitionReadRequest request; + request.partition_id = "a"; + request.part_name = "b"; + request.projection_name = "c"; + request.block_range = PartBlockRange{1, 2}; + request.mark_ranges = MarkRanges{{1, 2}, {3, 4}}; + + ParallelReplicasReadingCoordinator coordinator; + auto response = coordinator.handleRequest(request); + + ASSERT_FALSE(response.denied) << "Process request at first has to be accepted"; + + ASSERT_EQ(response.mark_ranges.size(), request.mark_ranges.size()); + + for (int i = 0; i < response.mark_ranges.size(); ++i) + EXPECT_EQ(response.mark_ranges[i], request.mark_ranges[i]); + + response = coordinator.handleRequest(request); + ASSERT_TRUE(response.denied) << "Process the same request second time"; +} + + +TEST(Coordinator, TwoRequests) +{ + PartitionReadRequest first; + first.partition_id = "a"; + first.part_name = "b"; + first.projection_name = "c"; + first.block_range = PartBlockRange{0, 0}; + first.mark_ranges = MarkRanges{{1, 2}, {2, 3}}; + + auto second = first; + second.mark_ranges = MarkRanges{{2, 3}, {3, 4}}; + + ParallelReplicasReadingCoordinator coordinator; + auto response = coordinator.handleRequest(first); + + ASSERT_FALSE(response.denied) << "First request must me accepted"; + + ASSERT_EQ(response.mark_ranges.size(), first.mark_ranges.size()); + for (int i = 0; i < response.mark_ranges.size(); ++i) + EXPECT_EQ(response.mark_ranges[i], first.mark_ranges[i]); + + response = coordinator.handleRequest(second); + ASSERT_FALSE(response.denied); + ASSERT_EQ(response.mark_ranges.size(), 1); + ASSERT_EQ(response.mark_ranges.front(), (MarkRange{3, 4})); +} + + +TEST(Coordinator, PartIntersections) +{ + { + PartSegments boundaries; + + boundaries.addPart(PartToRead{{1, 1}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{2, 2}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{3, 3}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{4, 4}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{1, 4}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 5}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 
1}, {"ClickHouse", "AnotherProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 2}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + + boundaries.addPart(PartToRead{{5, 5}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{0, 0}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{0, 5}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"ClickHouse", "AnotherProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 2}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 3}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + } + + { + PartSegments boundaries; + boundaries.addPart(PartToRead{{1, 3}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{4, 5}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{2, 4}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 6}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + } + + { + PartSegments boundaries; + boundaries.addPart(PartToRead{{1, 3}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{4, 6}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{7, 9}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{2, 8}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{4, 6}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{3, 7}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{5, 7}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + } + + { + PartSegments boundaries; + + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 3}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{0, 100500}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + + boundaries.addPart(PartToRead{{1, 1}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{2, 2}, {"TestPart", "TestProjection"}}); + boundaries.addPart(PartToRead{{3, 3}, {"TestPart", "TestProjection"}}); + + ASSERT_EQ(boundaries.getIntersectionResult({{1, 1}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::EXACTLY_ONE_INTERSECTION); + ASSERT_EQ(boundaries.getIntersectionResult({{1, 3}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::REJECT); + ASSERT_EQ(boundaries.getIntersectionResult({{100, 100500}, {"TestPart", "TestProjection"}}), PartSegments::IntersectionResult::NO_INTERSECTION); + } +} diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a45afd847e6..e033d319fc8 100644 --- 
a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -110,6 +110,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int INFINITE_LOOP; + extern const int ILLEGAL_FINAL; extern const int TYPE_MISMATCH; extern const int TOO_MANY_ROWS; extern const int UNABLE_TO_SKIP_UNUSED_SHARDS; @@ -273,7 +274,7 @@ size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & clus { size_t num_local_shards = cluster->getLocalShardCount(); size_t num_remote_shards = cluster->getRemoteShardCount(); - return (num_remote_shards * settings.max_parallel_replicas) + num_local_shards; + return (num_remote_shards + num_local_shards) * settings.max_parallel_replicas; } } @@ -590,6 +591,10 @@ void StorageDistributed::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { + const auto * select_query = query_info.query->as(); + if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas) + throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature"); + const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table, remote_table_function_ptr); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 8a3d786532e..470a406dbe4 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -191,7 +191,14 @@ void StorageMergeTree::read( size_t max_block_size, unsigned num_streams) { - if (auto plan = reader.read(column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage)) + /// If true, then we will ask initiator if we can read chosen ranges + bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; + + if (enable_parallel_reading) + LOG_TRACE(log, "Parallel reading from replicas enabled {}", enable_parallel_reading); + + if (auto plan = reader.read( + column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage, nullptr, enable_parallel_reading)) query_plan = std::move(*plan); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 89506184354..d0d52fd488a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -48,8 +48,10 @@ #include #include +#include #include #include +#include #include #include @@ -61,6 +63,8 @@ #include #include #include +#include +#include #include @@ -4228,6 +4232,9 @@ void StorageReplicatedMergeTree::read( const size_t max_block_size, const unsigned num_streams) { + /// If true, then we will ask initiator if we can read chosen ranges + const bool enable_parallel_reading = local_context->getClientInfo().collaborate_with_initiator; + /** The `select_sequential_consistency` setting has two meanings: * 1. To throw an exception if on a replica there are not all parts which have been written down on quorum of remaining replicas. * 2. Do not read parts that have not yet been written to the quorum of the replicas. 
@@ -4237,13 +4244,18 @@ void StorageReplicatedMergeTree::read( { auto max_added_blocks = std::make_shared(getMaxAddedBlocks()); if (auto plan = reader.read( - column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage, std::move(max_added_blocks))) + column_names, metadata_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, std::move(max_added_blocks), enable_parallel_reading)) query_plan = std::move(*plan); return; } - if (auto plan = reader.read(column_names, metadata_snapshot, query_info, local_context, max_block_size, num_streams, processed_stage)) + if (auto plan = reader.read( + column_names, metadata_snapshot, query_info, local_context, + max_block_size, num_streams, processed_stage, nullptr, enable_parallel_reading)) + { query_plan = std::move(*plan); + } } Pipe StorageReplicatedMergeTree::read( diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index f49fd35044d..659071b392d 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -126,7 +126,7 @@ Pipe StorageS3Cluster::read( scalars, Tables(), processed_stage, - callback); + RemoteQueryExecutor::Extension{.task_iterator = callback}); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); } diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 33ec5c457f6..8dbd73628ca 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -206,7 +206,7 @@ void StorageSystemPartsColumns::processNextStorage( columns[res_index++]->insertDefault(); } - ColumnSize column_size = part->getColumnSize(column.name, *column.type); + ColumnSize column_size = part->getColumnSize(column.name); if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.data_compressed + column_size.marks); if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp index bdbe9a46846..f6490177014 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.cpp +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.cpp @@ -237,7 +237,7 @@ void StorageSystemProjectionPartsColumns::processNextStorage( columns[res_index++]->insertDefault(); } - ColumnSize column_size = part->getColumnSize(column.name, *column.type); + ColumnSize column_size = part->getColumnSize(column.name); if (columns_mask[src_index++]) columns[res_index++]->insert(column_size.data_compressed + column_size.marks); if (columns_mask[src_index++]) diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index 6bc5fe268d6..f9cbf92db41 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,7 +1,6 @@ -- Tags: replica, distributed SET max_parallel_replicas = 2; - DROP TABLE IF EXISTS report; CREATE TABLE report(id UInt32, event_date Date, priority UInt32, description String) ENGINE = MergeTree(event_date, intHash32(id), (id, event_date, intHash32(id)), 8192); diff --git a/tests/queries/0_stateless/01870_modulo_partition_key.sql b/tests/queries/0_stateless/01870_modulo_partition_key.sql index 06b6fc86d3e..3d839de9c64 100644 --- 
a/tests/queries/0_stateless/01870_modulo_partition_key.sql +++ b/tests/queries/0_stateless/01870_modulo_partition_key.sql @@ -50,7 +50,7 @@ SELECT count() FROM table4 WHERE id % 10 = 7; SELECT 'comparison:'; SELECT v, v-205 as vv, modulo(vv, 200), moduloLegacy(vv, 200) FROM table1 ORDER BY v; -DROP TABLE table1; -DROP TABLE table2; -DROP TABLE table3; -DROP TABLE table4; +DROP TABLE table1 SYNC; +DROP TABLE table2 SYNC; +DROP TABLE table3 SYNC; +DROP TABLE table4 SYNC; diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference new file mode 100644 index 00000000000..2675904dea0 --- /dev/null +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.reference @@ -0,0 +1,110 @@ +Testing 00001_count_hits.sql ----> Ok! ✅ +Testing 00002_count_visits.sql ----> Ok! ✅ +Testing 00004_top_counters.sql ----> Ok! ✅ +Testing 00005_filtering.sql ----> Ok! ✅ +Testing 00006_agregates.sql ----> Ok! ✅ +Testing 00007_uniq.sql ----> Ok! ✅ +Testing 00008_uniq.sql ----> Ok! ✅ +Testing 00009_uniq_distributed.sql ----> Ok! ✅ +Testing 00010_quantiles_segfault.sql ----> Ok! ✅ +Testing 00011_sorting.sql ----> Ok! ✅ +Testing 00012_sorting_distributed.sql ----> Ok! ✅ +Skipping 00013_sorting_of_nested.sql +Testing 00014_filtering_arrays.sql ----> Ok! ✅ +Testing 00015_totals_and_no_aggregate_functions.sql ----> Ok! ✅ +Testing 00016_any_if_distributed_cond_always_false.sql ----> Ok! ✅ +Testing 00017_aggregation_uninitialized_memory.sql ----> Ok! ✅ +Testing 00020_distinct_order_by_distributed.sql ----> Ok! ✅ +Testing 00021_1_select_with_in.sql ----> Ok! ✅ +Testing 00021_2_select_with_in.sql ----> Ok! ✅ +Testing 00021_3_select_with_in.sql ----> Ok! ✅ +Testing 00022_merge_prewhere.sql ----> Ok! ✅ +Testing 00023_totals_limit.sql ----> Ok! ✅ +Testing 00024_random_counters.sql ----> Ok! ✅ +Testing 00030_array_enumerate_uniq.sql ----> Ok! ✅ +Testing 00031_array_enumerate_uniq.sql ----> Ok! ✅ +Testing 00032_aggregate_key64.sql ----> Ok! ✅ +Testing 00033_aggregate_key_string.sql ----> Ok! ✅ +Testing 00034_aggregate_key_fixed_string.sql ----> Ok! ✅ +Testing 00035_aggregate_keys128.sql ----> Ok! ✅ +Testing 00036_aggregate_hashed.sql ----> Ok! ✅ +Testing 00037_uniq_state_merge1.sql ----> Ok! ✅ +Testing 00038_uniq_state_merge2.sql ----> Ok! ✅ +Testing 00039_primary_key.sql ----> Ok! ✅ +Testing 00040_aggregating_materialized_view.sql ----> Ok! ✅ +Testing 00041_aggregating_materialized_view.sql ----> Ok! ✅ +Testing 00042_any_left_join.sql ----> Ok! ✅ +Testing 00043_any_left_join.sql ----> Ok! ✅ +Testing 00044_any_left_join_string.sql ----> Ok! ✅ +Testing 00045_uniq_upto.sql ----> Ok! ✅ +Testing 00046_uniq_upto_distributed.sql ----> Ok! ✅ +Testing 00047_bar.sql ----> Ok! ✅ +Testing 00048_min_max.sql ----> Ok! ✅ +Testing 00049_max_string_if.sql ----> Ok! ✅ +Testing 00050_min_max.sql ----> Ok! ✅ +Testing 00051_min_max_array.sql ----> Ok! ✅ +Testing 00052_group_by_in.sql ----> Ok! ✅ +Testing 00053_replicate_segfault.sql ----> Ok! ✅ +Testing 00054_merge_tree_partitions.sql ----> Ok! ✅ +Testing 00055_index_and_not.sql ----> Ok! ✅ +Testing 00056_view.sql ----> Ok! ✅ +Testing 00059_merge_sorting_empty_array_joined.sql ----> Ok! ✅ +Testing 00060_move_to_prewhere_and_sets.sql ----> Ok! ✅ +Skipping 00061_storage_buffer.sql +Testing 00062_loyalty.sql ----> Ok! ✅ +Testing 00063_loyalty_joins.sql ----> Ok! ✅ +Testing 00065_loyalty_with_storage_join.sql ----> Ok! 
✅ +Testing 00066_sorting_distributed_many_replicas.sql ----> Ok! ✅ +Testing 00067_union_all.sql ----> Ok! ✅ +Testing 00068_subquery_in_prewhere.sql ----> Ok! ✅ +Testing 00069_duplicate_aggregation_keys.sql ----> Ok! ✅ +Testing 00071_merge_tree_optimize_aio.sql ----> Ok! ✅ +Testing 00072_compare_date_and_string_index.sql ----> Ok! ✅ +Testing 00073_uniq_array.sql ----> Ok! ✅ +Testing 00074_full_join.sql ----> Ok! ✅ +Testing 00075_left_array_join.sql ----> Ok! ✅ +Testing 00076_system_columns_bytes.sql ----> Ok! ✅ +Testing 00077_log_tinylog_stripelog.sql ----> Ok! ✅ +Testing 00078_group_by_arrays.sql ----> Ok! ✅ +Testing 00079_array_join_not_used_joined_column.sql ----> Ok! ✅ +Testing 00080_array_join_and_union.sql ----> Ok! ✅ +Testing 00081_group_by_without_key_and_totals.sql ----> Ok! ✅ +Testing 00082_quantiles.sql ----> Ok! ✅ +Testing 00083_array_filter.sql ----> Ok! ✅ +Testing 00084_external_aggregation.sql ----> Ok! ✅ +Testing 00085_monotonic_evaluation_segfault.sql ----> Ok! ✅ +Testing 00086_array_reduce.sql ----> Ok! ✅ +Testing 00087_where_0.sql ----> Ok! ✅ +Testing 00088_global_in_one_shard_and_rows_before_limit.sql ----> Ok! ✅ +Testing 00089_position_functions_with_non_constant_arg.sql ----> Ok! ✅ +Testing 00091_prewhere_two_conditions.sql ----> Ok! ✅ +Testing 00093_prewhere_array_join.sql ----> Ok! ✅ +Testing 00094_order_by_array_join_limit.sql ----> Ok! ✅ +Skipping 00095_hyperscan_profiler.sql +Testing 00139_like.sql ----> Ok! ✅ +Skipping 00140_rename.sql +Testing 00141_transform.sql ----> Ok! ✅ +Testing 00142_system_columns.sql ----> Ok! ✅ +Testing 00143_transform_non_const_default.sql ----> Ok! ✅ +Testing 00144_functions_of_aggregation_states.sql ----> Ok! ✅ +Testing 00145_aggregate_functions_statistics.sql ----> Ok! ✅ +Testing 00146_aggregate_function_uniq.sql ----> Ok! ✅ +Testing 00147_global_in_aggregate_function.sql ----> Ok! ✅ +Testing 00148_monotonic_functions_and_index.sql ----> Ok! ✅ +Testing 00149_quantiles_timing_distributed.sql ----> Ok! ✅ +Testing 00150_quantiles_timing_precision.sql ----> Ok! ✅ +Testing 00151_order_by_read_in_order.sql ----> Ok! ✅ +Skipping 00151_replace_partition_with_different_granularity.sql +Skipping 00152_insert_different_granularity.sql +Testing 00153_aggregate_arena_race.sql ----> Ok! ✅ +Skipping 00154_avro.sql +Testing 00156_max_execution_speed_sample_merge.sql ----> Ok! ✅ +Skipping 00157_cache_dictionary.sql +Skipping 00158_cache_dictionary_has.sql +Testing 00160_decode_xml_component.sql ----> Ok! ✅ +Testing 00162_mmap_compression_none.sql ----> Ok! ✅ +Testing 00164_quantileBfloat16.sql ----> Ok! ✅ +Testing 00165_jit_aggregate_functions.sql ----> Ok! ✅ +Skipping 00166_explain_estimate.sql +Testing 00167_read_bytes_from_fs.sql ----> Ok! ✅ +Total failed tests: diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh new file mode 100755 index 00000000000..ba1245d9679 --- /dev/null +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh @@ -0,0 +1,98 @@ +#!/usr/bin/env bash +# Tags: no-tsan + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +# set -e + +# All replicas are localhost, disable `prefer_localhost_replica` option to test network interface +# Currently this feature could not work with hedged requests +# Enabling `enable_sample_offset_parallel_processing` feature could lead to intersecting marks, so some of them would be thrown away and it will lead to incorrect result of SELECT query +SETTINGS="--max_parallel_replicas=3 --prefer_localhost_replica=false --use_hedged_requests=false --async_socket_for_remote=false --allow_experimental_parallel_reading_from_replicas=true" + +# Prepare tables +$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' + drop table if exists test.dist_hits SYNC; + drop table if exists test.dist_visits SYNC; + + create table test.dist_hits as test.hits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, hits, rand()); + create table test.dist_visits as test.visits engine = Distributed("test_cluster_one_shard_three_replicas_localhost", test, visits, rand()); +'''; + +FAILED=() + +# PreviouslyFailed=( +# ) + +SkipList=( + "00013_sorting_of_nested.sql" # It contains FINAL, which is not allowed together with parallel reading + + "00061_storage_buffer.sql" + "00095_hyperscan_profiler.sql" # too long in debug (there is a --no-debug tag inside a test) + + "00140_rename.sql" # Multiple renames are not allowed with DatabaseReplicated and tags are not forwarded through this test + + "00154_avro.sql" # Plain select * with limit with Distributed table is not deterministic + "00151_replace_partition_with_different_granularity.sql" # Replace partition from Distributed is not allowed + "00152_insert_different_granularity.sql" # The same as above + + "00157_cache_dictionary.sql" # Too long in debug mode, but result is correct + "00158_cache_dictionary_has.sql" # The same as above + + "00166_explain_estimate.sql" # Distributed table returns nothing +) + +# for TESTPATH in "${PreviouslyFailed[@]}" +for TESTPATH in "$CURDIR"/*.sql; +do + TESTNAME=$(basename $TESTPATH) + + if [[ " ${SkipList[*]} " =~ ${TESTNAME} ]]; then + echo "Skipping $TESTNAME " + continue + fi + + echo -n "Testing $TESTNAME ----> " + + # prepare test + NEW_TESTNAME="/tmp/dist_$TESTNAME" + # Added g to sed command to replace all tables, not the first + cat $TESTPATH | sed -e 's/test.hits/test.dist_hits/g' | sed -e 's/test.visits/test.dist_visits/g' > $NEW_TESTNAME + + TESTNAME_RESULT="/tmp/result_$TESTNAME" + NEW_TESTNAME_RESULT="/tmp/result_dist_$TESTNAME" + + $CLICKHOUSE_CLIENT $SETTINGS -nm --testmode < $TESTPATH > $TESTNAME_RESULT + $CLICKHOUSE_CLIENT $SETTINGS -nm --testmode < $NEW_TESTNAME > $NEW_TESTNAME_RESULT + + expected=$(cat $TESTNAME_RESULT | md5sum) + actual=$(cat $NEW_TESTNAME_RESULT | md5sum) + + if [[ "$expected" != "$actual" ]]; then + FAILED+=("$TESTNAME") + echo "Failed! ❌ " + echo "Plain:" + cat $TESTNAME_RESULT + echo "Distributed:" + cat $NEW_TESTNAME_RESULT + else + echo "Ok! 
✅" + fi +done + + +echo "Total failed tests: " +# Iterate the loop to read and print each array element +for value in "${FAILED[@]}" +do + echo "🔺 $value" +done + +# Drop tables + +$CLICKHOUSE_CLIENT $SETTINGS -nm -q ''' + drop table if exists test.dist_hits SYNC; + drop table if exists test.dist_visits SYNC; +'''; From de0a9a84a94c61900d08774c57cb6d223b16af6d Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 9 Dec 2021 13:51:39 +0300 Subject: [PATCH 190/262] Update run-fuzzer.sh --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 351b4a3c541..b8fe9dcbc4f 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -35,7 +35,7 @@ function clone fi git diff --name-only master HEAD | tee ci-changed-files.txt else - if [ -v COMMIT_SHA ]; then + if [ -v SHA_TO_TEST ]; then git fetch --depth 2 origin "$SHA_TO_TEST" git checkout "$SHA_TO_TEST" echo "Checked out nominal SHA $SHA_TO_TEST for master" From 40c9ffdffff976600809c4b30a10f1bb7c14b8a0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 9 Dec 2021 14:14:50 +0300 Subject: [PATCH 191/262] Disable --- .../ArithmeticOperationsInAgrFuncOptimize.cpp | 5 +++++ .../RewriteAnyFunctionVisitor.cpp | 5 +++++ .../RewriteSumIfFunctionVisitor.cpp | 5 +++++ ..._functions_disable_optimizations.reference | 20 +++++++++++++++++++ ...window_functions_disable_optimizations.sql | 14 +++++++++++++ 5 files changed, 49 insertions(+) create mode 100644 tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference create mode 100644 tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp index 70a58971d3f..383ca3db6f4 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp @@ -157,7 +157,12 @@ void ArithmeticOperationsInAgrFuncMatcher::visit(const ASTFunction & func, ASTPt void ArithmeticOperationsInAgrFuncMatcher::visit(ASTPtr & ast, Data & data) { if (const auto * function_node = ast->as()) + { + if (function_node->is_window_function) + return; + visit(*function_node, ast, data); + } } bool ArithmeticOperationsInAgrFuncMatcher::needChildVisit(const ASTPtr & node, const ASTPtr &) diff --git a/src/Interpreters/RewriteAnyFunctionVisitor.cpp b/src/Interpreters/RewriteAnyFunctionVisitor.cpp index eed6368ae54..5eb14aa4252 100644 --- a/src/Interpreters/RewriteAnyFunctionVisitor.cpp +++ b/src/Interpreters/RewriteAnyFunctionVisitor.cpp @@ -63,7 +63,12 @@ bool extractIdentifiers(const ASTFunction & func, std::unordered_set & void RewriteAnyFunctionMatcher::visit(ASTPtr & ast, Data & data) { if (auto * func = ast->as()) + { + if (func->is_window_function) + return; + visit(*func, ast, data); + } } void RewriteAnyFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data & data) diff --git a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp index 7b322ca1585..7f725c1d8a5 100644 --- a/src/Interpreters/RewriteSumIfFunctionVisitor.cpp +++ b/src/Interpreters/RewriteSumIfFunctionVisitor.cpp @@ -10,7 +10,12 @@ namespace DB void RewriteSumIfFunctionMatcher::visit(ASTPtr & ast, Data & data) { if (auto * func = ast->as()) + { + if (func->is_window_function) + return; + visit(*func, ast, data); + } } void 
RewriteSumIfFunctionMatcher::visit(const ASTFunction & func, ASTPtr & ast, Data &) diff --git a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference new file mode 100644 index 00000000000..1fd9e58f556 --- /dev/null +++ b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference @@ -0,0 +1,20 @@ +1 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 1 +0 0.5 30 15 +1 1 30 45 +2 0.5 30 60 +3 1 30 90 +4 0.5 30 105 +5 1 30 135 +6 0.5 30 150 +7 1 30 180 +8 0.5 30 195 +9 1 30 225 diff --git a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql new file mode 100644 index 00000000000..847d868b10b --- /dev/null +++ b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql @@ -0,0 +1,14 @@ +SET optimize_rewrite_sum_if_to_count_if = 1; + +SELECT if(number % 10 = 0, 1, 0) AS dummy, +sum(dummy) OVER w +FROM numbers(10) +WINDOW w AS (ORDER BY number ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW); + +SET optimize_arithmetic_operations_in_aggregate_functions=1; +SELECT + *, + if((number % 2) = 0, 0.5, 1) AS a, + 30 AS b, + sum(a * b) OVER (ORDER BY number ASC) AS s +FROM numbers(10) From f4cbbfd8c04e0320842d2d8d475ba6e3f2774029 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 9 Dec 2021 14:17:55 +0300 Subject: [PATCH 192/262] Update run-fuzzer.sh --- docker/test/fuzzer/run-fuzzer.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index b8fe9dcbc4f..fe9aa199a08 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -1,5 +1,5 @@ #!/bin/bash -# shellcheck disable=SC2086,SC2001,SC2046 +# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031 set -eux set -o pipefail From 0bf7330ddcc4498f1198202d93c1cd3e6de2501e Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 9 Dec 2021 15:40:14 +0300 Subject: [PATCH 193/262] Update test --- tests/queries/0_stateless/01591_window_functions.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 07562557369..4811d0a02ad 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -174,6 +174,8 @@ select groupArray(number) over () from numbers(3); -- Seen errors like 'column `1` not found' from count(1). 
select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); 1 3 +2 3 +3 3 -- Should work in DISTINCT select distinct sum(0) over (rows unbounded preceding) from numbers(2); 0 From 19f8b416bcd75fa29b79375ba4b8e1c3f3230ca3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 16:28:15 +0300 Subject: [PATCH 194/262] Add automatic workflow rerun --- .../Dockerfile | 0 .../app.py | 59 ++++++++++++++++++- .../requirements.txt | 0 3 files changed, 57 insertions(+), 2 deletions(-) rename tests/ci/{approve_lambda => workflow_approve_rerun_lambda}/Dockerfile (100%) rename tests/ci/{approve_lambda => workflow_approve_rerun_lambda}/app.py (82%) rename tests/ci/{approve_lambda => workflow_approve_rerun_lambda}/requirements.txt (100%) diff --git a/tests/ci/approve_lambda/Dockerfile b/tests/ci/workflow_approve_rerun_lambda/Dockerfile similarity index 100% rename from tests/ci/approve_lambda/Dockerfile rename to tests/ci/workflow_approve_rerun_lambda/Dockerfile diff --git a/tests/ci/approve_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py similarity index 82% rename from tests/ci/approve_lambda/app.py rename to tests/ci/workflow_approve_rerun_lambda/app.py index 619c80ce299..436e9b06ede 100644 --- a/tests/ci/approve_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -25,8 +25,8 @@ SUSPICIOUS_PATTERNS = [ MAX_RETRY = 5 WorkflowDescription = namedtuple('WorkflowDescription', - ['name', 'action', 'run_id', 'event', 'workflow_id', - 'fork_owner_login', 'fork_branch']) + ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', + 'fork_owner_login', 'fork_branch', 'rerun_url', 'jobs_url', 'attempt', 'url']) TRUSTED_WORKFLOW_IDS = { 14586616, # Cancel workflows, always trusted @@ -38,6 +38,12 @@ TRUSTED_ORG_IDS = { 54801242, # clickhouse } +NEED_RERUN_WORKFLOWS = { + 13241696, # PR + 15834118, # Docs + 15522500, # MasterCI +} + # Individual trusted contirbutors who are not in any trusted organization. # Can be changed in runtime: we will append users that we learned to be in # a trusted org, to save GitHub API calls. 
@@ -180,6 +186,12 @@ def get_workflow_description_from_event(event): fork_branch = event['workflow_run']['head_branch'] name = event['workflow_run']['name'] workflow_id = event['workflow_run']['workflow_id'] + conclusion = event['workflow_run']['conclusion'] + attempt = event['workflow_run']['run_attempt'] + status = event['workflow_run']['status'] + jobs_url = event['workflow_run']['jobs_url'] + rerun_url = event['workflow_run']['rerun_url'] + url = event['workflow_run']['html_url'] return WorkflowDescription( name=name, action=action, @@ -188,6 +200,12 @@ def get_workflow_description_from_event(event): fork_owner_login=fork_owner, fork_branch=fork_branch, workflow_id=workflow_id, + conclusion=conclusion, + attempt=attempt, + status=status, + jobs_url=jobs_url, + rerun_url=rerun_url, + url=url ) def get_pr_author_and_orgs(pull_request): @@ -255,12 +273,49 @@ def get_token_from_aws(): installation_id = get_installation_id(encoded_jwt) return get_access_token(encoded_jwt, installation_id) +def check_need_to_rerun(workflow_description): + if workflow_description.attempt >= 2: + print("Not going to rerun workflow because it's already tried more than two times") + return False + print("Going to check jobs") + + jobs = _exec_get_with_retry(workflow_description.jobs_url + "?per_page=100") + print("Got jobs", len(jobs['jobs'])) + for job in jobs['jobs']: + if job['conclusion'] not in ('success', 'skipped'): + print("Job", job['name'], "failed, checking steps") + for step in job['steps']: + # always the last job + if step['name'] == 'Complete job': + print("Found Complete job step for job", job['name']) + break + else: + print("Checked all steps and doesn't found Complete job, going to rerun") + return True + + return False + +def rerun_workflow(workflow_description, token): + print("Going to rerun workflow") + _exec_post_with_retry(workflow_description.rerun_url, token) + def main(event): token = get_token_from_aws() event_data = json.loads(event['body']) workflow_description = get_workflow_description_from_event(event_data) print("Got workflow description", workflow_description) + if workflow_description.action == 'completed' and workflow_description.conclusion == 'failure': + print("Workflow", workflow_description.url, "completed and failed, let's check for rerun") + + if workflow_description.workflow_id not in NEED_RERUN_WORKFLOWS: + print("Workflow", workflow_description.workflow_id, "not in list of rerunable workflows") + return + + if check_need_to_rerun(workflow_description): + rerun_workflow(workflow_description, token) + return + if workflow_description.action != "requested": print("Exiting, event action is", workflow_description.action) return diff --git a/tests/ci/approve_lambda/requirements.txt b/tests/ci/workflow_approve_rerun_lambda/requirements.txt similarity index 100% rename from tests/ci/approve_lambda/requirements.txt rename to tests/ci/workflow_approve_rerun_lambda/requirements.txt From b3bd9e6a374f4ecdb81b8f6cec648683547926cf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Dec 2021 16:31:13 +0300 Subject: [PATCH 195/262] Fix arraySlice with null args. 
--- src/Functions/array/arraySlice.cpp | 2 +- .../00498_array_functions_concat_slice_push_pop.reference | 2 ++ .../0_stateless/00498_array_functions_concat_slice_push_pop.sql | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Functions/array/arraySlice.cpp b/src/Functions/array/arraySlice.cpp index d6b50f55563..7a2e97de78a 100644 --- a/src/Functions/array/arraySlice.cpp +++ b/src/Functions/array/arraySlice.cpp @@ -102,7 +102,7 @@ public: { if (!length_column || length_column->onlyNull()) { - return array_column; + return arguments[0].column; } else if (isColumnConst(*length_column)) sink = GatherUtils::sliceFromLeftConstantOffsetBounded(*source, 0, length_column->getInt(0)); diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference index 1cc42544311..f757a86aeee 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.reference @@ -35,6 +35,8 @@ slice [2,NULL,4,5] ['b','c','d'] ['b',NULL,'d'] +[] 1 +[] 1 push back \N [1,1] diff --git a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql index 8f2f0811193..c87d52d2478 100644 --- a/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql +++ b/tests/queries/0_stateless/00498_array_functions_concat_slice_push_pop.sql @@ -36,6 +36,7 @@ select arraySlice([1, 2, 3, 4, 5, 6], 10, 1); select arraySlice([1, 2, Null, 4, 5, 6], 2, 4); select arraySlice(['a', 'b', 'c', 'd', 'e'], 2, 3); select arraySlice([Null, 'b', Null, 'd', 'e'], 2, 3); +select arraySlice([], materialize(NULL), NULL), 1 from numbers(2); select 'push back'; select arrayPushBack(Null, 1); From f4f06ca368c96f3345f176fd42d2e74625c77fdf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Dec 2021 16:59:21 +0300 Subject: [PATCH 196/262] Fix assert. 
--- src/Processors/Transforms/WindowTransform.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index bc6129c3bd2..7a3bb25d2c6 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1008,10 +1008,10 @@ static void assertSameColumns(const Columns & left_all, assert(right_column); if (const auto * left_lc = typeid_cast(left_column)) - left_column = &left_lc->getDictionary(); + left_column = left_lc->getDictionary().getNestedColumn().get(); if (const auto * right_lc = typeid_cast(right_column)) - right_column = &right_lc->getDictionary(); + right_column = right_lc->getDictionary().getNestedColumn().get(); assert(typeid(*left_column).hash_code() == typeid(*right_column).hash_code()); From 952e975d160af5c4632520acbf8f56343e19ab87 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 17:32:00 +0300 Subject: [PATCH 197/262] Fix unit tests on master --- .github/workflows/master.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 26921b8ea48..d60c2889cc8 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -1623,7 +1623,7 @@ jobs: env: TEMP_PATH: ${{runner.temp}}/unit_tests_ubsan REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Unit tests (msan, actions)' + CHECK_NAME: 'Unit tests (ubsan, actions)' REPO_COPY: ${{runner.temp}}/unit_tests_ubsan/ClickHouse run: | sudo rm -fr $TEMP_PATH From f5a77fca391015026b7878c215331f60b8e5058b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 9 Dec 2021 14:40:51 +0000 Subject: [PATCH 198/262] Done --- .../runner/compose/docker_compose_mongo.yml | 6 +++++ src/Common/ErrorCodes.cpp | 1 + src/Processors/Transforms/MongoDBSource.cpp | 9 +++++++ .../ExternalDataSourceConfiguration.cpp | 2 +- .../ExternalDataSourceConfiguration.h | 1 - src/Storages/StorageMongoDB.cpp | 17 ++++++------- tests/integration/helpers/cluster.py | 4 +++- .../configs/named_collections.xml | 2 +- .../integration/test_storage_mongodb/test.py | 24 +++++++++++++++++-- 9 files changed, 52 insertions(+), 14 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index e794966bd08..060017b9f87 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -9,3 +9,9 @@ services: ports: - ${MONGO_EXTERNAL_PORT}:${MONGO_INTERNAL_PORT} command: --profile=2 --verbose + + mongo2: + image: mongo:latest + restart: always + ports: + - "27018:27017" diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 54785f92926..84a796d03d9 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -601,6 +601,7 @@ M(631, UNKNOWN_FILE_SIZE) \ M(632, UNEXPECTED_DATA_AFTER_PARSED_VALUE) \ M(633, QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW) \ + M(634, MONGODB_ERROR) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index 30ad9746520..4f5be41a89d 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -36,6 +36,7 @@ namespace ErrorCodes extern const int MONGODB_CANNOT_AUTHENTICATE; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int UNKNOWN_TYPE; + 
extern const int MONGODB_ERROR; } @@ -327,6 +328,14 @@ Chunk MongoDBSource::generate() for (auto & document : response.documents()) { + if (document->exists("ok") && document->exists("$err") + && document->exists("code") && document->getInteger("ok") == 0) + { + auto code = document->getInteger("code"); + const Poco::MongoDB::Element::Ptr value = document->get("$err"); + auto message = static_cast &>(*value).value(); + throw Exception(ErrorCodes::MONGODB_ERROR, "Got error from MongoDB: {}, code: {}", message, code); + } ++num_rows; for (const auto idx : collections::range(0, size)) diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index 8389c432db2..ade89ea7228 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -86,7 +86,7 @@ std::optional getExternalDataSourceConfiguration(const configuration.username = config.getString(collection_prefix + ".user", ""); configuration.password = config.getString(collection_prefix + ".password", ""); configuration.database = config.getString(collection_prefix + ".database", ""); - configuration.table = config.getString(collection_prefix + ".table", ""); + configuration.table = config.getString(collection_prefix + ".table", config.getString(collection_prefix + ".collection", "")); configuration.schema = config.getString(collection_prefix + ".schema", ""); configuration.addresses_expr = config.getString(collection_prefix + ".addresses_expr", ""); diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index b214caa9a12..502f8b800e3 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -40,7 +40,6 @@ struct StorageMySQLConfiguration : ExternalDataSourceConfiguration struct StorageMongoDBConfiguration : ExternalDataSourceConfiguration { - String collection; String options; }; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 9b48f3fc3b3..fe0f9b8d4b4 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -67,9 +67,12 @@ void StorageMongoDB::connectIfNotConnected() if (!authenticated) { # if POCO_VERSION >= 0x01070800 - Poco::MongoDB::Database poco_db(database_name); - if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) - throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); + if (!username.empty() && !password.empty()) + { + Poco::MongoDB::Database poco_db(database_name); + if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) + throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); + } # else authenticate(*connection, database_name, username, password); # endif @@ -112,9 +115,7 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C for (const auto & [arg_name, arg_value] : storage_specific_args) { - if (arg_name == "collection") - configuration.collection = arg_value->as()->value.safeGet(); - else if (arg_name == "options") + if (arg_name == "options") configuration.options = arg_value->as()->value.safeGet(); else throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -139,7 +140,7 @@ StorageMongoDBConfiguration StorageMongoDB::getConfiguration(ASTs engine_args, C configuration.host = parsed_host_port.first; 
configuration.port = parsed_host_port.second; configuration.database = engine_args[1]->as().value.safeGet(); - configuration.collection = engine_args[2]->as().value.safeGet(); + configuration.table = engine_args[2]->as().value.safeGet(); configuration.username = engine_args[3]->as().value.safeGet(); configuration.password = engine_args[4]->as().value.safeGet(); @@ -163,7 +164,7 @@ void registerStorageMongoDB(StorageFactory & factory) configuration.host, configuration.port, configuration.database, - configuration.collection, + configuration.table, configuration.username, configuration.password, configuration.options, diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 0817cc882b4..66bc8a0ab09 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -725,6 +725,8 @@ class ClickHouseCluster: env_variables['MONGO_HOST'] = self.mongo_host env_variables['MONGO_EXTERNAL_PORT'] = str(self.mongo_port) env_variables['MONGO_INTERNAL_PORT'] = "27017" + env_variables['MONGO_EXTERNAL_PORT_2'] = "27018" + env_variables['MONGO_INTERNAL_PORT_2'] = "27017" self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')]) self.base_mongo_cmd = ['docker-compose', '--env-file', instance.env_file, '--project-name', self.project_name, '--file', p.join(docker_compose_yml_dir, 'docker_compose_mongo.yml')] @@ -2107,7 +2109,7 @@ class ClickHouseInstance: except Exception as e: logging.warning(f"Current start attempt failed. Will kill {pid} just in case.") self.exec_in_container(["bash", "-c", f"kill -9 {pid}"], user='root', nothrow=True) - time.sleep(time_to_sleep) + time.sleep(time_to_sleep) raise Exception("Cannot start ClickHouse, see additional info in logs") diff --git a/tests/integration/test_storage_mongodb/configs/named_collections.xml b/tests/integration/test_storage_mongodb/configs/named_collections.xml index feb6b55af02..5f7db390982 100644 --- a/tests/integration/test_storage_mongodb/configs/named_collections.xml +++ b/tests/integration/test_storage_mongodb/configs/named_collections.xml @@ -6,7 +6,7 @@ mongo1 27017 test - simple_table
+ simple_table diff --git a/tests/integration/test_storage_mongodb/test.py b/tests/integration/test_storage_mongodb/test.py index 1a5de353d7d..2d27ec18018 100644 --- a/tests/integration/test_storage_mongodb/test.py +++ b/tests/integration/test_storage_mongodb/test.py @@ -20,8 +20,12 @@ def started_cluster(request): cluster.shutdown() -def get_mongo_connection(started_cluster, secure=False): - connection_str = 'mongodb://root:clickhouse@localhost:{}'.format(started_cluster.mongo_port) +def get_mongo_connection(started_cluster, secure=False, with_credentials=True): + connection_str = '' + if with_credentials: + connection_str = 'mongodb://root:clickhouse@localhost:{}'.format(started_cluster.mongo_port) + else: + connection_str = 'mongodb://localhost:27018' if secure: connection_str += '/?tls=true&tlsAllowInvalidCertificates=true' return pymongo.MongoClient(connection_str) @@ -138,4 +142,20 @@ def test_predefined_connection_configuration(started_cluster): node = started_cluster.instances['node'] node.query("create table simple_mongo_table(key UInt64, data String) engine = MongoDB(mongo1)") + assert node.query("SELECT count() FROM simple_mongo_table") == '100\n' + simple_mongo_table.drop() + +@pytest.mark.parametrize('started_cluster', [False], indirect=['started_cluster']) +def test_no_credentials(started_cluster): + mongo_connection = get_mongo_connection(started_cluster, with_credentials=False) + db = mongo_connection['test'] + simple_mongo_table = db['simple_table'] + data = [] + for i in range(0, 100): + data.append({'key': i, 'data': hex(i * i)}) + simple_mongo_table.insert_many(data) + + node = started_cluster.instances['node'] + node.query("create table simple_mongo_table_2(key UInt64, data String) engine = MongoDB('mongo2:27017', 'test', 'simple_table', '', '')") + assert node.query("SELECT count() FROM simple_mongo_table_2") == '100\n' simple_mongo_table.drop() From 1eb36c72364b3ae8c43dbbd2f014c1f151fdde84 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 17:48:03 +0300 Subject: [PATCH 199/262] Remove dependency between integration and functional tests --- .github/workflows/main.yml | 6 +++--- .github/workflows/master.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2adfbce3577..69a863b75a0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1498,7 +1498,7 @@ jobs: ############################# INTEGRATION TESTS ############################################# ############################################################################################# IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1526,7 +1526,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1554,7 +1554,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index d60c2889cc8..5d4dec16303 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -1268,7 +1268,7 @@ 
jobs: ############################# INTEGRATION TESTS ############################################# ############################################################################################# IntegrationTestsAsan: - needs: [BuilderDebAsan, FunctionalStatelessTestAsan] + needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1296,7 +1296,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH IntegrationTestsTsan: - needs: [BuilderDebTsan, FunctionalStatelessTestTsan] + needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports @@ -1324,7 +1324,7 @@ jobs: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports From 272c0bb35a62390a9d90e67d24babb863fcdcc12 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 9 Dec 2021 18:58:28 +0300 Subject: [PATCH 200/262] Fix crash in case of MATERIALIZE COLUMN with no default expression. --- src/Interpreters/MutationsInterpreter.cpp | 6 ++++++ tests/queries/0_stateless/02008_materialize_column.sql | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 471ad67d4e7..d595efc5485 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -569,6 +569,12 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages.emplace_back(context); const auto & column = columns_desc.get(command.column_name); + + if (!column.default_desc.expression) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Cannot materialize column `{}` because it doesn't have default expression", column.name); + stages.back().column_to_updated.emplace(column.name, column.default_desc.expression->clone()); } else if (command.type == MutationCommand::MATERIALIZE_INDEX) diff --git a/tests/queries/0_stateless/02008_materialize_column.sql b/tests/queries/0_stateless/02008_materialize_column.sql index 4136a04568e..8a8eb2afe83 100644 --- a/tests/queries/0_stateless/02008_materialize_column.sql +++ b/tests/queries/0_stateless/02008_materialize_column.sql @@ -5,6 +5,8 @@ SET mutations_sync = 2; CREATE TABLE tmp (x Int64) ENGINE = MergeTree() ORDER BY tuple() PARTITION BY tuple(); INSERT INTO tmp SELECT * FROM system.numbers LIMIT 20; +ALTER TABLE tmp MATERIALIZE COLUMN x; -- { serverError 36 } + ALTER TABLE tmp ADD COLUMN s String DEFAULT toString(x); SELECT groupArray(x), groupArray(s) FROM tmp; From ee2d661de8e28faebf48b9a4eddaa07ab013ec16 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Thu, 9 Dec 2021 09:21:33 -0700 Subject: [PATCH 201/262] Update CHANGELOG.md FYI: I did not sort the new features --- CHANGELOG.md | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f34725448f2..2788bee40b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,192 @@ +### ClickHouse release v21.12, 2021-12-09 + +#### Backward Incompatible Change + +* A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then server will fail to start after upgrade. Stop replicas with old version to make new version start. 
After that it will not be possible to downgrade to version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)). +* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will be not allowed even if enabled by setting, in case there is an attached materialized view. For Kafka and RabbitMQ direct selectm if allowed, will not commit massages by default. To enable commits with direct select, user must use storage level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). cc @filimonov. [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add custom null representation support for TSV/CSV input formats. Fix deserialing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature + +* Added new SQL elements `WINDOW VIEW` and `WINDOW FUNCTION` to enable stream processing for ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)). +* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)). +* Allow to print/parse names and types of colums in `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)). +* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)). +* Exposes all GlobalThreadPool configurations to the configuration files. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)). +* Support `bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)). +* Support for `PARTITION BY` in File, URL, HDFS storages and with `INSERT INTO` table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Introduced window functions `exponentialTimeDecayedSum`, `exponentialTimeDecayedMax`, `exponentialTimeDecayedCount` and `exponentialTimeDecayedAvg` which are more effective than `exponentialMovingAverage` for bigger windows. Also more use-cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)). 
+* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). +* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)). +* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)). +* Support `JOIN ON 1 = 1` that have CROSS JOIN semantic. This closes [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578). [#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)). +* Add Map combinator for `Map` type. - Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later). Added ability to replace heavy columns with light. Added ability to use the index in queries. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)). + +#### Performance Improvement + +* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up `avg` and `sumCount` aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)). +* Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)). +* Fixing query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)). +* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)). + +#### Improvement + +* Support default expression for HDFS storage and optimize fetching when source is column oriented. 
[#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)). +* Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)). +* Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)). +* Improve skipping unknown fields with quoted escaping rule in Template/CustomSeparated formats. Previously you could skip only quoted strings, now you can skip values with any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)). +* Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339). [#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)). +* Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)). +* Return Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)). +* Allow to write `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow a user configured `hdfs_replication` parameter for `DiskHdfs` and `StorageHdfs`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)). +* Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)). +* Fix a bug that opentelemetry span log duration is zero at the query level if there is a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)). +* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)). +* Support PostgreSQL style ALTER MODIFY COLUMN. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)). +* Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Recreate system.*_log tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)). +* `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)). 
+* ClickHouse dictionary source support named collections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to use named collections configuration for kafka and rabbitmq engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)). +* Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)). +* Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([tavplubix](https://github.com/tavplubix)). +* Better analysis for `min/max/count` projection. Now, with enabled `allow_experimental_projection_optimization`, virtual `min/max/count` projection can be used together with columns from partition key. [#31474](https://github.com/ClickHouse/ClickHouse/pull/31474) ([Amos Bird](https://github.com/amosbird)). +* Add `--pager` support for `clickhouse-local`. [#31457](https://github.com/ClickHouse/ClickHouse/pull/31457) ([Azat Khuzhin](https://github.com/azat)). +* Fix waiting of the editor during interactive query edition (`waitpid()` returns -1 on `SIGWINCH` and `EDITOR` and `clickhouse-local`/`clickhouse-client` works concurrently). [#31456](https://github.com/ClickHouse/ClickHouse/pull/31456) ([Azat Khuzhin](https://github.com/azat)). +* Throw an exception if there is some garbage after field in `JSONCompactStrings(EachRow)` format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)). +* Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([tavplubix](https://github.com/tavplubix)). +* `MaterializedMySQL` now handles `CREATE TABLE ... LIKE ...` DDL queries. [#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)). +* Return fake create query when executing `show create table` on system's tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)). +* Previously progress was shown only for `numbers` table function, not for `numbers_mt`. Now for `numbers_mt` it is also shown. 
[#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Initial user's roles are now used to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)). +* Optimize function `tupleElement` to read a subcolumn when the setting `optimize_functions_to_subcolumns` is enabled. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)). +* If some obsolete setting is changed, show a warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)). +* Optimize function `mapContains` to read the subcolumn `key` when the setting `optimize_functions_to_subcolumns` is enabled. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)). +* Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from user settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([tavplubix](https://github.com/tavplubix)). +* Syntax changed so that now the backup engine should be set explicitly: `BACKUP ... TO Disk('backups', 'path\')`. Also changed the format of the backup's metadata; now it's in XML. Backup of a whole database now works. [#31178](https://github.com/ClickHouse/ClickHouse/pull/31178) ([Vitaly Baranov](https://github.com/vitlibar)). +* Now every replica will send to the client only incremental information about profile event counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). +* Use DiskPtr instead of OS's file system API in class `IDiskRemote` in order to get more extensibility. Closes [#31117](https://github.com/ClickHouse/ClickHouse/issues/31117). [#31136](https://github.com/ClickHouse/ClickHouse/pull/31136) ([Yangkuan Liu](https://github.com/LiuYangkuan)). +* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121). [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)). +* Function name normalization for `ALTER` queries. This helps avoid metadata mismatch between creating a table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Marked as an improvement as there are no bug reports and the scenario is somewhat rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)). +* Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query. If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)). +* Cancel vertical merges when partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)). +* The local session inside a ClickHouse dictionary source won't send its events to the session log anymore.
This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)). +* Only grab AlterLock when we do alter command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)). +* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)). +* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)). +* Fix `--verbose` option in clickhouse-local interactive mode and allow logging into file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added `\l`, `\d`, `\c` aliases like in MySQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)). +* For clickhouse-local or clickhouse-client: if there is `--interactive` option with `--query` or `--queries-file`, then first execute them like in non-interactive and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails during removing znodes from zookeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)). +* Implement the commands BACKUP and RESTORE for the Log family. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)). +* Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)). +* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWIthNames`, `JSONCompactStringsEachRowWIthNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WIthNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WIthNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WIthNamesAndTypes)`. Support columns mapping and types checking for `RowBinaryWithNamesAndTypes` format. 
Add setting `input_format_with_types_use_header` which specify if we should check that types written in `WIthNamesAndTypes` format matches with table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values was inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed just until first '\n' or '\t'). Add ability to use `Raw` escaping rule in Template input format. Add diagnostic info for JSONCompactEachRow(WithNames/WIthNamesAndTypes) input format. Fix bug with parallel parsing of `-WithNames` formats in case when setting `min_chunk_bytes_for_parallel_parsing` is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to specify one or any number of PostgreSQL schemas for one `MaterializedPostgreSQL` database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)). +* Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). Allow versioning of aggregate function states. [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)). + + +#### Bug Fixes + +* Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). 
+* Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fixed a bug where the quota limit was reported as exceeded even though it had not been reached. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). +* Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). +* Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)). +* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([tavplubix](https://github.com/tavplubix)). +* Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if `Dictionary` table looks at XML-dictionary with the same name; it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315). [#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([tavplubix](https://github.com/tavplubix)). +* Fix window view parser. [#32232](https://github.com/ClickHouse/ClickHouse/pull/32232) ([vxider](https://github.com/Vxider)). +* Fix parsing error while deserializing NaN for `Nullable(Float)` with the `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)). +* XML dictionary identifiers used in a table create query could be qualified to `default_database` during upgrade to a newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). +* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([tavplubix](https://github.com/tavplubix)). +* Dictionaries: fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for e.g. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)).
+* Fix CREATE TABLE of Join Storage with multiple settings containing persistency. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)). +* Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([tavplubix](https://github.com/tavplubix)). +* `MaterializedMySQL`: Fix rare corruption of `DECIMAL` data. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)). +* Fix `FileLog` engine unnecessarily creating a metadata directory when table creation failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)). +* Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([tavplubix](https://github.com/tavplubix)). +* Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). +* Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler`. Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). +* Fix a bug in function `transform` with decimal arguments. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)). +* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)). +* Fix invalid cast of nullable type when nullable primary key is used. This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Fix a crash in recursive user-defined functions. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash when function `dictGet` with specified type is used for a dictionary attribute when the type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash with empty result on ODBC query. Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix usage of `Buffer` table engine with type `Map`.
Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix disabling query profiler (in case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` the query profiler could stay enabled even after the query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)). +* Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([tavplubix](https://github.com/tavplubix)). +* Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([tavplubix](https://github.com/tavplubix)). +* Fix exception on some of the applications of `decrypt` function on Nullable columns. This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed function `ngrams` when a string contains UTF-8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)). +* Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`; it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([tavplubix](https://github.com/tavplubix)). +* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Vladimir Chebotarev](https://github.com/excitoon)). +* `RENAME TABLE` query worked incorrectly on attempt to rename a DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([tavplubix](https://github.com/tavplubix)). +* Fix sparkbars not being aligned, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)). +* All non-x86 builds were broken, because we don't have tests for them. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)).
+* Disable `partial_merge_join_left_table_buffer_bytes` until the bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). +* Fix progress for short INSERT SELECT queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)). +* Fix bug with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Fix bug in Keeper which can lead to inability to start when some coordination logs were lost and we have a more recent snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). +* Rewrite right distributed table in local join. Solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). [#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)). +* Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of a bad HDFS URL by adding an additional check of the HDFS URL structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix JSONValue/Query with quoted identifiers. This allows having spaces in the JSON path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Using the `formatRow` function with non-row formats led to a segfault. Don't allow using this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug which broke select queries if they happened after dropping materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)).
+ +#### Build/Testing/Packaging Improvement + +* Fix broken symlink for sysroot/linux-riscv64/usr/lib. [#32071](https://github.com/ClickHouse/ClickHouse/pull/32071) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Build rpm and tgz packages in the master and release branches workflow. [#32048](https://github.com/ClickHouse/ClickHouse/pull/32048) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Adjust artifactory pusher to the new bucket paths. Use only version or pull request number in bucket, no `0`. Create a function to read GitHub event data. [#31952](https://github.com/ClickHouse/ClickHouse/pull/31952) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove filesystem path to the build directory from binaries to enable reproducible builds. This is needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Revert changes from [#28016](https://github.com/ClickHouse/ClickHouse/issues/28016): archive.ubuntu.com should be faster in general than the RU mirror. [#31822](https://github.com/ClickHouse/ClickHouse/pull/31822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* More correct setup of capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)). +* Changed the default port for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)). +* Added the script for uploading packages to the artifactory. [#31748](https://github.com/ClickHouse/ClickHouse/pull/31748) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove hardcoded repository name from CI scripts. [#31536](https://github.com/ClickHouse/ClickHouse/pull/31536) ([Constantine Peresypkin](https://github.com/pkit)). +* Initial support for RISC-V. See development/build-cross-riscv for quirks and the build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)). +* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)). +* Fix snappy build error in [#30790](https://github.com/ClickHouse/ClickHouse/issues/30790). The update of contrib/snappy is in https://github.com/google/snappy/pull/145/files. [#30796](https://github.com/ClickHouse/ClickHouse/pull/30796) ([李扬](https://github.com/taiyang-li)). +* Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system.
[#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Hermetic builds: use fixed version of libc and make sure that no source or binary files from the host OS are using during build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). [#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* support compile in arm machine with parameter "-DENABLE_TESTS=OFF". [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)). + + ### ClickHouse release v21.11, 2021-11-09 #### Backward Incompatible Change From b74af1af5d3c66330369a0404f335ce8bacb9c3a Mon Sep 17 00:00:00 2001 From: Vxider Date: Fri, 10 Dec 2021 00:51:31 +0800 Subject: [PATCH 202/262] fix window view docs --- docs/en/sql-reference/statements/create/view.md | 2 +- .../sql-reference/functions/window-view-functions.md | 2 +- docs/zh/sql-reference/statements/create/view.md | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 464de02eac6..1e2e10b5cb6 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -296,7 +296,7 @@ CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTE Note that elements emitted by a late firing should be treated as updated results of a previous computation. Instead of firing at the end of windows, the window view will fire immediately when the late event arrives. Thus, it will result in multiple outputs for the same window. Users need to take these duplicated results into account or deduplicate them. -### Monitoring New Windows{#window-view-monitoring} +### Monitoring New Windows {#window-view-monitoring} Window view supports the `WATCH` query to constantly append the processing results to the console or use `TO` syntax to output the results to a table. 
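For context on the `WATCH` behaviour described above, here is a minimal sketch of monitoring a window view, modelled on the 10-second click-counting example from the zh docs changed later in this patch; the `data` table, the `wv` view and the column names are illustrative assumptions, not part of this change:

```sql
-- Window view is experimental; it may need to be enabled first (assumption based on these docs).
SET allow_experimental_window_view = 1;

-- Hypothetical source table of click events.
CREATE TABLE data (id UInt64, timestamp DateTime) ENGINE = MergeTree ORDER BY timestamp;

-- Count ids in 10-second tumbling windows.
CREATE WINDOW VIEW wv AS
SELECT count(id) AS cnt, tumbleStart(w_id) AS window_start
FROM data
GROUP BY tumble(timestamp, INTERVAL '10' SECOND) AS w_id;

-- Constantly append newly fired window results to the console; LIMIT stops after three results.
WATCH wv LIMIT 3;
```

With the `TO` form (`CREATE WINDOW VIEW wv TO dst AS ...`), the same results would instead be written to the target table rather than watched on the console.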
diff --git a/docs/zh/sql-reference/functions/window-view-functions.md b/docs/zh/sql-reference/functions/window-view-functions.md index a8afac9a85d..b203fc41206 100644 --- a/docs/zh/sql-reference/functions/window-view-functions.md +++ b/docs/zh/sql-reference/functions/window-view-functions.md @@ -3,7 +3,7 @@ toc_priority: 68 toc_title: Window View --- -# Window View 函数{#window-view-han-shu} +# Window View 函数 {#window-view-han-shu} Window view函数用于获取窗口的起始(包含边界)和结束时间(不包含边界)。系统支持的window view函数如下: diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index ed64b578150..967db792038 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -257,11 +257,11 @@ Window view可以通过时间窗口聚合数据,并在满足窗口触发条件 创建window view的方式和创建物化视图类似。Window view使用默认为`AggregatingMergeTree`的内部存储引擎存储计算中间状态。 -### Window View 函数{#window-view-han-shu} +### Window View 函数 {#window-view-han-shu} [Window view函数](../../functions/window-view-functions.md)用于获取窗口的起始和结束时间。Window view需要和window view函数配合使用。 -### 时间属性{#window-view-shi-jian-shu-xing} +### 时间属性 {#window-view-shi-jian-shu-xing} Window view 支持**处理时间**和**事件时间**两种时间类型。 @@ -295,7 +295,7 @@ CREATE WINDOW VIEW test.wv TO test.dst WATERMARK=ASCENDING ALLOWED_LATENESS=INTE 需要注意的是,迟到消息需要更新之前的处理结果。与在窗口结束时触发不同,迟到消息到达时window view会立即触发计算。因此,会导致同一个窗口输出多次计算结果。用户需要注意这种情况,并消除重复结果。 -### 新窗口监控{#window-view-xin-chuang-kou-jian-kong} +### 新窗口监控 {#window-view-xin-chuang-kou-jian-kong} Window view可以通过`WATCH`语句将处理结果推送至终端,或通过`TO`语句将结果推送至数据表。 @@ -305,12 +305,12 @@ WATCH [db.]name [LIMIT n] `WATCH`语句和`LIVE VIEW`中的类似。支持设置`LIMIT`参数,输出消息数目达到`LIMIT`限制时结束查询。 -### 设置{#window-view-she-zhi} +### 设置 {#window-view-she-zhi} - `window_view_clean_interval`: window view清除过期数据间隔(单位为秒)。系统会定期清除过期数据,尚未触发的窗口数据不会被清除。 - `window_view_heartbeat_interval`: 用于判断watch查询活跃的心跳时间间隔。 -### 示例{#window-view-shi-li} +### 示例 {#window-view-shi-li} 假设我们需要每10秒统计一次`data`表中的点击日志,且`data`表的结构如下: @@ -352,7 +352,7 @@ CREATE WINDOW VIEW wv TO dst AS SELECT count(id), tumbleStart(w_id) as window_st ClickHouse测试中提供了更多的示例(以`*window_view*`命名)。 -### Window View 使用场景{#window-view-shi-yong-chang-jing} +### Window View 使用场景 {#window-view-shi-yong-chang-jing} Window view 在以下场景有用: From f0362d83016862e6b1c3ab27e381d12d8ef1b9a1 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 9 Dec 2021 20:39:18 +0300 Subject: [PATCH 203/262] Update odbc-bridge.md --- docs/en/operations/utilities/odbc-bridge.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/utilities/odbc-bridge.md b/docs/en/operations/utilities/odbc-bridge.md index 70b413c9c1f..e5967085c49 100644 --- a/docs/en/operations/utilities/odbc-bridge.md +++ b/docs/en/operations/utilities/odbc-bridge.md @@ -26,7 +26,7 @@ Query is send in post body. Response is returned in RowBinary format. 
```bash $ clickhouse-odbc-bridge --http-port 9018 --daemon -$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "columns=columns format version: 1 +$ curl -d "query=SELECT PageID, ImpID, AdType FROM Keys ORDER BY PageID, ImpID" --data-urlencode "connection_string=DSN=ClickHouse;DATABASE=stat" --data-urlencode "sample_block=columns format version: 1 3 columns: \`PageID\` String \`ImpID\` String From c155de58193deecdd3ac3496a58279b099e17f7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 9 Dec 2021 19:00:08 +0100 Subject: [PATCH 204/262] 02122_parallel_formatting: Address grep binary warnings --- tests/queries/0_stateless/02122_parallel_formatting.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02122_parallel_formatting.sh b/tests/queries/0_stateless/02122_parallel_formatting.sh index 8061cbe58b2..f0c24344329 100755 --- a/tests/queries/0_stateless/02122_parallel_formatting.sh +++ b/tests/queries/0_stateless/02122_parallel_formatting.sh @@ -11,14 +11,14 @@ formats="RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes XML Markdown Ve for format in ${formats}; do echo $format-1 - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $non_parallel_file - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) format $format" --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file echo $format-2 - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $non_parallel_file - $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -v "elapsed" > $parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=0 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $non_parallel_file + $CLICKHOUSE_CLIENT -q "select number, number + 1, concat('string: ', toString(number)) from numbers(200000) group by number with totals limit 190000 format $format" --extremes=1 --output_format_parallel_formatting=1 --output_format_pretty_max_rows=1000000 | grep -a -v "elapsed" > $parallel_file diff $non_parallel_file $parallel_file 
done From ebdcf7e38efef759eb744c24235d6d1d265d4d6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 9 Dec 2021 19:26:29 +0100 Subject: [PATCH 205/262] 01950_kill_large_group_by_query: Increase timeout --- .../0_stateless/01950_kill_large_group_by_query.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01950_kill_large_group_by_query.sh b/tests/queries/0_stateless/01950_kill_large_group_by_query.sh index 0b369c7257e..aba9d2d2467 100755 --- a/tests/queries/0_stateless/01950_kill_large_group_by_query.sh +++ b/tests/queries/0_stateless/01950_kill_large_group_by_query.sh @@ -12,9 +12,11 @@ function wait_for_query_to_start() } +MAX_TIMEOUT=30 + # TCP CLIENT -$CLICKHOUSE_CLIENT --max_execution_time 10 --query_id "test_01948_tcp_$CLICKHOUSE_DATABASE" -q \ +$CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT --query_id "test_01948_tcp_$CLICKHOUSE_DATABASE" -q \ "SELECT * FROM ( SELECT a.name as n @@ -30,12 +32,12 @@ $CLICKHOUSE_CLIENT --max_execution_time 10 --query_id "test_01948_tcp_$CLICKHOUS LIMIT 20 FORMAT Null" > /dev/null 2>&1 & wait_for_query_to_start "test_01948_tcp_$CLICKHOUSE_DATABASE" -$CLICKHOUSE_CLIENT --max_execution_time 10 -q "KILL QUERY WHERE query_id = 'test_01948_tcp_$CLICKHOUSE_DATABASE' SYNC" +$CLICKHOUSE_CLIENT --max_execution_time $MAX_TIMEOUT -q "KILL QUERY WHERE query_id = 'test_01948_tcp_$CLICKHOUSE_DATABASE' SYNC" # HTTP CLIENT -${CLICKHOUSE_CURL_COMMAND} -q --max-time 10 -sS "$CLICKHOUSE_URL&query_id=test_01948_http_$CLICKHOUSE_DATABASE" -d \ +${CLICKHOUSE_CURL_COMMAND} -q --max-time $MAX_TIMEOUT -sS "$CLICKHOUSE_URL&query_id=test_01948_http_$CLICKHOUSE_DATABASE" -d \ "SELECT * FROM ( SELECT a.name as n @@ -51,4 +53,4 @@ ${CLICKHOUSE_CURL_COMMAND} -q --max-time 10 -sS "$CLICKHOUSE_URL&query_id=test_0 LIMIT 20 FORMAT Null" > /dev/null 2>&1 & wait_for_query_to_start "test_01948_http_$CLICKHOUSE_DATABASE" -$CLICKHOUSE_CURL --max-time 10 -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_01948_http_$CLICKHOUSE_DATABASE' SYNC" +$CLICKHOUSE_CURL --max-time $MAX_TIMEOUT -sS "$CLICKHOUSE_URL" -d "KILL QUERY WHERE query_id = 'test_01948_http_$CLICKHOUSE_DATABASE' SYNC" From eee3fe6639b553b781ad8df77555a3270f920ac9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 23:17:19 +0300 Subject: [PATCH 206/262] Fix two unused builds --- .github/workflows/backport_branches.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 98a33927667..b61b74f86d3 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -207,8 +207,6 @@ jobs: - BuilderDebRelease - BuilderDebAsan - BuilderDebTsan - - BuilderDebUBsan - - BuilderDebMsan - BuilderDebDebug runs-on: [self-hosted, style-checker] steps: From 80dcaae0c079483d57925c8b0a654da1ca82075b Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 9 Dec 2021 23:18:13 +0300 Subject: [PATCH 207/262] Fix one more time --- .github/workflows/backport_branches.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index b61b74f86d3..859756f07af 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -331,7 +331,7 @@ jobs: ############################# INTEGRATION TESTS ############################################# 
############################################################################################# IntegrationTestsRelease: - needs: [BuilderDebRelease, FunctionalStatelessTestRelease] + needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: - name: Download json reports From f2dfa89bc9834e9e78fae290993eebd7078d2cbe Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 9 Dec 2021 23:58:46 +0300 Subject: [PATCH 208/262] Update docker_compose_mongo.yml --- docker/test/integration/runner/compose/docker_compose_mongo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index a342a0f55e5..732734abcbe 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -1,7 +1,7 @@ version: '2.3' services: mongo1: - image: mongo:latest + image: mongo:5.0 restart: always environment: MONGO_INITDB_ROOT_USERNAME: root From 85e53b1b1fd0a407263e1317542ad5fe26398a6f Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 10 Dec 2021 00:12:45 +0300 Subject: [PATCH 209/262] Try fix attaching gdb in tests (#32448) * attach gdb with sudo * fix * Update run.sh --- docker/test/fuzzer/run-fuzzer.sh | 2 +- docker/test/performance-comparison/compare.sh | 2 +- docker/test/stress/run.sh | 3 ++- tests/ci/ast_fuzzer_check.py | 2 +- tests/ci/stress_check.py | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 75d188e3190..764fa9a0f76 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -165,7 +165,7 @@ thread apply all backtrace continue " > script.gdb - gdb -batch -command script.gdb -p $server_pid & + sudo gdb -batch -command script.gdb -p $server_pid & # Check connectivity after we attach gdb, because it might cause the server # to freeze and the fuzzer will fail. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index c32b50a3cbe..02d881347af 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -291,7 +291,7 @@ function get_profiles_watchdog for pid in $(pgrep -f clickhouse) do - gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" & + sudo gdb -p "$pid" --batch --ex "info proc all" --ex "thread apply all bt" --ex quit &> "$pid.gdb.log" & done wait diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 6d720d02cdc..2ed4050d514 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -1,6 +1,7 @@ #!/bin/bash # shellcheck disable=SC2094 # shellcheck disable=SC2086 +# shellcheck disable=SC2024 set -x @@ -142,7 +143,7 @@ quit # FIXME Hung check may work incorrectly because of attached gdb # 1. False positives are possible # 2. 
We cannot attach another gdb to get stacktraces if some queries hung - gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & + sudo gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" >> /test_output/gdb.log & } configure diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 656e9fdbe50..bbf822c3879 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -21,7 +21,7 @@ IMAGE_NAME = 'clickhouse/fuzzer' def get_run_command(pr_number, sha, download_url, workspace_path, image): return f'docker run --network=host --volume={workspace_path}:/workspace ' \ - '--cap-add syslog --cap-add sys_admin ' \ + '--cap-add syslog --cap-add sys_admin --cap-add=SYS_PTRACE ' \ f'-e PR_TO_TEST={pr_number} -e SHA_TO_TEST={sha} -e BINARY_URL_TO_DOWNLOAD="{download_url}" '\ f'{image}' diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 7ec1a978cb9..911d370a594 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -22,7 +22,7 @@ from tee_popen import TeePopen def get_run_command(build_path, result_folder, server_log_folder, image): - cmd = "docker run -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ + cmd = "docker run --cap-add=SYS_PTRACE -e S3_URL='https://clickhouse-datasets.s3.amazonaws.com' " + \ f"--volume={build_path}:/package_folder " \ f"--volume={result_folder}:/test_output " \ f"--volume={server_log_folder}:/var/log/clickhouse-server {image}" From db83d82f5a301ece3e401d2972fe80d9d07b0907 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 10:54:37 +0300 Subject: [PATCH 210/262] Fix integration tests path --- tests/ci/docker_pull_helper.py | 5 +++++ tests/ci/integration_test_check.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/ci/docker_pull_helper.py b/tests/ci/docker_pull_helper.py index f9804744820..50354da6801 100644 --- a/tests/ci/docker_pull_helper.py +++ b/tests/ci/docker_pull_helper.py @@ -25,6 +25,11 @@ def get_images_with_versions(reports_path, required_image, pull=True): images_path = os.path.join(root, 'changed_images.json') break + if not images_path: + logging.info("Images file not found") + else: + logging.info("Images file path %s", images_path) + if images_path is not None and os.path.exists(images_path): logging.info("Images file exists") with open(images_path, 'r', encoding='utf-8') as images_fd: diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 723e81d63cb..69c4603b3ea 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -120,7 +120,7 @@ if __name__ == "__main__": logging.info("Check is already finished according to github status, exiting") sys.exit(0) - images = get_images_with_versions(temp_path, IMAGES) + images = get_images_with_versions(reports_path, IMAGES) images_with_versions = {i.name: i.version for i in images} result_path = os.path.join(temp_path, "output_dir") if not os.path.exists(result_path): From 4c1babee0541c1d39bb6d5bece0a3f14e69d7b20 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 12:07:24 +0300 Subject: [PATCH 211/262] Split long functional tests to multiple checks --- .github/workflows/main.yml | 210 +++++++++++++++++++++++++++++- .github/workflows/master.yml | 210 +++++++++++++++++++++++++++++- docker/test/stateless/Dockerfile | 1 - docker/test/stateless/run.sh | 7 + tests/ci/functional_test_check.py | 37 ++++-- 
tests/clickhouse-test | 34 ++++- 6 files changed, 469 insertions(+), 30 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 69a863b75a0..fe124320adb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -886,7 +886,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -903,6 +903,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -944,7 +1008,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -961,6 +1025,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -973,7 +1039,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + 
REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -990,6 +1118,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr 
$TEMP_PATH mkdir -p $TEMP_PATH @@ -1759,13 +1951,19 @@ jobs: - CheckLabels - BuilderReport - FastTest - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseReplicated - FunctionalStatelessTestReleaseWideParts - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 5d4dec16303..f7c25bb28d3 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -799,7 +799,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -816,6 +816,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -857,7 +921,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -874,6 +938,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: 
${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -886,7 +952,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -903,6 +1031,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker 
ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1641,12 +1833,18 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7de8c061673..05d26924b15 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -49,7 +49,6 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 ENV MAX_RUN_TIME=0 - # Download Minio-related binaries RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \ && chmod +x ./minio \ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 93f64fdec66..8827f5b1bf6 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -96,6 +96,13 @@ function run_tests() ADDITIONAL_OPTIONS+=('8') fi + if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then + ADDITIONAL_OPTIONS+=('--run-by-hash-num') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") + ADDITIONAL_OPTIONS+=('--run-by-hash-total') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") + fi + set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 15b9ab44b31..90f83ef3be9 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -20,15 +20,20 @@ from stopwatch import Stopwatch from rerun_helper import RerunHelper from tee_popen import TeePopen -def get_additional_envs(check_name): +def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): + result = [] if 'DatabaseReplicated' in check_name: - return ["USE_DATABASE_REPLICATED=1"] + result.append("USE_DATABASE_REPLICATED=1") if 'DatabaseOrdinary' in check_name: - return ["USE_DATABASE_ORDINARY=1"] + result.append("USE_DATABASE_ORDINARY=1") if 'wide parts enabled' in check_name: - return ["USE_POLYMORPHIC_PARTS=1"] + result.append("USE_POLYMORPHIC_PARTS=1") - return [] + if run_by_hash_total != 0: + result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") + result.append(f"RUN_BY_HASH_TOTAL={run_by_hash_total}") + + return result def get_image_name(check_name): if 'stateless' in check_name.lower(): 
@@ -117,12 +122,22 @@ if __name__ == "__main__": check_name = sys.argv[1] kill_timeout = int(sys.argv[2]) + flaky_check = 'flaky' in check_name.lower() gh = Github(get_best_robot_token()) pr_info = PRInfo(get_event(), need_changed_files=flaky_check) - rerun_helper = RerunHelper(gh, pr_info, check_name) + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -135,7 +150,7 @@ if __name__ == "__main__": tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - commit.create_status(context=check_name, description='Not found changed stateless tests', state='success') + commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success') sys.exit(0) image_name = get_image_name(check_name) @@ -157,7 +172,7 @@ if __name__ == "__main__": run_log_path = os.path.join(result_path, "runlog.log") - additional_envs = get_additional_envs(check_name) + additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total) run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run) logging.info("Going to run func tests: %s", run_command) @@ -176,12 +191,12 @@ if __name__ == "__main__": ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) if state != 'success': diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8a87227519f..177e2b35c4e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -17,6 +17,8 @@ import math import http.client import urllib.parse import json +# for crc32 +import zlib from argparse import ArgumentParser from typing import Tuple, Union, Optional, Dict, Set, List @@ -57,6 +59,13 @@ MAX_RETRIES = 3 TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect'] + +def stringhash(s): + # default hash() function consistent + # only during process invocation https://stackoverflow.com/a/42089311 + return zlib.crc32(s.encode('utf-8')) + + class HTTPError(Exception): def __init__(self, message=None, code=None): self.message = message @@ -756,7 +765,15 @@ class TestSuite: 
self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite - self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func) + filter_func = lambda x: True + + if args.run_by_hash_num and args.run_by_hash_total: + if args.run_by_hash_num > args.run_by_hash_total: + raise Exception(f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}") + + filter_func = lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num + + self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func, filter_func) self.all_tags: Dict[str, Set[str]] = self.read_test_tags(self.suite_path, self.all_tests) self.sequential_tests = [] @@ -777,17 +794,17 @@ class TestSuite: return ('no-parallel' in self.all_tags[test_name]) or ('sequential' in self.all_tags[test_name]) - def get_tests_list(self, sort_key): + def get_tests_list(self, sort_key, filter_func): """ Return list of tests file names to run """ - all_tests = list(self.get_selected_tests()) + all_tests = list(self.get_selected_tests(filter_func)) all_tests = all_tests * self.args.test_runs all_tests.sort(key=sort_key) return all_tests - def get_selected_tests(self): + def get_selected_tests(self, filter_func): """ Find all files with tests, filter, render templates """ @@ -804,11 +821,13 @@ class TestSuite: continue if USE_JINJA and test_name.endswith(".gen.sql"): continue + if not filter_func(test_name): + continue test_name = self.render_test_template(j2env, self.suite_path, test_name) yield test_name @staticmethod - def readTestSuite(args, suite_dir_name: str): + def read_test_suite(args, suite_dir_name: str): def is_data_present(): return int(clickhouse_execute(args, 'EXISTS TABLE test.hits')) @@ -1192,7 +1211,7 @@ def main(args): if server_died.is_set(): break - test_suite = TestSuite.readTestSuite(args, suite) + test_suite = TestSuite.read_test_suite(args, suite) if test_suite is None: continue @@ -1325,6 +1344,9 @@ if __name__ == '__main__': parser.add_argument('--print-time', action='store_true', dest='print_time', help='Print test time') parser.add_argument('--check-zookeeper-session', action='store_true', help='Check ZooKeeper session uptime to determine if failed test should be retried') + parser.add_argument('--run-by-hash-num', type=int, help='Run tests matching crc32(test_name) % run_by_hash_total == run_by_hash_num') + parser.add_argument('--run-by-hash-total', type=int, help='Total test groups for crc32(test_name) % run_by_hash_total == run_by_hash_num') + group = parser.add_mutually_exclusive_group(required=False) group.add_argument('--zookeeper', action='store_true', default=None, dest='zookeeper', help='Run zookeeper related tests') group.add_argument('--no-zookeeper', action='store_false', default=None, dest='zookeeper', help='Do not run zookeeper related tests') From 32f78f2ba4a9ce8caff968c19d01e3451d705f8c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 12:09:11 +0300 Subject: [PATCH 212/262] Better check name --- tests/ci/functional_test_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 90f83ef3be9..a3ca357db18 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -131,7 +131,7 @@ if __name__ == "__main__": if 'RUN_BY_HASH_NUM' in os.environ: run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) - check_name_with_group = check_name + f' 
[{run_by_hash_num}/{run_by_hash_total}]' + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' else: run_by_hash_num = 0 run_by_hash_total = 0 From 7638ce558f7b0df1f2102e730adb1c53c93a0c56 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 10 Dec 2021 17:18:21 +0800 Subject: [PATCH 213/262] Add test --- .../02132_client_history_navigation.expect | 33 +++++++++++++++++++ .../02132_client_history_navigation.reference | 0 2 files changed, 33 insertions(+) create mode 100755 tests/queries/0_stateless/02132_client_history_navigation.expect create mode 100644 tests/queries/0_stateless/02132_client_history_navigation.reference diff --git a/tests/queries/0_stateless/02132_client_history_navigation.expect b/tests/queries/0_stateless/02132_client_history_navigation.expect new file mode 100755 index 00000000000..129a65e0a0a --- /dev/null +++ b/tests/queries/0_stateless/02132_client_history_navigation.expect @@ -0,0 +1,33 @@ +#!/usr/bin/expect -f +# Tags: no-fasttest + +log_user 0 +set timeout 3 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +# useful debugging configuration +# exp_internal 1 + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --highlight 0" +expect ":) " + +# Make a query +send -- "SELECT 1\r" +expect "1" +expect ":) " +send -- "SELECT 2" +send -- "\033\[A" +expect "SELECT 1" +send -- "\033\[B" +expect "SELECT 2" +send -- "\r" +expect "2" +send -- "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02132_client_history_navigation.reference b/tests/queries/0_stateless/02132_client_history_navigation.reference new file mode 100644 index 00000000000..e69de29bb2d From 52fa82b4d414a168ed2d4e21460f0ca4ec2a35a9 Mon Sep 17 00:00:00 2001 From: vxider Date: Fri, 10 Dec 2021 09:43:44 +0000 Subject: [PATCH 214/262] rename window-view function to time window function --- ...-functions.md => time-window-functions.md} | 18 ++++---- .../sql-reference/statements/create/view.md | 8 ++-- ...-functions.md => time-window-functions.md} | 18 ++++---- .../sql-reference/statements/create/view.md | 10 ++--- ...ionsWindow.cpp => FunctionsTimeWindow.cpp} | 44 +++++++++---------- ...unctionsWindow.h => FunctionsTimeWindow.h} | 24 +++++----- src/Functions/registerFunctions.cpp | 4 +- src/Storages/WindowView/StorageWindowView.cpp | 10 ++--- 8 files changed, 68 insertions(+), 68 deletions(-) rename docs/en/sql-reference/functions/{window-view-functions.md => time-window-functions.md} (89%) rename docs/zh/sql-reference/functions/{window-view-functions.md => time-window-functions.md} (86%) rename src/Functions/{FunctionsWindow.cpp => FunctionsTimeWindow.cpp} (94%) rename src/Functions/{FunctionsWindow.h => FunctionsTimeWindow.h} (88%) diff --git a/docs/en/sql-reference/functions/window-view-functions.md b/docs/en/sql-reference/functions/time-window-functions.md similarity index 89% rename from docs/en/sql-reference/functions/window-view-functions.md rename to docs/en/sql-reference/functions/time-window-functions.md index 3f560aa96b9..2ea44a6e585 100644 --- a/docs/en/sql-reference/functions/window-view-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -1,13 +1,13 @@ --- toc_priority: 68 -toc_title: Window View +toc_title: Time Window --- -# Window View Functions {#window-view-functions} +# Time Window Functions {#time-window-functions} -Window view functions 
return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: +Time window functions return the inclusive lower and exclusive upper bound of the corresponding window. The functions for working with WindowView are listed below: -## tumble {#window-view-functions-tumble} +## tumble {#time-window-functions-tumble} A tumbling time window assigns records to non-overlapping, continuous windows with a fixed duration (`interval`). @@ -42,7 +42,7 @@ Result: └───────────────────────────────────────────────┘ ``` -## hop {#window-view-functions-hop} +## hop {#time-window-functions-hop} A hopping time window has a fixed duration (`window_interval`) and hops by a specified hop interval (`hop_interval`). If the `hop_interval` is smaller than the `window_interval`, hopping windows are overlapping. Thus, records can be assigned to multiple windows. @@ -79,7 +79,7 @@ Result: └───────────────────────────────────────────────────────────┘ ``` -## tumbleStart {#window-view-functions-tumblestart} +## tumbleStart {#time-window-functions-tumblestart} Returns the inclusive lower bound of the corresponding tumbling window. @@ -87,7 +87,7 @@ Returns the inclusive lower bound of the corresponding tumbling window. tumbleStart(time_attr, interval [, timezone]); ``` -## tumbleEnd {#window-view-functions-tumbleend} +## tumbleEnd {#time-window-functions-tumbleend} Returns the exclusive upper bound of the corresponding tumbling window. @@ -95,7 +95,7 @@ Returns the exclusive upper bound of the corresponding tumbling window. tumbleEnd(time_attr, interval [, timezone]); ``` -## hopStart {#window-view-functions-hopstart} +## hopStart {#time-window-functions-hopstart} Returns the inclusive lower bound of the corresponding hopping window. @@ -103,7 +103,7 @@ Returns the inclusive lower bound of the corresponding hopping window. hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` -## hopEnd {#window-view-functions-hopend} +## hopEnd {#time-window-functions-hopend} Returns the exclusive upper bound of the corresponding hopping window. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 1e2e10b5cb6..8d7d7b48c05 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -251,22 +251,22 @@ Most common uses of live view tables include: Enable usage of window views and `WATCH` query using [allow_experimental_window_view](../../../operations/settings/settings.md#allow-experimental-window-view) setting. Input the command `set allow_experimental_window_view = 1`. ``` sql -CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_view_function +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY time_window_function ``` Window view can aggregate data by time window and output the results when the window is ready to fire. It stores the partial aggregation results in an inner(or specified) table to reduce latency and can push the processing result to a specified table or push notifications using the WATCH query. Creating a window view is similar to creating `MATERIALIZED VIEW`. Window view needs an inner storage engine to store intermediate data. 
The inner storage will use `AggregatingMergeTree` as the default engine. -### Window View Functions {#window-view-windowviewfunctions} +### Time Window Functions {#window-view-timewindowfunctions} -[Window view functions](../../functions/window-view-functions.md) are used to get the lower and upper window bound of records. The window view needs to be used with a window view function. +[Time window functions](../../functions/time-window-functions.md) are used to get the lower and upper window bound of records. The window view needs to be used with a time window function. ### TIME ATTRIBUTES {#window-view-timeattributes} Window view supports **processing time** and **event time** process. -**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the window view function to a table column or using the function `now()`. The following query creates a window view with processing time. +**Processing time** allows window view to produce results based on the local machine's time and is used by default. It is the most straightforward notion of time but does not provide determinism. The processing time attribute can be defined by setting the `time_attr` of the time window function to a table column or using the function `now()`. The following query creates a window view with processing time. ``` sql CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id diff --git a/docs/zh/sql-reference/functions/window-view-functions.md b/docs/zh/sql-reference/functions/time-window-functions.md similarity index 86% rename from docs/zh/sql-reference/functions/window-view-functions.md rename to docs/zh/sql-reference/functions/time-window-functions.md index b203fc41206..ab28a47ad55 100644 --- a/docs/zh/sql-reference/functions/window-view-functions.md +++ b/docs/zh/sql-reference/functions/time-window-functions.md @@ -1,13 +1,13 @@ --- toc_priority: 68 -toc_title: Window View +toc_title: 时间窗口 --- -# Window View 函数 {#window-view-han-shu} +# 时间窗口函数 {#time-window-han-shu} -Window view函数用于获取窗口的起始(包含边界)和结束时间(不包含边界)。系统支持的window view函数如下: +时间窗口函数用于获取窗口的起始(包含边界)和结束时间(不包含边界)。系统支持的时间窗口函数如下: -## tumble {#window-view-functions-tumble} +## tumble {#time-window-functions-tumble} tumble窗口是连续的、不重叠的固定大小(`interval`)时间窗口。 @@ -42,7 +42,7 @@ SELECT tumble(now(), toIntervalDay('1')) └───────────────────────────────────────────────┘ ``` -## hop {#window-view-functions-hop} +## hop {#time-window-functions-hop} hop窗口是一个固定大小(`window_interval`)的时间窗口,并按照一个固定的滑动间隔(`hop_interval`)滑动。当滑动间隔小于窗口大小时,滑动窗口间存在重叠,此时一个数据可能存在于多个窗口。 @@ -79,7 +79,7 @@ SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) └───────────────────────────────────────────────────────────┘ ``` -## tumbleStart {#window-view-functions-tumblestart} +## tumbleStart {#time-window-functions-tumblestart} 返回tumble窗口的开始时间(包含边界)。 @@ -87,7 +87,7 @@ SELECT hop(now(), INTERVAL '1' SECOND, INTERVAL '2' SECOND) tumbleStart(time_attr, interval [, timezone]); ``` -## tumbleEnd {#window-view-functions-tumbleend} +## tumbleEnd {#time-window-functions-tumbleend} 返回tumble窗口的结束时间(不包含边界)。 @@ -95,7 +95,7 @@ tumbleStart(time_attr, interval [, timezone]); tumbleEnd(time_attr, interval [, timezone]); ``` -## hopStart {#window-view-functions-hopstart} +## hopStart {#time-window-functions-hopstart} 
返回hop窗口的开始时间(包含边界)。 @@ -103,7 +103,7 @@ tumbleEnd(time_attr, interval [, timezone]); hopStart(time_attr, hop_interval, window_interval [, timezone]); ``` -## hopEnd {#window-view-functions-hopend} +## hopEnd {#time-window-functions-hopend} 返回hop窗口的结束时间(不包含边界)。 diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 967db792038..506f1717b03 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -250,28 +250,28 @@ Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table defa `set allow_experimental_window_view = 1`。 ``` sql -CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY window_view_function +CREATE WINDOW VIEW [IF NOT EXISTS] [db.]table_name [TO [db.]table_name] [ENGINE = engine] [WATERMARK = strategy] [ALLOWED_LATENESS = interval_function] AS SELECT ... GROUP BY time_window_function ``` Window view可以通过时间窗口聚合数据,并在满足窗口触发条件时自动触发对应窗口计算。其通过将计算状态保存降低处理延迟,支持将处理结果输出至目标表或通过`WATCH`语句输出至终端。 创建window view的方式和创建物化视图类似。Window view使用默认为`AggregatingMergeTree`的内部存储引擎存储计算中间状态。 -### Window View 函数 {#window-view-han-shu} +### 时间窗口函数 {#window-view-shi-jian-chuang-kou-han-shu} -[Window view函数](../../functions/window-view-functions.md)用于获取窗口的起始和结束时间。Window view需要和window view函数配合使用。 +[时间窗口函数](../../functions/time-window-functions.md)用于获取窗口的起始和结束时间。Window view需要和时间窗口函数配合使用。 ### 时间属性 {#window-view-shi-jian-shu-xing} Window view 支持**处理时间**和**事件时间**两种时间类型。 -**处理时间**为默认时间类型,该模式下window view使用本地机器时间计算窗口数据。“处理时间”时间类型计算简单,但具有不确定性。该模式下时间可以为window view函数的第一个参数`time_attr`,或通过函数`now()`使用当前机器时间。下面的例子展示了使用“处理时间”创建的window view的例子。 +**处理时间**为默认时间类型,该模式下window view使用本地机器时间计算窗口数据。“处理时间”时间类型计算简单,但具有不确定性。该模式下时间可以为时间窗口函数的第一个参数`time_attr`,或通过函数`now()`使用当前机器时间。下面的例子展示了使用“处理时间”创建window view的例子。 ``` sql CREATE WINDOW VIEW wv AS SELECT count(number), tumbleStart(w_id) as w_start from date GROUP BY tumble(now(), INTERVAL '5' SECOND) as w_id ``` -**事件时间** 是事件真实发生的时间,该时间往往在事件发生时便嵌入数据记录。事件时间处理提供较高的确定性,可以处理乱序数据以及迟到数据。Window view 通过水位线(`WATERMARK`)启用事件时间处理。 +**事件时间** 是事件真实发生的时间,该时间往往在事件发生时便嵌入数据记录。事件时间处理提供较高的确定性,可以处理乱序数据以及迟到数据。Window view通过水位线(`WATERMARK`)启用事件时间处理。 Window view提供如下三种水位线策略: diff --git a/src/Functions/FunctionsWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp similarity index 94% rename from src/Functions/FunctionsWindow.cpp rename to src/Functions/FunctionsTimeWindow.cpp index be336aa9a7c..d6f6becde9a 100644 --- a/src/Functions/FunctionsWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace DB { @@ -114,7 +114,7 @@ namespace } template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "tumble"; @@ -211,7 +211,7 @@ struct WindowImpl }; template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "tumbleStart"; @@ -231,7 +231,7 @@ struct WindowImpl } else { - return std::static_pointer_cast(WindowImpl::getReturnType(arguments, function_name)) + return std::static_pointer_cast(TimeWindowImpl::getReturnType(arguments, function_name)) ->getElement(0); } } @@ -249,19 +249,19 @@ struct WindowImpl result_column = time_column.column; } else - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 0, function_name); 
} }; template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "tumbleEnd"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { - return WindowImpl::getReturnType(arguments, function_name); + return TimeWindowImpl::getReturnType(arguments, function_name); } [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String& function_name) @@ -277,13 +277,13 @@ struct WindowImpl result_column = time_column.column; } else - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 1, function_name); } }; template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "hop"; @@ -415,7 +415,7 @@ struct WindowImpl }; template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "windowID"; @@ -547,7 +547,7 @@ struct WindowImpl [[maybe_unused]] static ColumnPtr dispatchForTumbleColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) { - ColumnPtr column = WindowImpl::dispatchForColumns(arguments, function_name); + ColumnPtr column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(column, 1, function_name); } @@ -567,7 +567,7 @@ struct WindowImpl }; template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "hopStart"; @@ -587,7 +587,7 @@ struct WindowImpl } else { - return std::static_pointer_cast(WindowImpl::getReturnType(arguments, function_name))->getElement(0); + return std::static_pointer_cast(TimeWindowImpl::getReturnType(arguments, function_name))->getElement(0); } } @@ -604,19 +604,19 @@ struct WindowImpl result_column = time_column.column; } else - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 0, function_name); } }; template <> -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "hopEnd"; [[maybe_unused]] static DataTypePtr getReturnType(const ColumnsWithTypeAndName & arguments, const String & function_name) { - return WindowImpl::getReturnType(arguments, function_name); + return TimeWindowImpl::getReturnType(arguments, function_name); } [[maybe_unused]] static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name) @@ -632,25 +632,25 @@ struct WindowImpl result_column = time_column.column; } else - result_column = WindowImpl::dispatchForColumns(arguments, function_name); + result_column = TimeWindowImpl::dispatchForColumns(arguments, function_name); return executeWindowBound(result_column, 1, function_name); } }; template -DataTypePtr FunctionWindow::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const +DataTypePtr FunctionTimeWindow::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const { - return WindowImpl::getReturnType(arguments, name); + return TimeWindowImpl::getReturnType(arguments, name); } template -ColumnPtr FunctionWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const +ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const { - return 
WindowImpl::dispatchForColumns(arguments, name); + return TimeWindowImpl::dispatchForColumns(arguments, name); } -void registerFunctionsWindow(FunctionFactory& factory) +void registerFunctionsTimeWindow(FunctionFactory& factory) { factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsWindow.h b/src/Functions/FunctionsTimeWindow.h similarity index 88% rename from src/Functions/FunctionsWindow.h rename to src/Functions/FunctionsTimeWindow.h index be4513225cf..11da7042f15 100644 --- a/src/Functions/FunctionsWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -7,7 +7,7 @@ namespace DB { -/** Window functions: +/** Time window functions: * * tumble(time_attr, interval [, timezone]) * @@ -118,7 +118,7 @@ struct ToStartOfTransform; #undef ADD_TIME template -struct WindowImpl +struct TimeWindowImpl { static constexpr auto name = "UNKNOWN"; @@ -128,11 +128,11 @@ struct WindowImpl }; template -class FunctionWindow : public IFunction +class FunctionTimeWindow : public IFunction { public: - static constexpr auto name = WindowImpl::name; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static constexpr auto name = TimeWindowImpl::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } @@ -145,11 +145,11 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const override; }; -using FunctionTumble = FunctionWindow; -using FunctionTumbleStart = FunctionWindow; -using FunctionTumbleEnd = FunctionWindow; -using FunctionHop = FunctionWindow; -using FunctionWindowId = FunctionWindow; -using FunctionHopStart = FunctionWindow; -using FunctionHopEnd = FunctionWindow; +using FunctionTumble = FunctionTimeWindow; +using FunctionTumbleStart = FunctionTimeWindow; +using FunctionTumbleEnd = FunctionTimeWindow; +using FunctionHop = FunctionTimeWindow; +using FunctionWindowId = FunctionTimeWindow; +using FunctionHopStart = FunctionTimeWindow; +using FunctionHopEnd = FunctionTimeWindow; } diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 8db19435443..2b56615ee6f 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -54,7 +54,7 @@ void registerFunctionValidateNestedArraySizes(FunctionFactory & factory); void registerFunctionsSnowflake(FunctionFactory & factory); void registerFunctionTid(FunctionFactory & factory); void registerFunctionLogTrace(FunctionFactory & factory); -void registerFunctionsWindow(FunctionFactory &); +void registerFunctionsTimeWindow(FunctionFactory &); #if USE_SSL void registerFunctionEncrypt(FunctionFactory & factory); @@ -115,7 +115,7 @@ void registerFunctions() registerFunctionsStringHash(factory); registerFunctionValidateNestedArraySizes(factory); registerFunctionsSnowflake(factory); - registerFunctionsWindow(factory); + registerFunctionsTimeWindow(factory); #if USE_SSL registerFunctionEncrypt(factory); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 51f2a37aa8f..0a674ae62e9 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -93,7 +93,7 @@ namespace temp_node->setAlias(""); if 
(startsWith(t->arguments->children[0]->getColumnName(), "toDateTime")) throw Exception( - "The first argument of window function should not be a constant value.", + "The first argument of time window function should not be a constant value.", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); if (!data.window_function) { @@ -108,7 +108,7 @@ namespace else { if (serializeAST(*temp_node) != data.serialized_window_function) - throw Exception("WINDOW VIEW only support ONE WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + throw Exception("WINDOW VIEW only support ONE TIME WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); t->name = "windowID"; } } @@ -1042,14 +1042,14 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query) if (!query_info_data.is_tumble && !query_info_data.is_hop) throw Exception(ErrorCodes::INCORRECT_QUERY, - "WINDOW FUNCTION is not specified for {}", getName()); + "TIME WINDOW FUNCTION is not specified for {}", getName()); window_id_name = query_info_data.window_id_name; window_id_alias = query_info_data.window_id_alias; timestamp_column_name = query_info_data.timestamp_column_name; is_tumble = query_info_data.is_tumble; - // Parse window function + // Parse time window function ASTFunction & window_function = typeid_cast(*query_info_data.window_function); const auto & arguments = window_function.arguments->children; extractWindowArgument( From 84320f7ba9113ede5bdc7fd260379a2e7bfb0b0a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 10 Dec 2021 12:47:05 +0300 Subject: [PATCH 215/262] Fix first time calculations of the ends of quota intervals. --- src/Access/EnabledQuota.cpp | 25 +++++++++++++++++-------- tests/integration/test_quota/test.py | 15 --------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 359a1642840..4d3a963fed8 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -65,14 +65,6 @@ struct EnabledQuota::Impl end = end + duration * n; if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) { - /// We reset counters only if the interval's end has been calculated before. - /// If it hasn't we just calculate the interval's end for the first time and don't reset counters yet. - if (!interval.end_of_interval.load().count()) - { - /// We need to calculate end of the interval if it hasn't been calculated before. - bool dummy; - getEndOfInterval(interval, current_time, dummy); - } need_reset_counters = true; break; } @@ -99,10 +91,19 @@ struct EnabledQuota::Impl { for (const auto & interval : intervals.intervals) { + if (!interval.end_of_interval.load().count()) + { + /// We need to calculate end of the interval if it hasn't been calculated before. + bool dummy; + getEndOfInterval(interval, current_time, dummy); + } + ResourceAmount used = (interval.used[resource_type] += amount); ResourceAmount max = interval.max[resource_type]; + if (!max) continue; + if (used > max) { bool counters_were_reset = false; @@ -127,10 +128,18 @@ struct EnabledQuota::Impl { for (const auto & interval : intervals.intervals) { + if (!interval.end_of_interval.load().count()) + { + /// We need to calculate end of the interval if it hasn't been calculated before. 
+ bool dummy; + getEndOfInterval(interval, current_time, dummy); + } + ResourceAmount used = interval.used[resource_type]; ResourceAmount max = interval.max[resource_type]; if (!max) continue; + if (used > max) { bool counters_were_reset = false; diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 9311b0bad36..4149987996b 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -393,45 +393,30 @@ def test_query_inserts(): def test_consumption_of_show_tables(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert instance.query("SHOW TABLES") == "test_table\n" assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_databases(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert instance.query("SHOW DATABASES") == "INFORMATION_SCHEMA\ndefault\ninformation_schema\nsystem\n" assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t4\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_clusters(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert len(instance.query("SHOW CLUSTERS")) > 0 assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_processlist(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) instance.query("SHOW PROCESSLIST") assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N.*", instance.query("SHOW QUOTA")) def test_consumption_of_show_privileges(): - assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t.*\\t\\\\N.*", - instance.query("SHOW QUOTA")) assert len(instance.query("SHOW PRIVILEGES")) > 0 assert re.match( "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N.*", From d021e13b8ceb86cd0765a45b272167c0b1670491 Mon Sep 17 00:00:00 2001 From: vxider Date: Fri, 10 Dec 2021 09:59:50 +0000 Subject: [PATCH 216/262] rename window function name --- src/Functions/FunctionsTimeWindow.cpp | 4 ++-- src/Functions/FunctionsTimeWindow.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index d6f6becde9a..79ce7356ee7 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -638,13 +638,13 @@ struct TimeWindowImpl } }; -template +template DataTypePtr FunctionTimeWindow::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const { return TimeWindowImpl::getReturnType(arguments, name); } -template +template ColumnPtr FunctionTimeWindow::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t /*input_rows_count*/) const { return TimeWindowImpl::dispatchForColumns(arguments, name); diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 11da7042f15..6e3b5da9971 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -30,7 +30,7 @@ namespace DB * hopEnd(time_attr, hop_interval, window_interval [, 
timezone]) * */ -enum WindowFunctionName +enum TimeWindowFunctionName { TUMBLE, TUMBLE_START, @@ -117,7 +117,7 @@ struct ToStartOfTransform; ADD_TIME(Second, 1) #undef ADD_TIME -template +template struct TimeWindowImpl { static constexpr auto name = "UNKNOWN"; @@ -127,7 +127,7 @@ struct TimeWindowImpl static ColumnPtr dispatchForColumns(const ColumnsWithTypeAndName & arguments, const String & function_name); }; -template +template class FunctionTimeWindow : public IFunction { public: From 79b1fdfee5357c9416d876d03fa38c1f87b1c9c7 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 10 Dec 2021 13:14:45 +0300 Subject: [PATCH 217/262] disable flaky tests --- .../queries/0_stateless/01052_window_view_proc_tumble_to_now.sql | 1 + tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql | 1 + tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql | 1 + tests/queries/0_stateless/01055_window_view_proc_hop_to.sql | 1 + .../0_stateless/01057_window_view_event_tumble_to_strict_asc.sql | 1 + .../0_stateless/01058_window_view_event_hop_to_strict_asc.sql | 1 + .../0_stateless/01060_window_view_event_tumble_to_asc.sql | 1 + tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql | 1 + .../0_stateless/01063_window_view_event_tumble_to_bounded.sql | 1 + .../0_stateless/01064_window_view_event_hop_to_bounded.sql | 1 + .../01067_window_view_event_tumble_to_asc_lateness.sql | 1 + .../01068_window_view_event_tumble_to_bounded_lateness.sql | 1 + 12 files changed, 12 insertions(+) diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql index 2d01e1205b2..35aa6dc96b3 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql index 9f3dc3ca89e..42b8c6cad62 100644 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql index 86b7ab89150..821ab8bbac1 100644 --- a/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql +++ b/tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql index 1da497092c5..df47b527183 100644 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql index de738662817..12a5c2a4f65 100644 --- a/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql +++ b/tests/queries/0_stateless/01057_window_view_event_tumble_to_strict_asc.sql @@ 
-1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql index c9846cbd7cd..97f2481d9e8 100644 --- a/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql +++ b/tests/queries/0_stateless/01058_window_view_event_hop_to_strict_asc.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql index cb27e881870..6c3c5e94bc5 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql index c021bd1d4a1..4bb3e06305d 100644 --- a/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql +++ b/tests/queries/0_stateless/01061_window_view_event_hop_to_asc.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql index 6b17d04517a..165a418ed47 100644 --- a/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql +++ b/tests/queries/0_stateless/01063_window_view_event_tumble_to_bounded.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql index 2f4b1c13d47..fe0a9eadebe 100644 --- a/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql +++ b/tests/queries/0_stateless/01064_window_view_event_hop_to_bounded.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql index eb57d9b6b15..0cd88c48f3a 100644 --- a/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql +++ b/tests/queries/0_stateless/01067_window_view_event_tumble_to_asc_lateness.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; diff --git a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql index bc6d3a30947..64cd4e81c1b 100644 --- a/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql +++ b/tests/queries/0_stateless/01068_window_view_event_tumble_to_bounded_lateness.sql @@ -1,3 +1,4 @@ +-- Tags: disabled SET allow_experimental_window_view = 1; DROP TABLE IF EXISTS mt; From 2c7bb56a2cfa9a0cefd998c0a74c2de8063b61ee Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 13:22:29 +0300 Subject: [PATCH 218/262] Split integration tests into parts --- .github/workflows/main.yml | 204 ++++++++++++++++++++++++++++- tests/ci/integration_test_check.py 
| 27 ++-- tests/integration/ci-runner.py | 21 +++ 3 files changed, 237 insertions(+), 15 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index fe124320adb..57a30d44fae 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1689,7 +1689,7 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: + IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: @@ -1705,6 +1705,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1717,7 +1719,67 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: @@ -1733,6 +1795,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1745,7 +1809,97 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: 
actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -1761,6 +1915,38 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + 
CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1981,9 +2167,15 @@ jobs: - ASTFuzzerTestTsan - ASTFuzzerTestMSan - ASTFuzzerTestUBSan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - PVSCheck - UnitTestsAsan - UnitTestsTsan diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 723e81d63cb..01799447184 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -22,8 +22,6 @@ from rerun_helper import RerunHelper from tee_popen import TeePopen -DOWNLOAD_RETRIES_COUNT = 5 - IMAGES = [ "clickhouse/integration-tests-runner", "clickhouse/mysql-golang-client", @@ -36,7 +34,7 @@ IMAGES = [ "clickhouse/integration-helper", ] -def get_json_params_dict(check_name, pr_info, docker_images): +def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num): return { 'context_name': check_name, 'commit': pr_info.sha, @@ -46,6 +44,8 @@ def get_json_params_dict(check_name, pr_info, docker_images): 'shuffle_test_groups': False, 'use_tmpfs': False, 'disable_net_host': True, + 'run_by_hash_total': run_by_hash_total, + 'run_by_hash_num': run_by_hash_num, } def get_env_for_runner(build_path, repo_path, result_path, work_path): @@ -107,6 +107,15 @@ if __name__ == "__main__": check_name = sys.argv[1] + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -115,12 +124,12 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) - images = get_images_with_versions(temp_path, IMAGES) + images = get_images_with_versions(reports_path, IMAGES) images_with_versions = {i.name: i.version for i in images} result_path = os.path.join(temp_path, "output_dir") if not os.path.exists(result_path): @@ -140,7 +149,7 @@ if __name__ == "__main__": json_path = os.path.join(work_path, 'params.json') with open(json_path, 'w', encoding='utf-8') as json_params: - json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions))) + json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num))) output_path_log = os.path.join(result_path, "main_script_log.txt") @@ -162,9 +171,9 @@ if __name__ == "__main__": mark_flaky_tests(ch_helper, check_name, test_results) s3_helper = S3Helper('https://s3.amazonaws.com') - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name, False) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, 
[output_path_log] + additional_logs, check_name_with_group, False) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 25d09a8c4c5..c8745294c5b 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -10,6 +10,8 @@ from collections import defaultdict import random import json import csv +# for crc32 +import zlib MAX_RETRY = 3 @@ -26,6 +28,9 @@ MAX_TIME_SECONDS = 3600 MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes TASK_TIMEOUT = 8 * 60 * 60 # 8 hours +def stringhash(s): + return zlib.crc32(s.encode('utf-8')) + def get_tests_to_run(pr_info): result = set([]) changed_files = pr_info['changed_files'] @@ -183,6 +188,13 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) + if 'run_by_hash_total' in self.params: + self.run_by_hash_total = self.params['run_by_hash_total'] + self.run_by_hash_num = self.params['run_by_hash_num'] + else: + self.run_by_hash_total = 0 + self.run_by_hash_num = 0 + def path(self): return self.result_path @@ -576,6 +588,15 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info("Dump iptables before run %s", subprocess.check_output("sudo iptables -L", shell=True)) all_tests = self._get_all_tests(repo_path) + + if self.run_by_hash_total != 0: + grouped_tests = self.group_test_by_file(all_tests) + all_filtered_by_hash_tests = [] + for group, tests_in_group in grouped_tests.items(): + if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num: + all_filtered_by_hash_tests += tests_in_group + all_tests = all_filtered_by_hash_tests + parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info("Found %s tests first 3 %s", len(all_tests), ' '.join(all_tests[:3])) filtered_sequential_tests = list(filter(lambda test: test in all_tests, parallel_skip_tests)) From 8f8f65e6e09080ba74408b68a5794661644feb17 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 13:30:12 +0300 Subject: [PATCH 219/262] Fix clickhouse test --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 177e2b35c4e..784edfc1917 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -767,7 +767,7 @@ class TestSuite: filter_func = lambda x: True - if args.run_by_hash_num and args.run_by_hash_total: + if args.run_by_hash_num is not None and args.run_by_hash_total is not None: if args.run_by_hash_num > args.run_by_hash_total: raise Exception(f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}") From 3e9eb86e2b5e4a5d6c7bd7a61d5c92a0d952a45b Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 13:57:10 +0300 Subject: [PATCH 220/262] Fix master yml --- .github/workflows/master.yml | 232 
++++++++++++++++++++++++++++++++++- 1 file changed, 226 insertions(+), 6 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f7c25bb28d3..cdf66d26310 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -1459,7 +1459,7 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: + IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: @@ -1475,6 +1475,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1487,7 +1489,67 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: @@ -1503,6 +1565,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1515,7 +1579,97 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + 
with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -1531,6 +1685,66 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests 
(release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsFlakyCheck: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan_flaky_check + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests flaky check (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1858,9 +2072,15 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - CompatibilityCheck - ASTFuzzerTestDebug - ASTFuzzerTestAsan From 9aa98d7b991bcaed538cb2b9baaa5ee350b82f52 Mon Sep 17 00:00:00 2001 From: vxider Date: Fri, 10 Dec 2021 11:09:04 +0000 Subject: [PATCH 221/262] fix flaky window view tests --- ...1053_window_view_proc_hop_to_now.reference | 1 - .../01053_window_view_proc_hop_to_now.sh | 27 +++++++++++++++++++ .../01053_window_view_proc_hop_to_now.sql | 17 ------------ ...01054_window_view_proc_tumble_to.reference | 1 - .../01054_window_view_proc_tumble_to.sh | 27 +++++++++++++++++++ .../01054_window_view_proc_tumble_to.sql | 18 ------------- .../01055_window_view_proc_hop_to.reference | 1 - .../01055_window_view_proc_hop_to.sh | 27 +++++++++++++++++++ .../01055_window_view_proc_hop_to.sql | 18 ------------- 9 files changed, 81 insertions(+), 56 deletions(-) create mode 100755 tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh delete mode 100644 tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sql create mode 100755 tests/queries/0_stateless/01054_window_view_proc_tumble_to.sh delete mode 100644 tests/queries/0_stateless/01054_window_view_proc_tumble_to.sql create mode 100755 tests/queries/0_stateless/01055_window_view_proc_hop_to.sh delete mode 100644 tests/queries/0_stateless/01055_window_view_proc_hop_to.sql diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.reference b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.reference index 0d66ea1aee9..d00491fd7e5 100644 --- a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.reference +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.reference @@ -1,2 +1 @@ -0 1 diff --git a/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh new file mode 100755 index 00000000000..5c649b4c64b --- /dev/null +++ b/tests/queries/0_stateless/01053_window_view_proc_hop_to_now.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < Date: Thu, 9 Dec 2021 20:45:54 +0300 Subject: [PATCH 222/262] Fix test_prometheus_endpoint --- tests/integration/test_prometheus_endpoint/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_prometheus_endpoint/test.py b/tests/integration/test_prometheus_endpoint/test.py index 06276803c3d..60d9164acd2 100644 --- a/tests/integration/test_prometheus_endpoint/test.py +++ b/tests/integration/test_prometheus_endpoint/test.py @@ -30,7 +30,7 @@ def parse_response_line(line): if line.startswith("#"): return {} - match = re.match('^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? (\d)', line) + match = re.match('^([a-zA-Z_:][a-zA-Z0-9_:]+)(\{.*\})? -?(\d)', line) assert match, line name, _, val = match.groups() return {name: int(val)} From 7bf5c8356114145ea0c99cdda77af5e81f3f9086 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 10 Dec 2021 15:45:04 +0300 Subject: [PATCH 223/262] Fix queries with hasColumnInTable constant condition and non existing column. --- src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp | 2 +- ...25_constant_if_condition_and_not_existing_column.reference | 4 ++++ .../02125_constant_if_condition_and_not_existing_column.sql | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 0440c52797c..802bf4e43ce 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -51,7 +51,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v } } } - else if (function->name == "toUInt8" || function->name == "toInt8") + else if (function->name == "toUInt8" || function->name == "toInt8" || function->name == "identity") { if (const auto * expr_list = function->arguments->as()) { diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference index 67f2590a0c6..a7903610a42 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.reference @@ -6,3 +6,7 @@ 42 42 42 +SELECT + x, + concat(x, \'_\') +FROM test diff --git a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql index ad3d417bc26..d2041a612a6 100644 --- a/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql +++ b/tests/queries/0_stateless/02125_constant_if_condition_and_not_existing_column.sql @@ -11,4 +11,7 @@ select if(toUInt8(1), 42, y) from test; select if(toInt8(1), 42, y) from test; select if(toUInt8(toUInt8(0)), y, 42) from test; select if(cast(cast(0, 'UInt8'), 'UInt8'), y, 42) from test; + +explain syntax select x, if((select hasColumnInTable(currentDatabase(), 'test', 'y')), y, x || '_') from test; + drop table if exists t; From ed4ea6fe3a099ba4cd7592b40914431f6d8e7a1a Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 10 Dec 2021 15:57:00 +0300 Subject: [PATCH 224/262] Handle const column in JoinCommon::removeColumnNullability --- src/Interpreters/join_common.cpp | 6 ++++++ .../queries/0_stateless/02133_issue_32458.reference | 0 tests/queries/0_stateless/02133_issue_32458.sql | 
13 +++++++++++++ 3 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/02133_issue_32458.reference create mode 100644 tests/queries/0_stateless/02133_issue_32458.sql diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp index bf20bef6992..b571a8e8e10 100644 --- a/src/Interpreters/join_common.cpp +++ b/src/Interpreters/join_common.cpp @@ -225,7 +225,13 @@ void removeColumnNullability(ColumnWithTypeAndName & column) if (column.column && column.column->isNullable()) { + column.column = column.column->convertToFullColumnIfConst(); const auto * nullable_col = checkAndGetColumn(*column.column); + if (!nullable_col) + { + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' is expected to be nullable", column.dumpStructure()); + } + MutableColumnPtr mutable_column = nullable_col->getNestedColumn().cloneEmpty(); insertFromNullableOrDefault(mutable_column, nullable_col); column.column = std::move(mutable_column); diff --git a/tests/queries/0_stateless/02133_issue_32458.reference b/tests/queries/0_stateless/02133_issue_32458.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02133_issue_32458.sql b/tests/queries/0_stateless/02133_issue_32458.sql new file mode 100644 index 00000000000..16af361db7a --- /dev/null +++ b/tests/queries/0_stateless/02133_issue_32458.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE t1 (`id` Int32, `key` String) ENGINE = Memory; +CREATE TABLE t2 (`id` Int32, `key` String) ENGINE = Memory; + +INSERT INTO t1 VALUES (0, ''); +INSERT INTO t2 VALUES (0, ''); + +SELECT * FROM t1 ANY INNER JOIN t2 ON ((NULL = t1.key) = t2.id) AND (('' = t1.key) = t2.id); + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; From 25427719d40e521846187e68294f9141ed037327 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 10 Dec 2021 16:29:51 +0300 Subject: [PATCH 225/262] Try fix 'Directory tmp_merge_' already exists (#32201) * try fix 'directory tmp_merge_' already exists * fix * fix * fix Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 92 ++++++++++--------- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 4 +- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 21dbedbb6ac..ea7bc0d4db5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -424,52 +424,59 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns) void IMergeTreeDataPart::removeIfNeeded() { - if (state == State::DeleteOnDestroy || is_temp) + if (!is_temp && state != State::DeleteOnDestroy) + return; + + try { - try - { - auto path = getFullRelativePath(); + auto path = getFullRelativePath(); - if (!volume->getDisk()->exists(path)) + if (!volume->getDisk()->exists(path)) + return; + + if (is_temp) + { + String file_name = fileName(relative_path); + + if (file_name.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", relative_path, name); + + if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) + { + LOG_ERROR( + storage.log, + "~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". 
Too " + "suspicious, keeping the part.", + path); return; - - if (is_temp) - { - String file_name = fileName(relative_path); - - if (file_name.empty()) - throw Exception("relative_path " + relative_path + " of part " + name + " is invalid or not set", ErrorCodes::LOGICAL_ERROR); - - if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) - { - LOG_ERROR( - storage.log, - "~DataPart() should remove part {} but its name doesn't start with \"tmp\" or end with \".tmp_proj\". Too " - "suspicious, keeping the part.", - path); - return; - } - } - - if (parent_part) - { - std::optional<bool> keep_shared_data = keepSharedDataInDecoupledStorage(); - if (!keep_shared_data.has_value()) - return; - projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data); - } - else - remove(); - - if (state == State::DeleteOnDestroy) - { - LOG_TRACE(storage.log, "Removed part from old location {}", path); - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); + } } + + if (parent_part) + { + std::optional<bool> keep_shared_data = keepSharedDataInDecoupledStorage(); + if (!keep_shared_data.has_value()) + return; + projectionRemove(parent_part->getFullRelativePath(), *keep_shared_data); + } + else + remove(); + + if (state == State::DeleteOnDestroy) + { + LOG_TRACE(storage.log, "Removed part from old location {}", path); + } + } + catch (...) + { + /// FIXME If part is temporary, then directory will not be removed for 1 day (temporary_directories_lifetime). + /// If it's tmp_merge_ or tmp_fetch_, + /// then all future attempts to execute part producing operation will fail with "directory already exists". + /// Seems like it's especially important for remote disks, because removal may fail due to network issues. + tryLogCurrentException(__PRETTY_FUNCTION__); + assert(!is_temp); + assert(state != State::DeleteOnDestroy); + assert(state != State::Temporary); } } @@ -1157,14 +1164,17 @@ void IMergeTreeDataPart::remove() const * And a race condition can happen that will lead to "File not found" error here. */ + /// NOTE We rename part to delete_tmp_<relative_path> instead of delete_tmp_<name> to avoid race condition + /// when we try to remove two parts with the same name, but different relative paths, + /// for example all_1_2_1 (in Deleting state) and tmp_merge_all_1_2_1 (in Temporary state). fs::path from = fs::path(storage.relative_data_path) / relative_path; - fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + name); + fs::path to = fs::path(storage.relative_data_path) / ("delete_tmp_" + relative_path); // TODO directory delete_tmp_ is never removed if server crashes before returning from this function auto disk = volume->getDisk(); if (disk->exists(to)) { - LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart. Removing it.", fullPath(disk, to)); + LOG_WARNING(storage.log, "Directory {} (to which part must be renamed before removing) already exists. Most likely this is due to unclean restart or race condition. Removing it.", fullPath(disk, to)); try { disk->removeSharedRecursive(fs::path(to) / "", *keep_shared_data); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 3515da20fa9..a203d45aa25 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -198,6 +198,7 @@ public: mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() }; /// If true, the destructor will delete the directory with the part.
+ /// FIXME Why do we need this flag? What's difference from Temporary and DeleteOnDestroy state? Can we get rid of this? bool is_temp = false; /// If true it means that there are no ZooKeeper node for this part, so it should be deleted only from filesystem diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d0d52fd488a..ca877d8a72d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1373,9 +1373,6 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: const auto storage_settings_ptr = getSettings(); String part_path = fs::path(replica_path) / "parts" / part_name; - //ops.emplace_back(zkutil::makeCheckRequest( - // zookeeper_path + "/columns", expected_columns_version)); - if (storage_settings_ptr->use_minimalistic_part_header_in_zookeeper) { ops.emplace_back(zkutil::makeCreateRequest( @@ -1421,6 +1418,7 @@ MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAnd Coordination::Requests new_ops; for (const String & part_path : absent_part_paths_on_replicas) { + /// NOTE Create request may fail with ZNONODE if replica is being dropped, we will throw an exception new_ops.emplace_back(zkutil::makeCreateRequest(part_path, "", zkutil::CreateMode::Persistent)); new_ops.emplace_back(zkutil::makeRemoveRequest(part_path, -1)); } From b6b73ba5b20c50053e50ff9b13505c711e2cb7d2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 10 Dec 2021 14:54:55 +0000 Subject: [PATCH 226/262] pymongo --- docker/test/integration/runner/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 5695be70b9a..55c0b53a3a2 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -76,7 +76,7 @@ RUN python3 -m pip install \ minio \ protobuf \ psycopg2-binary==2.8.6 \ - pymongo \ + pymongo==3.11.0 \ pytest \ pytest-timeout \ pytest-xdist \ From 46356a34a30cd95600a192957978d7a0fd7c1f2e Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 10 Dec 2021 14:58:17 +0000 Subject: [PATCH 227/262] Update pymongo --- docker/test/integration/runner/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 5695be70b9a..55c0b53a3a2 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -76,7 +76,7 @@ RUN python3 -m pip install \ minio \ protobuf \ psycopg2-binary==2.8.6 \ - pymongo \ + pymongo==3.11.0 \ pytest \ pytest-timeout \ pytest-xdist \ From 3e28581ea42f171f9598418d7ad3ef9dcc181f62 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 10 Dec 2021 01:10:28 +0300 Subject: [PATCH 228/262] Extend test --- ...dow_functions_disable_optimizations.reference | 16 ++++++++++++++++ ...29_window_functions_disable_optimizations.sql | 15 ++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference index 1fd9e58f556..f66c81021c9 100644 --- a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference +++ b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.reference @@ -18,3 +18,19 @@ 7 1 30 180 8 0.5 30 195 9 1 30 225 +0 0 0 +1 1 1 +2 0 1 +3 0 1 +4 0 1 +5 
0 1 +6 0 1 +7 0 1 +8 0 1 +9 0 1 +5772761.230862 +5773916.014064 +5775070.797267 +5776226.273617 +5777381.749967 +5778537.226317 diff --git a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql index 847d868b10b..cfe9f20d378 100644 --- a/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql +++ b/tests/queries/0_stateless/02129_window_functions_disable_optimizations.sql @@ -11,4 +11,17 @@ SELECT if((number % 2) = 0, 0.5, 1) AS a, 30 AS b, sum(a * b) OVER (ORDER BY number ASC) AS s -FROM numbers(10) +FROM numbers(10); + +SET optimize_aggregators_of_group_by_keys=1; + +SELECT + *, + if(number = 1, 1, 0) as a, + max(a) OVER (ORDER BY number ASC) AS s +FROM numbers(10); + +SET optimize_group_by_function_keys = 1; +SELECT round(sum(log(2) * number), 6) AS k FROM numbers(10000) +GROUP BY (number % 2) * (number % 3), number % 3, number % 2 +HAVING sum(log(2) * number) > 346.57353 ORDER BY k; From f5e949b88e1c2e912b91b12f47350ca2e2809e23 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 10 Dec 2021 18:12:27 +0300 Subject: [PATCH 229/262] Ping CI --- src/Storages/WindowView/StorageWindowView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0a674ae62e9..272276c5164 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -116,7 +116,7 @@ namespace } }; - /// Replace windowID node name with either tumble or hop. + /// Replace windowID node name with either tumble or hop struct ReplaceWindowIdMatcher { public: From dcbba460c1d0b0f4b90afe92d04ae6e1b279b791 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 18:38:42 +0300 Subject: [PATCH 230/262] Revert "Split long tests into multiple checks" --- .github/workflows/main.yml | 414 +-------------------------- .github/workflows/master.yml | 442 +---------------------------- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/run.sh | 7 - tests/ci/functional_test_check.py | 37 +-- tests/ci/integration_test_check.py | 25 +- tests/clickhouse-test | 34 +-- tests/integration/ci-runner.py | 21 -- 8 files changed, 50 insertions(+), 931 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 57a30d44fae..69a863b75a0 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -886,7 +886,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan0: + FunctionalStatelessTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -903,70 +903,6 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan1: - needs: [BuilderDebTsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out 
repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan2: - needs: [BuilderDebTsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1008,7 +944,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan0: + FunctionalStatelessTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -1025,8 +961,6 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1039,69 +973,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan1: - needs: [BuilderDebMsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_memory - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (memory, actions)' - REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan2: - needs: [BuilderDebMsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_memory - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (memory, actions)' - REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" 
$KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug0: + FunctionalStatelessTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -1118,70 +990,6 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug1: - needs: [BuilderDebDebug] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_debug - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (debug, actions)' - REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug2: - needs: [BuilderDebDebug] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_debug - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (debug, actions)' - REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1689,7 +1497,7 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan0: + IntegrationTestsAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: @@ -1705,8 +1513,6 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1719,67 +1525,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsAsan1: - needs: [BuilderDebAsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_asan - REPORTS_PATH: 
${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (asan, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsAsan2: - needs: [BuilderDebAsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_asan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (asan, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsTsan0: + IntegrationTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: @@ -1795,8 +1541,6 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1809,97 +1553,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan1: - needs: [BuilderDebTsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 4 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsTsan2: - needs: [BuilderDebTsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 4 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill 
$(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsTsan3: - needs: [BuilderDebTsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 3 - RUN_BY_HASH_TOTAL: 4 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsRelease0: + IntegrationTestsRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -1915,38 +1569,6 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 2 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsRelease1: - needs: [BuilderDebRelease] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_release - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (release, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -2137,19 +1759,13 @@ jobs: - CheckLabels - BuilderReport - FastTest - - FunctionalStatelessTestDebug0 - - FunctionalStatelessTestDebug1 - - FunctionalStatelessTestDebug2 + - FunctionalStatelessTestDebug - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseReplicated - FunctionalStatelessTestReleaseWideParts - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan0 - - FunctionalStatelessTestTsan1 - - FunctionalStatelessTestTsan2 - - FunctionalStatelessTestMsan0 - - FunctionalStatelessTestMsan1 - - FunctionalStatelessTestMsan2 + - FunctionalStatelessTestTsan + - FunctionalStatelessTestMsan - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -2167,15 +1783,9 @@ jobs: - ASTFuzzerTestTsan - ASTFuzzerTestMSan - ASTFuzzerTestUBSan - - IntegrationTestsAsan0 - - IntegrationTestsAsan1 - - IntegrationTestsAsan2 - - IntegrationTestsRelease0 - - IntegrationTestsRelease1 - - IntegrationTestsTsan0 - - IntegrationTestsTsan1 - - IntegrationTestsTsan2 - - IntegrationTestsTsan3 + - IntegrationTestsAsan + - IntegrationTestsRelease + - IntegrationTestsTsan - PVSCheck - UnitTestsAsan - UnitTestsTsan diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index cdf66d26310..5d4dec16303 100644 --- 
a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -799,7 +799,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan0: + FunctionalStatelessTestTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -816,70 +816,6 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan1: - needs: [BuilderDebTsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan2: - needs: [BuilderDebTsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -921,7 +857,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan0: + FunctionalStatelessTestMsan: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -938,8 +874,6 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -952,69 +886,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan1: - needs: [BuilderDebMsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_memory - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (memory, actions)' - REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - 
sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan2: - needs: [BuilderDebMsan] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_memory - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (memory, actions)' - REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug0: + FunctionalStatelessTestDebug: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -1031,70 +903,6 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug1: - needs: [BuilderDebDebug] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_debug - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (debug, actions)' - REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug2: - needs: [BuilderDebDebug] - runs-on: [self-hosted, func-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Functional test - env: - TEMP_PATH: ${{runner.temp}}/stateless_debug - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Stateless tests (debug, actions)' - REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse - KILL_TIMEOUT: 10800 - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1459,7 +1267,7 @@ jobs: ############################################################################################# ############################# INTEGRATION 
TESTS ############################################# ############################################################################################# - IntegrationTestsAsan0: + IntegrationTestsAsan: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: @@ -1475,8 +1283,6 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1489,67 +1295,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsAsan1: - needs: [BuilderDebAsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_asan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (asan, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsAsan2: - needs: [BuilderDebAsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_asan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (asan, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 3 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsTsan0: + IntegrationTestsTsan: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: @@ -1565,8 +1311,6 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1579,97 +1323,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan1: - needs: [BuilderDebTsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 4 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp 
-r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsTsan2: - needs: [BuilderDebTsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 2 - RUN_BY_HASH_TOTAL: 4 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsTsan3: - needs: [BuilderDebTsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_tsan - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (thread, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse - RUN_BY_HASH_NUM: 3 - RUN_BY_HASH_TOTAL: 4 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsRelease0: + IntegrationTestsRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -1685,66 +1339,6 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse - RUN_BY_HASH_NUM: 0 - RUN_BY_HASH_TOTAL: 2 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr $TEMP_PATH - IntegrationTestsRelease1: - needs: [BuilderDebRelease] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_release - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests (release, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse - RUN_BY_HASH_NUM: 1 - RUN_BY_HASH_TOTAL: 2 - run: | - sudo rm -fr $TEMP_PATH - mkdir -p $TEMP_PATH - cp -r $GITHUB_WORKSPACE $TEMP_PATH - cd $REPO_COPY/tests/ci - python3 integration_test_check.py "$CHECK_NAME" - - name: Cleanup - if: always() - run: | - docker kill $(docker ps -q) ||: - docker rm -f $(docker ps -a -q) ||: - sudo rm -fr 
$TEMP_PATH - IntegrationTestsFlakyCheck: - needs: [BuilderDebAsan] - runs-on: [self-hosted, stress-tester] - steps: - - name: Download json reports - uses: actions/download-artifact@v2 - with: - path: ${{runner.temp}}/reports_dir - - name: Check out repository code - uses: actions/checkout@v2 - - name: Integration test - env: - TEMP_PATH: ${{runner.temp}}/integration_tests_asan_flaky_check - REPORTS_PATH: ${{runner.temp}}/reports_dir - CHECK_NAME: 'Integration tests flaky check (asan, actions)' - REPO_COPY: ${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -2047,18 +1641,12 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug0 - - FunctionalStatelessTestDebug1 - - FunctionalStatelessTestDebug2 + - FunctionalStatelessTestDebug - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan0 - - FunctionalStatelessTestTsan1 - - FunctionalStatelessTestTsan2 - - FunctionalStatelessTestMsan0 - - FunctionalStatelessTestMsan1 - - FunctionalStatelessTestMsan2 + - FunctionalStatelessTestTsan + - FunctionalStatelessTestMsan - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -2072,15 +1660,9 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan0 - - IntegrationTestsAsan1 - - IntegrationTestsAsan2 - - IntegrationTestsRelease0 - - IntegrationTestsRelease1 - - IntegrationTestsTsan0 - - IntegrationTestsTsan1 - - IntegrationTestsTsan2 - - IntegrationTestsTsan3 + - IntegrationTestsAsan + - IntegrationTestsRelease + - IntegrationTestsTsan - CompatibilityCheck - ASTFuzzerTestDebug - ASTFuzzerTestAsan diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 05d26924b15..7de8c061673 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -49,6 +49,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 ENV MAX_RUN_TIME=0 + # Download Minio-related binaries RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \ && chmod +x ./minio \ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 8827f5b1bf6..93f64fdec66 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -96,13 +96,6 @@ function run_tests() ADDITIONAL_OPTIONS+=('8') fi - if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then - ADDITIONAL_OPTIONS+=('--run-by-hash-num') - ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") - ADDITIONAL_OPTIONS+=('--run-by-hash-total') - ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") - fi - set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index a3ca357db18..15b9ab44b31 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -20,20 +20,15 @@ from stopwatch import Stopwatch from rerun_helper import RerunHelper from tee_popen import TeePopen -def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): - result = [] +def get_additional_envs(check_name): if 'DatabaseReplicated' in check_name: - result.append("USE_DATABASE_REPLICATED=1") + return ["USE_DATABASE_REPLICATED=1"] if 'DatabaseOrdinary' in check_name: - result.append("USE_DATABASE_ORDINARY=1") + return 
["USE_DATABASE_ORDINARY=1"] if 'wide parts enabled' in check_name: - result.append("USE_POLYMORPHIC_PARTS=1") + return ["USE_POLYMORPHIC_PARTS=1"] - if run_by_hash_total != 0: - result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") - result.append(f"RUN_BY_HASH_TOTAL={run_by_hash_total}") - - return result + return [] def get_image_name(check_name): if 'stateless' in check_name.lower(): @@ -122,22 +117,12 @@ if __name__ == "__main__": check_name = sys.argv[1] kill_timeout = int(sys.argv[2]) - flaky_check = 'flaky' in check_name.lower() gh = Github(get_best_robot_token()) pr_info = PRInfo(get_event(), need_changed_files=flaky_check) - if 'RUN_BY_HASH_NUM' in os.environ: - run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) - run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) - check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' - else: - run_by_hash_num = 0 - run_by_hash_total = 0 - check_name_with_group = check_name - - rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + rerun_helper = RerunHelper(gh, pr_info, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -150,7 +135,7 @@ if __name__ == "__main__": tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success') + commit.create_status(context=check_name, description='Not found changed stateless tests', state='success') sys.exit(0) image_name = get_image_name(check_name) @@ -172,7 +157,7 @@ if __name__ == "__main__": run_log_path = os.path.join(result_path, "runlog.log") - additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total) + additional_envs = get_additional_envs(check_name) run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run) logging.info("Going to run func tests: %s", run_command) @@ -191,12 +176,12 @@ if __name__ == "__main__": ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) if state != 'success': diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 01799447184..69c4603b3ea 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -22,6 +22,8 @@ from rerun_helper import RerunHelper from tee_popen import TeePopen +DOWNLOAD_RETRIES_COUNT = 5 + IMAGES = [ "clickhouse/integration-tests-runner", 
"clickhouse/mysql-golang-client", @@ -34,7 +36,7 @@ IMAGES = [ "clickhouse/integration-helper", ] -def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num): +def get_json_params_dict(check_name, pr_info, docker_images): return { 'context_name': check_name, 'commit': pr_info.sha, @@ -44,8 +46,6 @@ def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, 'shuffle_test_groups': False, 'use_tmpfs': False, 'disable_net_host': True, - 'run_by_hash_total': run_by_hash_total, - 'run_by_hash_num': run_by_hash_num, } def get_env_for_runner(build_path, repo_path, result_path, work_path): @@ -107,15 +107,6 @@ if __name__ == "__main__": check_name = sys.argv[1] - if 'RUN_BY_HASH_NUM' in os.environ: - run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) - run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) - check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' - else: - run_by_hash_num = 0 - run_by_hash_total = 0 - check_name_with_group = check_name - if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -124,7 +115,7 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) - rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) + rerun_helper = RerunHelper(gh, pr_info, check_name) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -149,7 +140,7 @@ if __name__ == "__main__": json_path = os.path.join(work_path, 'params.json') with open(json_path, 'w', encoding='utf-8') as json_params: - json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num))) + json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions))) output_path_log = os.path.join(result_path, "main_script_log.txt") @@ -171,9 +162,9 @@ if __name__ == "__main__": mark_flaky_tests(ch_helper, check_name, test_results) s3_helper = S3Helper('https://s3.amazonaws.com') - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name, False) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 784edfc1917..8a87227519f 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -17,8 +17,6 @@ import math import http.client import urllib.parse import json -# for crc32 -import zlib from argparse import ArgumentParser from typing import Tuple, Union, Optional, Dict, Set, List @@ -59,13 +57,6 @@ MAX_RETRIES = 3 TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect'] - -def stringhash(s): - # default hash() function consistent - # only during process invocation 
https://stackoverflow.com/a/42089311 - return zlib.crc32(s.encode('utf-8')) - - class HTTPError(Exception): def __init__(self, message=None, code=None): self.message = message @@ -765,15 +756,7 @@ class TestSuite: self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite - filter_func = lambda x: True - - if args.run_by_hash_num is not None and args.run_by_hash_total is not None: - if args.run_by_hash_num > args.run_by_hash_total: - raise Exception(f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}") - - filter_func = lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num - - self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func, filter_func) + self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func) self.all_tags: Dict[str, Set[str]] = self.read_test_tags(self.suite_path, self.all_tests) self.sequential_tests = [] @@ -794,17 +777,17 @@ class TestSuite: return ('no-parallel' in self.all_tags[test_name]) or ('sequential' in self.all_tags[test_name]) - def get_tests_list(self, sort_key, filter_func): + def get_tests_list(self, sort_key): """ Return list of tests file names to run """ - all_tests = list(self.get_selected_tests(filter_func)) + all_tests = list(self.get_selected_tests()) all_tests = all_tests * self.args.test_runs all_tests.sort(key=sort_key) return all_tests - def get_selected_tests(self, filter_func): + def get_selected_tests(self): """ Find all files with tests, filter, render templates """ @@ -821,13 +804,11 @@ class TestSuite: continue if USE_JINJA and test_name.endswith(".gen.sql"): continue - if not filter_func(test_name): - continue test_name = self.render_test_template(j2env, self.suite_path, test_name) yield test_name @staticmethod - def read_test_suite(args, suite_dir_name: str): + def readTestSuite(args, suite_dir_name: str): def is_data_present(): return int(clickhouse_execute(args, 'EXISTS TABLE test.hits')) @@ -1211,7 +1192,7 @@ def main(args): if server_died.is_set(): break - test_suite = TestSuite.read_test_suite(args, suite) + test_suite = TestSuite.readTestSuite(args, suite) if test_suite is None: continue @@ -1344,9 +1325,6 @@ if __name__ == '__main__': parser.add_argument('--print-time', action='store_true', dest='print_time', help='Print test time') parser.add_argument('--check-zookeeper-session', action='store_true', help='Check ZooKeeper session uptime to determine if failed test should be retried') - parser.add_argument('--run-by-hash-num', type=int, help='Run tests matching crc32(test_name) % run_by_hash_total == run_by_hash_num') - parser.add_argument('--run-by-hash-total', type=int, help='Total test groups for crc32(test_name) % run_by_hash_total == run_by_hash_num') - group = parser.add_mutually_exclusive_group(required=False) group.add_argument('--zookeeper', action='store_true', default=None, dest='zookeeper', help='Run zookeeper related tests') group.add_argument('--no-zookeeper', action='store_false', default=None, dest='zookeeper', help='Do not run zookeeper related tests') diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index c8745294c5b..25d09a8c4c5 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -10,8 +10,6 @@ from collections import defaultdict import random import json import csv -# for crc32 -import zlib MAX_RETRY = 3 @@ -28,9 +26,6 @@ MAX_TIME_SECONDS = 3600 MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes TASK_TIMEOUT = 8 * 60 * 60 # 8 hours -def stringhash(s): - 
return zlib.crc32(s.encode('utf-8')) - def get_tests_to_run(pr_info): result = set([]) changed_files = pr_info['changed_files'] @@ -188,13 +183,6 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) - if 'run_by_hash_total' in self.params: - self.run_by_hash_total = self.params['run_by_hash_total'] - self.run_by_hash_num = self.params['run_by_hash_num'] - else: - self.run_by_hash_total = 0 - self.run_by_hash_num = 0 - def path(self): return self.result_path @@ -588,15 +576,6 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info("Dump iptables before run %s", subprocess.check_output("sudo iptables -L", shell=True)) all_tests = self._get_all_tests(repo_path) - - if self.run_by_hash_total != 0: - grouped_tests = self.group_test_by_file(all_tests) - all_filtered_by_hash_tests = [] - for group, tests_in_group in grouped_tests.items(): - if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num: - all_filtered_by_hash_tests += tests_in_group - all_tests = all_filtered_by_hash_tests - parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info("Found %s tests first 3 %s", len(all_tests), ' '.join(all_tests[:3])) filtered_sequential_tests = list(filter(lambda test: test in all_tests, parallel_skip_tests)) From a6e55b00e33416c4eb26da82543ae45b79a66bc7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 18:39:02 +0300 Subject: [PATCH 231/262] Revert "Revert "Split long tests into multiple checks"" --- .github/workflows/main.yml | 414 ++++++++++++++++++++++++++- .github/workflows/master.yml | 442 ++++++++++++++++++++++++++++- docker/test/stateless/Dockerfile | 1 - docker/test/stateless/run.sh | 7 + tests/ci/functional_test_check.py | 37 ++- tests/ci/integration_test_check.py | 25 +- tests/clickhouse-test | 34 ++- tests/integration/ci-runner.py | 21 ++ 8 files changed, 931 insertions(+), 50 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 69a863b75a0..57a30d44fae 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -886,7 +886,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -903,6 +903,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + 
mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -944,7 +1008,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -961,6 +1025,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -973,7 +1039,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -990,6 +1118,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: 
${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1497,7 +1689,7 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: + IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: @@ -1513,6 +1705,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1525,7 +1719,67 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: 
Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: @@ -1541,6 +1795,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1553,7 +1809,97 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 
+ - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -1569,6 +1915,38 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1759,13 +2137,19 @@ jobs: - CheckLabels - BuilderReport - FastTest - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseReplicated - FunctionalStatelessTestReleaseWideParts - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -1783,9 +2167,15 @@ jobs: - ASTFuzzerTestTsan - ASTFuzzerTestMSan - ASTFuzzerTestUBSan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - PVSCheck - UnitTestsAsan - UnitTestsTsan diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 5d4dec16303..cdf66d26310 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -799,7 +799,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestTsan: + FunctionalStatelessTestTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, func-tester] steps: @@ -816,6 +816,70 @@ jobs: CHECK_NAME: 'Stateless tests (thread, actions)' REPO_COPY: 
${{runner.temp}}/stateless_tsan/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/stateless_tsan/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -857,7 +921,7 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestMsan: + FunctionalStatelessTestMsan0: needs: [BuilderDebMsan] runs-on: [self-hosted, func-tester] steps: @@ -874,6 +938,8 @@ jobs: CHECK_NAME: 'Stateless tests (memory, actions)' REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -886,7 +952,69 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - FunctionalStatelessTestDebug: + FunctionalStatelessTestMsan1: + needs: [BuilderDebMsan] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestMsan2: + needs: [BuilderDebMsan] 
+ runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_memory + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (memory, actions)' + REPO_COPY: ${{runner.temp}}/stateless_memory/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug0: needs: [BuilderDebDebug] runs-on: [self-hosted, func-tester] steps: @@ -903,6 +1031,70 @@ jobs: CHECK_NAME: 'Stateless tests (debug, actions)' REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug1: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 functional_test_check.py "$CHECK_NAME" $KILL_TIMEOUT + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + FunctionalStatelessTestDebug2: + needs: [BuilderDebDebug] + runs-on: [self-hosted, func-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Functional test + env: + TEMP_PATH: ${{runner.temp}}/stateless_debug + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Stateless tests (debug, actions)' + REPO_COPY: ${{runner.temp}}/stateless_debug/ClickHouse + KILL_TIMEOUT: 10800 + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1267,7 +1459,7 @@ jobs: ############################################################################################# ############################# INTEGRATION TESTS ############################################# ############################################################################################# - IntegrationTestsAsan: + IntegrationTestsAsan0: needs: [BuilderDebAsan] runs-on: [self-hosted, stress-tester] steps: @@ -1283,6 +1475,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (asan, actions)' REPO_COPY: 
${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 3 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1295,7 +1489,67 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsTsan: + IntegrationTestsAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 3 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] steps: @@ -1311,6 +1565,8 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (thread, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 4 run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1323,7 +1579,97 @@ jobs: docker kill $(docker ps -q) ||: docker rm -f $(docker ps -a -q) ||: sudo rm -fr $TEMP_PATH - IntegrationTestsRelease: + IntegrationTestsTsan1: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan2: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download 
json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 2 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsTsan3: + needs: [BuilderDebTsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_tsan + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (thread, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_tsan/ClickHouse + RUN_BY_HASH_NUM: 3 + RUN_BY_HASH_TOTAL: 4 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease0: needs: [BuilderDebRelease] runs-on: [self-hosted, stress-tester] steps: @@ -1339,6 +1685,66 @@ jobs: REPORTS_PATH: ${{runner.temp}}/reports_dir CHECK_NAME: 'Integration tests (release, actions)' REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 0 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsRelease1: + needs: [BuilderDebRelease] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_release + REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests (release, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_release/ClickHouse + RUN_BY_HASH_NUM: 1 + RUN_BY_HASH_TOTAL: 2 + run: | + sudo rm -fr $TEMP_PATH + mkdir -p $TEMP_PATH + cp -r $GITHUB_WORKSPACE $TEMP_PATH + cd $REPO_COPY/tests/ci + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker kill $(docker ps -q) ||: + docker rm -f $(docker ps -a -q) ||: + sudo rm -fr $TEMP_PATH + IntegrationTestsFlakyCheck: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{runner.temp}}/reports_dir + - name: Check out repository code + uses: actions/checkout@v2 + - name: Integration test + env: + TEMP_PATH: ${{runner.temp}}/integration_tests_asan_flaky_check + 
REPORTS_PATH: ${{runner.temp}}/reports_dir + CHECK_NAME: 'Integration tests flaky check (asan, actions)' + REPO_COPY: ${{runner.temp}}/integration_tests_asan_flaky_check/ClickHouse run: | sudo rm -fr $TEMP_PATH mkdir -p $TEMP_PATH @@ -1641,12 +2047,18 @@ jobs: needs: - DockerHubPush - BuilderReport - - FunctionalStatelessTestDebug + - FunctionalStatelessTestDebug0 + - FunctionalStatelessTestDebug1 + - FunctionalStatelessTestDebug2 - FunctionalStatelessTestRelease - FunctionalStatelessTestReleaseDatabaseOrdinary - FunctionalStatelessTestAsan - - FunctionalStatelessTestTsan - - FunctionalStatelessTestMsan + - FunctionalStatelessTestTsan0 + - FunctionalStatelessTestTsan1 + - FunctionalStatelessTestTsan2 + - FunctionalStatelessTestMsan0 + - FunctionalStatelessTestMsan1 + - FunctionalStatelessTestMsan2 - FunctionalStatelessTestUBsan - FunctionalStatefulTestDebug - FunctionalStatefulTestRelease @@ -1660,9 +2072,15 @@ jobs: - StressTestTsan - StressTestMsan - StressTestUBsan - - IntegrationTestsAsan - - IntegrationTestsRelease - - IntegrationTestsTsan + - IntegrationTestsAsan0 + - IntegrationTestsAsan1 + - IntegrationTestsAsan2 + - IntegrationTestsRelease0 + - IntegrationTestsRelease1 + - IntegrationTestsTsan0 + - IntegrationTestsTsan1 + - IntegrationTestsTsan2 + - IntegrationTestsTsan3 - CompatibilityCheck - ASTFuzzerTestDebug - ASTFuzzerTestAsan diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 7de8c061673..05d26924b15 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -49,7 +49,6 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 ENV MAX_RUN_TIME=0 - # Download Minio-related binaries RUN wget 'https://dl.min.io/server/minio/release/linux-amd64/minio' \ && chmod +x ./minio \ diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 93f64fdec66..8827f5b1bf6 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -96,6 +96,13 @@ function run_tests() ADDITIONAL_OPTIONS+=('8') fi + if [[ -n "$RUN_BY_HASH_NUM" ]] && [[ -n "$RUN_BY_HASH_TOTAL" ]]; then + ADDITIONAL_OPTIONS+=('--run-by-hash-num') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_NUM") + ADDITIONAL_OPTIONS+=('--run-by-hash-total') + ADDITIONAL_OPTIONS+=("$RUN_BY_HASH_TOTAL") + fi + set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 15b9ab44b31..a3ca357db18 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -20,15 +20,20 @@ from stopwatch import Stopwatch from rerun_helper import RerunHelper from tee_popen import TeePopen -def get_additional_envs(check_name): +def get_additional_envs(check_name, run_by_hash_num, run_by_hash_total): + result = [] if 'DatabaseReplicated' in check_name: - return ["USE_DATABASE_REPLICATED=1"] + result.append("USE_DATABASE_REPLICATED=1") if 'DatabaseOrdinary' in check_name: - return ["USE_DATABASE_ORDINARY=1"] + result.append("USE_DATABASE_ORDINARY=1") if 'wide parts enabled' in check_name: - return ["USE_POLYMORPHIC_PARTS=1"] + result.append("USE_POLYMORPHIC_PARTS=1") - return [] + if run_by_hash_total != 0: + result.append(f"RUN_BY_HASH_NUM={run_by_hash_num}") + result.append(f"RUN_BY_HASH_TOTAL={run_by_hash_total}") + + return result def get_image_name(check_name): if 'stateless' in check_name.lower(): @@ -117,12 
+122,22 @@ if __name__ == "__main__": check_name = sys.argv[1] kill_timeout = int(sys.argv[2]) + flaky_check = 'flaky' in check_name.lower() gh = Github(get_best_robot_token()) pr_info = PRInfo(get_event(), need_changed_files=flaky_check) - rerun_helper = RerunHelper(gh, pr_info, check_name) + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -135,7 +150,7 @@ if __name__ == "__main__": tests_to_run = get_tests_to_run(pr_info) if not tests_to_run: commit = get_commit(gh, pr_info.sha) - commit.create_status(context=check_name, description='Not found changed stateless tests', state='success') + commit.create_status(context=check_name_with_group, description='Not found changed stateless tests', state='success') sys.exit(0) image_name = get_image_name(check_name) @@ -157,7 +172,7 @@ if __name__ == "__main__": run_log_path = os.path.join(result_path, "runlog.log") - additional_envs = get_additional_envs(check_name) + additional_envs = get_additional_envs(check_name, run_by_hash_num, run_by_hash_total) run_command = get_run_command(packages_path, result_path, server_log_path, kill_timeout, additional_envs, docker_image, flaky_check, tests_to_run) logging.info("Going to run func tests: %s", run_command) @@ -176,12 +191,12 @@ if __name__ == "__main__": ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, check_name, test_results) - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [run_log_path] + additional_logs, check_name_with_group) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) if state != 'success': diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 69c4603b3ea..01799447184 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -22,8 +22,6 @@ from rerun_helper import RerunHelper from tee_popen import TeePopen -DOWNLOAD_RETRIES_COUNT = 5 - IMAGES = [ "clickhouse/integration-tests-runner", "clickhouse/mysql-golang-client", @@ -36,7 +34,7 @@ IMAGES = [ "clickhouse/integration-helper", ] -def get_json_params_dict(check_name, pr_info, docker_images): +def get_json_params_dict(check_name, pr_info, docker_images, run_by_hash_total, run_by_hash_num): return { 'context_name': check_name, 'commit': pr_info.sha, @@ -46,6 +44,8 @@ def get_json_params_dict(check_name, pr_info, docker_images): 
'shuffle_test_groups': False, 'use_tmpfs': False, 'disable_net_host': True, + 'run_by_hash_total': run_by_hash_total, + 'run_by_hash_num': run_by_hash_num, } def get_env_for_runner(build_path, repo_path, result_path, work_path): @@ -107,6 +107,15 @@ if __name__ == "__main__": check_name = sys.argv[1] + if 'RUN_BY_HASH_NUM' in os.environ: + run_by_hash_num = int(os.getenv('RUN_BY_HASH_NUM')) + run_by_hash_total = int(os.getenv('RUN_BY_HASH_TOTAL')) + check_name_with_group = check_name + f' [{run_by_hash_num + 1}/{run_by_hash_total}]' + else: + run_by_hash_num = 0 + run_by_hash_total = 0 + check_name_with_group = check_name + if not os.path.exists(temp_path): os.makedirs(temp_path) @@ -115,7 +124,7 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) - rerun_helper = RerunHelper(gh, pr_info, check_name) + rerun_helper = RerunHelper(gh, pr_info, check_name_with_group) if rerun_helper.is_already_finished_by_status(): logging.info("Check is already finished according to github status, exiting") sys.exit(0) @@ -140,7 +149,7 @@ if __name__ == "__main__": json_path = os.path.join(work_path, 'params.json') with open(json_path, 'w', encoding='utf-8') as json_params: - json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions))) + json_params.write(json.dumps(get_json_params_dict(check_name, pr_info, images_with_versions, run_by_hash_total, run_by_hash_num))) output_path_log = os.path.join(result_path, "main_script_log.txt") @@ -162,9 +171,9 @@ if __name__ == "__main__": mark_flaky_tests(ch_helper, check_name, test_results) s3_helper = S3Helper('https://s3.amazonaws.com') - report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name, False) + report_url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [output_path_log] + additional_logs, check_name_with_group, False) print(f"::notice ::Report url: {report_url}") - post_commit_status(gh, pr_info.sha, check_name, description, state, report_url) + post_commit_status(gh, pr_info.sha, check_name_with_group, description, state, report_url) - prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name) + prepared_events = prepare_tests_results_for_clickhouse(pr_info, test_results, state, stopwatch.duration_seconds, stopwatch.start_time_str, report_url, check_name_with_group) ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8a87227519f..784edfc1917 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -17,6 +17,8 @@ import math import http.client import urllib.parse import json +# for crc32 +import zlib from argparse import ArgumentParser from typing import Tuple, Union, Optional, Dict, Set, List @@ -57,6 +59,13 @@ MAX_RETRIES = 3 TEST_FILE_EXTENSIONS = ['.sql', '.sql.j2', '.sh', '.py', '.expect'] + +def stringhash(s): + # default hash() function consistent + # only during process invocation https://stackoverflow.com/a/42089311 + return zlib.crc32(s.encode('utf-8')) + + class HTTPError(Exception): def __init__(self, message=None, code=None): self.message = message @@ -756,7 +765,15 @@ class TestSuite: self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite - self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func) + filter_func = lambda x: True + + if args.run_by_hash_num is not 
None and args.run_by_hash_total is not None: + if args.run_by_hash_num > args.run_by_hash_total: + raise Exception(f"Incorrect run by hash, value {args.run_by_hash_num} bigger than total {args.run_by_hash_total}") + + filter_func = lambda x: stringhash(x) % args.run_by_hash_total == args.run_by_hash_num + + self.all_tests: List[str] = self.get_tests_list(self.tests_in_suite_key_func, filter_func) self.all_tags: Dict[str, Set[str]] = self.read_test_tags(self.suite_path, self.all_tests) self.sequential_tests = [] @@ -777,17 +794,17 @@ class TestSuite: return ('no-parallel' in self.all_tags[test_name]) or ('sequential' in self.all_tags[test_name]) - def get_tests_list(self, sort_key): + def get_tests_list(self, sort_key, filter_func): """ Return list of tests file names to run """ - all_tests = list(self.get_selected_tests()) + all_tests = list(self.get_selected_tests(filter_func)) all_tests = all_tests * self.args.test_runs all_tests.sort(key=sort_key) return all_tests - def get_selected_tests(self): + def get_selected_tests(self, filter_func): """ Find all files with tests, filter, render templates """ @@ -804,11 +821,13 @@ class TestSuite: continue if USE_JINJA and test_name.endswith(".gen.sql"): continue + if not filter_func(test_name): + continue test_name = self.render_test_template(j2env, self.suite_path, test_name) yield test_name @staticmethod - def readTestSuite(args, suite_dir_name: str): + def read_test_suite(args, suite_dir_name: str): def is_data_present(): return int(clickhouse_execute(args, 'EXISTS TABLE test.hits')) @@ -1192,7 +1211,7 @@ def main(args): if server_died.is_set(): break - test_suite = TestSuite.readTestSuite(args, suite) + test_suite = TestSuite.read_test_suite(args, suite) if test_suite is None: continue @@ -1325,6 +1344,9 @@ if __name__ == '__main__': parser.add_argument('--print-time', action='store_true', dest='print_time', help='Print test time') parser.add_argument('--check-zookeeper-session', action='store_true', help='Check ZooKeeper session uptime to determine if failed test should be retried') + parser.add_argument('--run-by-hash-num', type=int, help='Run tests matching crc32(test_name) % run_by_hash_total == run_by_hash_num') + parser.add_argument('--run-by-hash-total', type=int, help='Total test groups for crc32(test_name) % run_by_hash_total == run_by_hash_num') + group = parser.add_mutually_exclusive_group(required=False) group.add_argument('--zookeeper', action='store_true', default=None, dest='zookeeper', help='Run zookeeper related tests') group.add_argument('--no-zookeeper', action='store_false', default=None, dest='zookeeper', help='Do not run zookeeper related tests') diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 25d09a8c4c5..c8745294c5b 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -10,6 +10,8 @@ from collections import defaultdict import random import json import csv +# for crc32 +import zlib MAX_RETRY = 3 @@ -26,6 +28,9 @@ MAX_TIME_SECONDS = 3600 MAX_TIME_IN_SANDBOX = 20 * 60 # 20 minutes TASK_TIMEOUT = 8 * 60 * 60 # 8 hours +def stringhash(s): + return zlib.crc32(s.encode('utf-8')) + def get_tests_to_run(pr_info): result = set([]) changed_files = pr_info['changed_files'] @@ -183,6 +188,13 @@ class ClickhouseIntegrationTestsRunner: self.start_time = time.time() self.soft_deadline_time = self.start_time + (TASK_TIMEOUT - MAX_TIME_IN_SANDBOX) + if 'run_by_hash_total' in self.params: + self.run_by_hash_total = self.params['run_by_hash_total'] + self.run_by_hash_num = 
self.params['run_by_hash_num'] + else: + self.run_by_hash_total = 0 + self.run_by_hash_num = 0 + def path(self): return self.result_path @@ -576,6 +588,15 @@ class ClickhouseIntegrationTestsRunner: self._install_clickhouse(build_path) logging.info("Dump iptables before run %s", subprocess.check_output("sudo iptables -L", shell=True)) all_tests = self._get_all_tests(repo_path) + + if self.run_by_hash_total != 0: + grouped_tests = self.group_test_by_file(all_tests) + all_filtered_by_hash_tests = [] + for group, tests_in_group in grouped_tests.items(): + if stringhash(group) % self.run_by_hash_total == self.run_by_hash_num: + all_filtered_by_hash_tests += tests_in_group + all_tests = all_filtered_by_hash_tests + parallel_skip_tests = self._get_parallel_tests_skip_list(repo_path) logging.info("Found %s tests first 3 %s", len(all_tests), ' '.join(all_tests[:3])) filtered_sequential_tests = list(filter(lambda test: test in all_tests, parallel_skip_tests)) From 84bccb83bd829645c007ec7c5b8080eed8083bbb Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 10 Dec 2021 19:38:20 +0300 Subject: [PATCH 232/262] Trying to add debug info --- docker/test/stateful/run.sh | 7 ++++++- docker/test/stateless/run.sh | 7 ++++++- tests/ci/functional_test_check.py | 14 +++++++++++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index f8dee0f8bc9..8202a07f017 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -123,7 +123,12 @@ function run_tests() export -f run_tests timeout "$MAX_RUN_TIME" bash -c run_tests ||: -./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +echo "Files in current directory" +ls -la ./ +echo "Files in root directory" +ls -la / + +/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 8827f5b1bf6..d6d9f189e89 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -115,7 +115,12 @@ export -f run_tests timeout "$MAX_RUN_TIME" bash -c run_tests ||: -./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +echo "Files in current directory" +ls -la ./ +echo "Files in root directory" +ls -la / + +/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv clickhouse-client -q "system flush logs" ||: diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index a3ca357db18..fb157db31ba 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -94,7 +94,12 @@ def process_results(result_folder, server_log_path): additional_files = additional_files + [os.path.join(server_log_path, f) for f in server_log_files] status_path = os.path.join(result_folder, "check_status.tsv") - logging.info("Found test_results.tsv") + if os.path.exists(status_path): + logging.info("Found check_status.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_folder)) + raise Exception("File check_status.tsv not found") + with open(status_path, 'r', encoding='utf-8') as status_file: status = list(csv.reader(status_file, delimiter='\t')) @@ -103,6 +108,13 @@ def process_results(result_folder, server_log_path): state, description = status[0][0], status[0][1] 
results_path = os.path.join(result_folder, "test_results.tsv") + + if os.path.exists(results_path): + logging.info("Found test_results.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_folder)) + raise Exception("File test_results.tsv not found") + with open(results_path, 'r', encoding='utf-8') as results_file: test_results = list(csv.reader(results_file, delimiter='\t')) if len(test_results) == 0: From 8e3529818ec71bce00b7645ac6f4c7b5186b3f35 Mon Sep 17 00:00:00 2001 From: vxider Date: Fri, 10 Dec 2021 18:01:54 +0000 Subject: [PATCH 233/262] update window view tests --- ...2_window_view_proc_tumble_to_now.reference | 1 - .../01052_window_view_proc_tumble_to_now.sh | 27 +++++++++++++++++++ .../01052_window_view_proc_tumble_to_now.sql | 18 ------------- .../01053_window_view_proc_hop_to_now.sh | 4 +-- .../01054_window_view_proc_tumble_to.sh | 4 +-- .../01055_window_view_proc_hop_to.sh | 4 +-- ..._view_event_tumble_to_strict_asc.reference | 1 - ...window_view_event_tumble_to_strict_asc.sh} | 23 ++++++++++------ ...dow_view_event_hop_to_strict_asc.reference | 1 - ...58_window_view_event_hop_to_strict_asc.sh} | 23 ++++++++++------ ..._window_view_event_tumble_to_asc.reference | 1 - ... 01060_window_view_event_tumble_to_asc.sh} | 23 ++++++++++------ ...061_window_view_event_hop_to_asc.reference | 1 - ... => 01061_window_view_event_hop_to_asc.sh} | 23 ++++++++++------ ...dow_view_event_tumble_to_bounded.reference | 1 - ...63_window_view_event_tumble_to_bounded.sh} | 23 ++++++++++------ ...window_view_event_hop_to_bounded.reference | 1 - ...01064_window_view_event_hop_to_bounded.sh} | 24 +++++++++++------ ...nt_tumble_to_strict_asc_lateness.reference | 1 - ...ew_event_tumble_to_strict_asc_lateness.sh} | 23 ++++++++++------ ...iew_event_tumble_to_asc_lateness.reference | 1 - ...ndow_view_event_tumble_to_asc_lateness.sh} | 24 ++++++++++------- ...event_tumble_to_bounded_lateness.reference | 1 - ..._view_event_tumble_to_bounded_lateness.sh} | 24 ++++++++++------- 24 files changed, 169 insertions(+), 108 deletions(-) create mode 100755 tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh delete mode 100644 tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sql rename tests/queries/0_stateless/{01057_window_view_event_tumble_to_strict_asc.sql => 01057_window_view_event_tumble_to_strict_asc.sh} (62%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01058_window_view_event_hop_to_strict_asc.sql => 01058_window_view_event_hop_to_strict_asc.sh} (62%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01060_window_view_event_tumble_to_asc.sql => 01060_window_view_event_tumble_to_asc.sh} (63%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01061_window_view_event_hop_to_asc.sql => 01061_window_view_event_hop_to_asc.sh} (63%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01063_window_view_event_tumble_to_bounded.sql => 01063_window_view_event_tumble_to_bounded.sh} (63%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01064_window_view_event_hop_to_bounded.sql => 01064_window_view_event_hop_to_bounded.sh} (62%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01066_window_view_event_tumble_to_strict_asc_lateness.sql => 01066_window_view_event_tumble_to_strict_asc_lateness.sh} (66%) mode change 100644 => 100755 rename tests/queries/0_stateless/{01067_window_view_event_tumble_to_asc_lateness.sql => 01067_window_view_event_tumble_to_asc_lateness.sh} (66%) mode change 100644 
=> 100755 rename tests/queries/0_stateless/{01068_window_view_event_tumble_to_bounded_lateness.sql => 01068_window_view_event_tumble_to_bounded_lateness.sh} (67%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference index 0d66ea1aee9..d00491fd7e5 100644 --- a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.reference @@ -1,2 +1 @@ -0 1 diff --git a/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh new file mode 100755 index 00000000000..033568b6077 --- /dev/null +++ b/tests/queries/0_stateless/01052_window_view_proc_tumble_to_now.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery < Date: Thu, 9 Dec 2021 14:35:22 +0300 Subject: [PATCH 234/262] Take into account nested structures while filling missing columns while reading protobuf. --- src/DataTypes/NestedUtils.cpp | 10 ++ src/DataTypes/NestedUtils.h | 1 + src/Formats/ProtobufSerializer.cpp | 76 ++++++---- src/Formats/RowInputMissingColumnsFiller.cpp | 140 ++++++++++++++++++ src/Formats/RowInputMissingColumnsFiller.h | 40 +++++ ..._format_skipped_column_in_nested.reference | 3 + ...rotobuf_format_skipped_column_in_nested.sh | 6 +- 7 files changed, 244 insertions(+), 32 deletions(-) create mode 100644 src/Formats/RowInputMissingColumnsFiller.cpp create mode 100644 src/Formats/RowInputMissingColumnsFiller.h diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 4f804a0ca50..10ef35b7e7c 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -45,6 +45,15 @@ std::pair splitName(const std::string & name) return {name.substr(0, idx), name.substr(idx + 1)}; } +std::pair splitName(const std::string_view & name) +{ + auto idx = name.find_first_of('.'); + if (idx == std::string::npos || idx == 0 || idx + 1 == name.size()) + return {name, {}}; + + return {name.substr(0, idx), name.substr(idx + 1)}; +} + std::string extractTableName(const std::string & nested_name) { @@ -211,6 +220,7 @@ void validateArraySizes(const Block & block) } } + std::unordered_set getAllTableNames(const Block & block) { std::unordered_set nested_table_names; diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index d16e309fc81..9ed48920ce2 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -12,6 +12,7 @@ namespace Nested std::string concatenateName(const std::string & nested_table_name, const std::string & nested_field_name); std::pair splitName(const std::string & name); + std::pair splitName(const std::string_view & name); /// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot. 
std::string extractTableName(const std::string & nested_name); diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index 4c1e03578c1..07189d0edfc 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -28,6 +28,7 @@ # include # include # include +# include # include # include # include @@ -2147,9 +2148,11 @@ namespace std::vector && field_descs_, const FieldDescriptor * parent_field_descriptor_, bool with_length_delimiter_, + std::unique_ptr missing_columns_filler_, const ProtobufReaderOrWriter & reader_or_writer_) : parent_field_descriptor(parent_field_descriptor_) , with_length_delimiter(with_length_delimiter_) + , missing_columns_filler(std::move(missing_columns_filler_)) , should_skip_if_empty(parent_field_descriptor ? shouldSkipZeroOrEmpty(*parent_field_descriptor) : false) , reader(reader_or_writer_.reader) , writer(reader_or_writer_.writer) @@ -2170,8 +2173,6 @@ namespace if (!num_columns_) wrongNumberOfColumns(num_columns_, ">0"); - columns.assign(columns_, columns_ + num_columns_); - std::vector field_columns; for (const FieldInfo & info : field_infos) { @@ -2188,13 +2189,17 @@ namespace if (reader) { - missing_column_indices.resize(num_columns_); - for (size_t column_index : collections::range(num_columns_)) - missing_column_indices[column_index] = column_index; - for (const auto & field_info : field_infos) - for (size_t column_index : field_info.column_indices) - missing_column_indices[column_index] = static_cast(-1); - boost::range::remove_erase(missing_column_indices, static_cast(-1)); + mutable_columns.resize(num_columns_); + for (size_t i : collections::range(num_columns_)) + mutable_columns[i] = columns_[i]->assumeMutable(); + + std::vector column_is_missing; + column_is_missing.resize(num_columns_, true); + for (const FieldInfo & info : field_infos) + for (size_t i : info.column_indices) + column_is_missing[i] = false; + + has_missing_columns = (std::find(column_is_missing.begin(), column_is_missing.end(), true) != column_is_missing.end()); } } @@ -2243,7 +2248,7 @@ namespace { last_field_index = 0; last_field_tag = field_infos[0].field_tag; - size_t old_size = columns.empty() ? 0 : columns[0]->size(); + size_t old_size = mutable_columns.empty() ? 0 : mutable_columns[0]->size(); try { @@ -2268,10 +2273,10 @@ namespace } catch (...) 
{ - for (auto & column : columns) + for (auto & column : mutable_columns) { if (column->size() > old_size) - column->assumeMutableRef().popBack(column->size() - old_size); + column->popBack(column->size() - old_size); } throw; } @@ -2342,13 +2347,8 @@ namespace void addDefaultsToMissingColumns(size_t row_num) { - for (size_t column_index : missing_column_indices) - { - auto & column = columns[column_index]; - size_t old_size = column->size(); - if (row_num >= old_size) - column->assumeMutableRef().insertDefault(); - } + if (has_missing_columns) + missing_columns_filler->addDefaults(mutable_columns, row_num); } struct FieldInfo @@ -2374,13 +2374,14 @@ namespace const FieldDescriptor * const parent_field_descriptor; const bool with_length_delimiter; + const std::unique_ptr missing_columns_filler; const bool should_skip_if_empty; ProtobufReader * const reader; ProtobufWriter * const writer; std::vector field_infos; std::unordered_map field_index_by_field_tag; - Columns columns; - std::vector missing_column_indices; + MutableColumns mutable_columns; + bool has_missing_columns = false; int last_field_tag = 0; size_t last_field_index = static_cast(-1); }; @@ -2626,7 +2627,8 @@ namespace with_length_delimiter, /* parent_field_descriptor = */ nullptr, used_column_indices, - /* columns_are_reordered_outside = */ false); + /* columns_are_reordered_outside = */ false, + /* check_nested_while_filling_missing_columns = */ true); if (!message_serializer) { @@ -2813,7 +2815,8 @@ namespace bool with_length_delimiter, const FieldDescriptor * parent_field_descriptor, std::vector & used_column_indices, - bool columns_are_reordered_outside) + bool columns_are_reordered_outside, + bool check_nested_while_filling_missing_columns) { std::vector column_names_sv; column_names_sv.reserve(num_columns); @@ -2828,7 +2831,8 @@ namespace with_length_delimiter, parent_field_descriptor, used_column_indices, - columns_are_reordered_outside); + columns_are_reordered_outside, + check_nested_while_filling_missing_columns); } std::unique_ptr buildMessageSerializerImpl( @@ -2839,7 +2843,8 @@ namespace bool with_length_delimiter, const FieldDescriptor * parent_field_descriptor, std::vector & used_column_indices, - bool columns_are_reordered_outside) + bool columns_are_reordered_outside, + bool check_nested_while_filling_missing_columns) { std::vector field_descs; boost::container::flat_map field_descriptors_in_use; @@ -2962,7 +2967,8 @@ namespace /* with_length_delimiter = */ false, field_descriptor, used_column_indices_in_nested, - /* columns_are_reordered_outside = */ true); + /* columns_are_reordered_outside = */ true, + /* check_nested_while_filling_missing_columns = */ false); /// `columns_are_reordered_outside` is true because column indices are /// going to be transformed and then written to the outer message, @@ -3001,7 +3007,8 @@ namespace /* with_length_delimiter = */ false, field_descriptor, used_column_indices_in_nested, - /* columns_are_reordered_outside = */ true); + /* columns_are_reordered_outside = */ true, + /* check_nested_while_filling_missing_columns = */ false); /// `columns_are_reordered_outside` is true because column indices are /// going to be transformed and then written to the outer message, @@ -3040,8 +3047,18 @@ namespace if (field_descs.empty()) return nullptr; + std::unique_ptr missing_columns_filler; + if (reader_or_writer.reader) + { + if (check_nested_while_filling_missing_columns) + missing_columns_filler = std::make_unique(num_columns, column_names, data_types); + else + 
missing_columns_filler = std::make_unique(); + } + return std::make_unique( - std::move(field_descs), parent_field_descriptor, with_length_delimiter, reader_or_writer); + std::move(field_descs), parent_field_descriptor, with_length_delimiter, + std::move(missing_columns_filler), reader_or_writer); } /// Builds a serializer for one-to-one match: @@ -3147,7 +3164,8 @@ namespace /* with_length_delimiter = */ false, &field_descriptor, used_column_indices, - /* columns_are_reordered_outside = */ false); + /* columns_are_reordered_outside = */ false, + /* check_nested_while_filling_missing_columns = */ false); if (!message_serializer) { diff --git a/src/Formats/RowInputMissingColumnsFiller.cpp b/src/Formats/RowInputMissingColumnsFiller.cpp new file mode 100644 index 00000000000..ff8f9e19380 --- /dev/null +++ b/src/Formats/RowInputMissingColumnsFiller.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; +} + + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller() = default; + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types) +{ + std::unordered_map> nested_groups; /// Nested prefix -> column indices. + size_t i = 0; + for (auto it = names_and_types.begin(); it != names_and_types.end(); ++it, ++i) + { + const auto & name_and_type = *it; + if (isArray(name_and_type.type)) + { + auto split = Nested::splitName(name_and_type.name); + if (!split.second.empty()) /// Is it really a column of Nested data structure? + nested_groups[split.first].push_back(i); + } + } + setNestedGroups(std::move(nested_groups), names_and_types.size()); +} + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller(const Names & names, const DataTypes & types) +{ + std::unordered_map> nested_groups; /// Nested prefix -> column indices. + for (size_t i = 0; i != names.size(); ++i) + { + if (isArray(types[i])) + { + auto split = Nested::splitName(names[i]); + if (!split.second.empty()) /// Is it really a column of Nested data structure? + nested_groups[split.first].push_back(i); + } + } + setNestedGroups(std::move(nested_groups), names.size()); +} + +RowInputMissingColumnsFiller::RowInputMissingColumnsFiller(size_t count, const std::string_view * names, const DataTypePtr * types) +{ + std::unordered_map> nested_groups; /// Nested prefix -> column indices. + for (size_t i = 0; i != count; ++i) + { + if (isArray(types[i])) + { + auto split = Nested::splitName(names[i]); + if (!split.second.empty()) /// Is it really a column of Nested data structure? + nested_groups[split.first].push_back(i); + } + } + setNestedGroups(std::move(nested_groups), count); +} + +void RowInputMissingColumnsFiller::setNestedGroups(std::unordered_map> && nested_groups, size_t num_columns) +{ + if (!nested_groups.empty()) + { + column_infos.resize(num_columns); + for (auto & nested_group : nested_groups | boost::adaptors::map_values) + { + if (nested_group.size() <= 1) + continue; + auto nested_group_shared = std::make_shared>(std::move(nested_group)); + for (size_t i : *nested_group_shared) + column_infos[i].nested_group = nested_group_shared; + } + } +} + + +void RowInputMissingColumnsFiller::addDefaults(MutableColumns & columns, size_t row_num) const +{ + for (size_t i = 0; i != columns.size(); ++i) + { + auto & column = *columns[i]; + size_t column_size = column.size(); + if (row_num < column_size) + continue; /// The column already has an element in this position, skipping. 
+ + if (row_num > column_size) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Wrong row_number {}, expected either {} or {}", row_num, column_size - 1, column_size); + + if ((i >= column_infos.size()) || !column_infos[i].nested_group) + { + column.insertDefault(); + continue; + } + + const auto & nested_group = *column_infos[i].nested_group; + size_t size_of_array = 0; + for (size_t j : nested_group) + { + const auto & column_j = columns[j]; + size_t column_size_j = column_j->size(); + if (row_num < column_size_j) + { + const auto * column_array = typeid_cast(column_j.get()); + if (!column_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column with Array type is not represented by ColumnArray column: {}", column_j->dumpStructure()); + const auto & offsets = column_array->getOffsets(); + size_of_array = offsets[row_num] - offsets[row_num - 1]; + break; + } + } + + for (size_t j : nested_group) + { + auto & column_j = columns[j]; + size_t column_size_j = column_j->size(); + if (row_num >= column_size_j) + { + if (row_num > column_size_j) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Wrong row_number {}, expected either {} or {}", row_num, column_size_j - 1, column_size_j); + + auto * column_array = typeid_cast(column_j.get()); + if (!column_array) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column with Array type is not represented by ColumnArray column: {}", column_j->dumpStructure()); + + auto & data = column_array->getData(); + auto & offsets = column_array->getOffsets(); + for (size_t k = 0; k != size_of_array; ++k) + data.insertDefault(); + offsets.push_back(data.size()); + } + } + } +} + +} diff --git a/src/Formats/RowInputMissingColumnsFiller.h b/src/Formats/RowInputMissingColumnsFiller.h new file mode 100644 index 00000000000..0eaefd4e814 --- /dev/null +++ b/src/Formats/RowInputMissingColumnsFiller.h @@ -0,0 +1,40 @@ +#pragma once + +#include + + +namespace DB +{ + +/// Adds default values to columns if they don't have a specified row yet. +/// This class can be useful for implementing IRowInputFormat. +/// For missing columns of nested structure, it creates not columns of empty arrays, +/// but columns of arrays of correct lengths. +class RowInputMissingColumnsFiller +{ +public: + /// Makes a column filler which checks nested structures while adding default values to columns. + RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types); + RowInputMissingColumnsFiller(const Names & names, const DataTypes & types); + RowInputMissingColumnsFiller(size_t count, const std::string_view * names, const DataTypePtr * types); + + /// Default constructor makes a column filler which doesn't check nested structures while + /// adding default values to columns. + RowInputMissingColumnsFiller(); + + /// Adds default values to some columns. + /// For each column the function checks the number of rows and if it's less than (row_num + 1) + /// the function will add a default value to this column. 
+ void addDefaults(MutableColumns & columns, size_t row_num) const; + +private: + void setNestedGroups(std::unordered_map> && nested_groups, size_t num_columns); + + struct ColumnInfo + { + std::shared_ptr> nested_group; + }; + std::vector column_infos; +}; + +} diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference index 1a80e6401db..12550ffbf28 100644 --- a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.reference @@ -25,3 +25,6 @@ modules { } Binary representation is as expected + +e4048ead-30a2-45e5-90be-2af1c7137523 [1] [50639] [58114] [[5393]] [[1]] [[]] [[17811]] [[(0,20)]] +e4048ead-30a2-45e5-90be-2af1c7137523 dummy [1] [50639] [58114] [[5393]] [[1]] [[3411]] [[17811]] [[(10,20)]] diff --git a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh index b413385fb77..ed35df5e98b 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_skipped_column_in_nested.sh @@ -47,9 +47,9 @@ echo $CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage" --input "$BINARY_FILE_PATH" # Check the input in the protobuf format (now the table contains the same data twice). -#echo -#$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH" -#$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825" +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO table_skipped_column_in_nested_00825 FORMAT Protobuf SETTINGS format_schema='$SCHEMADIR/00825_protobuf_format_skipped_column_in_nested:UpdateMessage'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_skipped_column_in_nested_00825 ORDER BY unused1" rm "$BINARY_FILE_PATH" $CLICKHOUSE_CLIENT --query "DROP TABLE table_skipped_column_in_nested_00825" From 2045a4f2459976453d8aaf3e89abb245fe8f7525 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Dec 2021 21:45:29 +0300 Subject: [PATCH 235/262] clickhouse-test: use basename of the test for *.sh tests --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8a87227519f..43106ec0f58 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -564,7 +564,7 @@ class TestCase: database = args.testcase_database # This is for .sh tests - os.environ["CLICKHOUSE_LOG_COMMENT"] = self.case_file + os.environ["CLICKHOUSE_LOG_COMMENT"] = args.testcase_basename params = { 'client': client + ' --database=' + database, From b1bc5c37c966bbaf45f189b0d1253459a2d5fc71 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Dec 2021 21:51:43 +0300 Subject: [PATCH 236/262] tests: split sum_ubsan into avg_ubsan --- tests/queries/0_stateless/01660_sum_ubsan.reference | 9 +++++++-- tests/queries/0_stateless/01660_sum_ubsan.sql | 6 ++---- tests/queries/0_stateless/02144_avg_ubsan.reference | 8 ++++++++ tests/queries/0_stateless/02144_avg_ubsan.sql | 6 ++++++ 4 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 
tests/queries/0_stateless/02144_avg_ubsan.reference create mode 100644 tests/queries/0_stateless/02144_avg_ubsan.sql diff --git a/tests/queries/0_stateless/01660_sum_ubsan.reference b/tests/queries/0_stateless/01660_sum_ubsan.reference index c2ff9f590d5..6ac74108e71 100644 --- a/tests/queries/0_stateless/01660_sum_ubsan.reference +++ b/tests/queries/0_stateless/01660_sum_ubsan.reference @@ -1,5 +1,10 @@ +-- { echo } + +-- Aggregate function 'sum' allows overflow with two's complement arithmetics. +-- This contradicts the standard SQL semantic and we are totally fine with it. +SELECT sum(-8000000000000000000) FROM numbers(11); 4233720368547758080 -384883669867978000 +SELECT sum(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); 4233720368547758080 -384883669867978000 +SELECT sumKahan(-8000000000000000000) FROM numbers(11); -88000000000000000000 diff --git a/tests/queries/0_stateless/01660_sum_ubsan.sql b/tests/queries/0_stateless/01660_sum_ubsan.sql index 1d544324f77..9a3268563ef 100644 --- a/tests/queries/0_stateless/01660_sum_ubsan.sql +++ b/tests/queries/0_stateless/01660_sum_ubsan.sql @@ -1,9 +1,7 @@ +-- { echo } + -- Aggregate function 'sum' allows overflow with two's complement arithmetics. -- This contradicts the standard SQL semantic and we are totally fine with it. SELECT sum(-8000000000000000000) FROM numbers(11); -SELECT avg(-8000000000000000000) FROM numbers(11); - SELECT sum(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); -SELECT avg(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); - SELECT sumKahan(-8000000000000000000) FROM numbers(11); diff --git a/tests/queries/0_stateless/02144_avg_ubsan.reference b/tests/queries/0_stateless/02144_avg_ubsan.reference new file mode 100644 index 00000000000..12304a438cf --- /dev/null +++ b/tests/queries/0_stateless/02144_avg_ubsan.reference @@ -0,0 +1,8 @@ +-- { echo } + +-- Aggregate function 'avg' allows overflow with two's complement arithmetics. +-- This contradicts the standard SQL semantic and we are totally fine with it. +SELECT avg(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); +384883669867978000 +SELECT avg(-8000000000000000000) FROM numbers(11); +384883669867978000 diff --git a/tests/queries/0_stateless/02144_avg_ubsan.sql b/tests/queries/0_stateless/02144_avg_ubsan.sql new file mode 100644 index 00000000000..ee6f2ffef7c --- /dev/null +++ b/tests/queries/0_stateless/02144_avg_ubsan.sql @@ -0,0 +1,6 @@ +-- { echo } + +-- Aggregate function 'avg' allows overflow with two's complement arithmetics. +-- This contradicts the standard SQL semantic and we are totally fine with it. 
+SELECT avg(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); +SELECT avg(-8000000000000000000) FROM numbers(11); From 2cd5a15193d5a92d7e177d1e983ed326918049d5 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Fri, 10 Dec 2021 22:02:28 +0300 Subject: [PATCH 237/262] Update docker_compose_mongo.yml --- docker/test/integration/runner/compose/docker_compose_mongo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index 060017b9f87..04f069f6d27 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -11,7 +11,7 @@ services: command: --profile=2 --verbose mongo2: - image: mongo:latest + image: mongo:5.0 restart: always ports: - "27018:27017" From fa6090f588dbf4cbb5f28bd2210847b070bb8218 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Dec 2021 21:46:26 +0300 Subject: [PATCH 238/262] Fix processing initial table (--table/stdin) in clickhouse-local This patch will: - fix the issue when table had been tried to create multiple times for --queries-files - create these table for --interactive mode (before it works only if you had some queries already, i.e. when it run interactive after non-interactive) This will also make ClientBase interface a little bit cleaner, by removing one abstract method getQueryTextPrefix() --- programs/local/LocalServer.cpp | 10 ++++------ programs/local/LocalServer.h | 1 - src/Client/ClientBase.cpp | 8 +------- src/Client/ClientBase.h | 6 ++---- ...02140_clickhouse_local_queries_file_table.reference | 0 .../02140_clickhouse_local_queries_file_table.sh | 7 +++++++ .../02141_clickhouse_local_interactive_table.reference | 1 + .../02141_clickhouse_local_interactive_table.sh | 7 +++++++ 8 files changed, 22 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.reference create mode 100755 tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh create mode 100644 tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference create mode 100755 tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 1f27072f142..33615080df4 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -388,12 +388,6 @@ void LocalServer::setupUsers() } -String LocalServer::getQueryTextPrefix() -{ - return getInitialCreateTableQuery(); -} - - void LocalServer::connect() { connection_parameters = ConnectionParameters(config()); @@ -463,6 +457,10 @@ try } #endif + String initial_query = getInitialCreateTableQuery(); + if (!initial_query.empty()) + processQueryText(initial_query); + if (is_interactive && !delayed_interactive) { runInteractive(); diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ce0df06c86a..06e3746eacd 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -37,7 +37,6 @@ protected: void processError(const String & query) const override; String getName() const override { return "local"; } - String getQueryTextPrefix() override; void printHelpMessage(const OptionsDescription & options_description) override; void addOptions(OptionsDescription & options_description) override; diff --git 
a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index b97d8342186..4052531c493 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1494,17 +1494,14 @@ void ClientBase::runNonInteractive() { auto process_multi_query_from_file = [&](const String & file) { - auto text = getQueryTextPrefix(); String queries_from_file; ReadBufferFromFile in(file); readStringUntilEOF(queries_from_file, in); - text += queries_from_file; - return executeMultiQuery(text); + return executeMultiQuery(queries_from_file); }; - /// Read all queries into `text`. for (const auto & queries_file : queries_files) { for (const auto & interleave_file : interleave_queries_files) @@ -1519,9 +1516,6 @@ void ClientBase::runNonInteractive() } String text; - if (is_multiquery) - text = getQueryTextPrefix(); - if (config().has("query")) { text += config().getRawString("query"); /// Poco configuration should not process substitutions in form of ${...} inside query. diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index bad1395e699..4c5d29b390b 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -78,9 +78,6 @@ protected: String & query_to_execute, ASTPtr & parsed_query, const String & all_queries_text, std::optional & current_exception); - /// For non-interactive multi-query mode get queries text prefix. - virtual String getQueryTextPrefix() { return ""; } - static void clearTerminal(); void showClientVersion(); @@ -100,9 +97,10 @@ protected: const std::vector & external_tables_arguments) = 0; virtual void processConfig() = 0; -private: +protected: bool processQueryText(const String & text); +private: void receiveResult(ASTPtr parsed_query); bool receiveAndProcessPacket(ASTPtr parsed_query, bool cancelled); void receiveLogs(ASTPtr parsed_query); diff --git a/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.reference b/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh b/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh new file mode 100755 index 00000000000..377cbb13688 --- /dev/null +++ b/tests/queries/0_stateless/02140_clickhouse_local_queries_file_table.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --queries-file <(echo 'select 1') --queries-file <(echo 'select 2') --format Null diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference new file mode 100644 index 00000000000..e4c93e9e1c5 --- /dev/null +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.reference @@ -0,0 +1 @@ +CREATE TABLE _local.table\n(\n `key` String\n)\nENGINE = File(\'TSVWithNamesAndTypes\', \'/dev/null\') diff --git a/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh new file mode 100755 index 00000000000..fc71f779fa1 --- /dev/null +++ b/tests/queries/0_stateless/02141_clickhouse_local_interactive_table.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --file /dev/null --structure "key String" --input-format TSVWithNamesAndTypes --interactive --send_logs_level=trace <<<'show create table table' From 837ff82312217e7a10aa31b4c0e529b98f8d3aaa Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 10 Dec 2021 21:51:43 +0300 Subject: [PATCH 239/262] Suppress UBSan errors for avg() function CI: https://s3.amazonaws.com/clickhouse-test-reports/0/dd2ccd3b17e7c59ed9c8184f00f8dae85ee87d1f/fuzzer_astfuzzerubsan,actions//report.html --- src/AggregateFunctions/AggregateFunctionAvg.h | 13 +++++++++---- .../queries/0_stateless/02144_avg_ubsan.reference | 14 ++++++++++---- tests/queries/0_stateless/02144_avg_ubsan.sql | 9 +++++++-- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index 8ca0ae1dac2..eb061337753 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -218,9 +218,9 @@ public: using ColVecType = ColumnVectorOrDecimal; - void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final + void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const final { - this->data(place).numerator += static_cast(*columns[0]).getData()[row_num]; + increment(place, static_cast(*columns[0]).getData()[row_num]); ++this->data(place).denominator; } @@ -240,7 +240,7 @@ public: sum_data.addMany(column.getData().data(), batch_size); this->data(place).denominator += batch_size; } - this->data(place).numerator += sum_data.sum; + increment(place, sum_data.sum); } void addBatchSinglePlaceNotNull( @@ -270,7 +270,7 @@ public: sum_data.addManyNotNull(column.getData().data(), null_map, batch_size); this->data(place).denominator += batch_size - countBytesInFilter(null_map, batch_size); } - this->data(place).numerator += sum_data.sum; + increment(place, sum_data.sum); } String getName() const override { return "avg"; } @@ -298,5 +298,10 @@ public: #endif +private: + void NO_SANITIZE_UNDEFINED increment(AggregateDataPtr __restrict place, Numerator inc) const + { + this->data(place).numerator += inc; + } }; } diff --git a/tests/queries/0_stateless/02144_avg_ubsan.reference b/tests/queries/0_stateless/02144_avg_ubsan.reference index 12304a438cf..09f03e40e59 100644 --- a/tests/queries/0_stateless/02144_avg_ubsan.reference +++ b/tests/queries/0_stateless/02144_avg_ubsan.reference @@ -2,7 +2,13 @@ -- Aggregate function 'avg' allows overflow with two's complement arithmetics. -- This contradicts the standard SQL semantic and we are totally fine with it. 
-SELECT avg(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); -384883669867978000 -SELECT avg(-8000000000000000000) FROM numbers(11); -384883669867978000 + +-- AggregateFunctionAvg::add +SELECT avg(-8000000000000000000) FROM (SELECT *, 1 AS k FROM numbers(65535*2)) GROUP BY k; +63121857572613.94 +-- AggregateFunctionAvg::addBatchSinglePlace +SELECT avg(-8000000000000000000) FROM numbers(65535 * 2); +63121857572613.94 +-- AggregateFunctionAvg::addBatchSinglePlaceNotNull +SELECT avg(toNullable(-8000000000000000000)) FROM numbers(65535 * 2); +63121857572613.94 diff --git a/tests/queries/0_stateless/02144_avg_ubsan.sql b/tests/queries/0_stateless/02144_avg_ubsan.sql index ee6f2ffef7c..7c51963333e 100644 --- a/tests/queries/0_stateless/02144_avg_ubsan.sql +++ b/tests/queries/0_stateless/02144_avg_ubsan.sql @@ -2,5 +2,10 @@ -- Aggregate function 'avg' allows overflow with two's complement arithmetics. -- This contradicts the standard SQL semantic and we are totally fine with it. -SELECT avg(-8000000000000000000) FROM remote('127.0.0.{1,2,3,4,5,6,7,8,9,10,11}', system.one); -SELECT avg(-8000000000000000000) FROM numbers(11); + +-- AggregateFunctionAvg::add +SELECT avg(-8000000000000000000) FROM (SELECT *, 1 AS k FROM numbers(65535*2)) GROUP BY k; +-- AggregateFunctionAvg::addBatchSinglePlace +SELECT avg(-8000000000000000000) FROM numbers(65535 * 2); +-- AggregateFunctionAvg::addBatchSinglePlaceNotNull +SELECT avg(toNullable(-8000000000000000000)) FROM numbers(65535 * 2); From 82c2d8dd2c2710b85e687d547fdeb8207f6b8833 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 10 Dec 2021 23:18:47 +0300 Subject: [PATCH 240/262] Add synchronization to ProtobufSchemas. --- src/Formats/ProtobufSchemas.cpp | 1 + src/Formats/ProtobufSchemas.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/Formats/ProtobufSchemas.cpp b/src/Formats/ProtobufSchemas.cpp index 9c6ed76ef27..6d1d48158a5 100644 --- a/src/Formats/ProtobufSchemas.cpp +++ b/src/Formats/ProtobufSchemas.cpp @@ -73,6 +73,7 @@ ProtobufSchemas::~ProtobufSchemas() = default; const google::protobuf::Descriptor * ProtobufSchemas::getMessageTypeForFormatSchema(const FormatSchemaInfo & info) { + std::lock_guard lock(mutex); auto it = importers.find(info.schemaDirectory()); if (it == importers.end()) it = importers.emplace(info.schemaDirectory(), std::make_unique(info.schemaDirectory())).first; diff --git a/src/Formats/ProtobufSchemas.h b/src/Formats/ProtobufSchemas.h index f911cb2ce4b..0a2eeea9893 100644 --- a/src/Formats/ProtobufSchemas.h +++ b/src/Formats/ProtobufSchemas.h @@ -4,6 +4,7 @@ #if USE_PROTOBUF #include +#include #include #include #include @@ -39,6 +40,7 @@ public: private: class ImporterWithSourceTree; std::unordered_map> importers; + std::mutex mutex; }; } From add1e8302e1cd00e944675e00b6bb6a661e7347f Mon Sep 17 00:00:00 2001 From: Vxider Date: Sat, 11 Dec 2021 04:39:13 +0000 Subject: [PATCH 241/262] Ping CI --- .../queries/0_stateless/01055_window_view_proc_hop_to.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh index ad8197d9b7b..85c69cf76cf 100755 --- a/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh +++ b/tests/queries/0_stateless/01055_window_view_proc_hop_to.sh @@ -21,7 +21,7 @@ while true; do $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "1" && break || sleep .5 ||: done -$CLICKHOUSE_CLIENT --query="SELECT count FROM dst" 
-$CLICKHOUSE_CLIENT --query="DROP TABLE wv" -$CLICKHOUSE_CLIENT --query="DROP TABLE mt" -$CLICKHOUSE_CLIENT --query="DROP TABLE dst" +$CLICKHOUSE_CLIENT --query="SELECT count FROM dst;" +$CLICKHOUSE_CLIENT --query="DROP TABLE wv;" +$CLICKHOUSE_CLIENT --query="DROP TABLE mt;" +$CLICKHOUSE_CLIENT --query="DROP TABLE dst;" From 9a7159897360cb6f59561b7b3e1afb0ab9bf7af2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Dec 2021 10:57:23 +0300 Subject: [PATCH 242/262] Fix table lifetime in case of parallel DROP TABLE and INSERT Stress tests founds [1]: ==527==WARNING: MemorySanitizer: use-of-uninitialized-value 0 0x37078ffd in unsigned long std::__1::__cxx_atomic_fetch_add(std::__1::__cxx_atomic_base_impl*, unsigned long, std::__1::memory_order) obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1050:12 1 0x37078ffd in std::__1::__atomic_base::fetch_add(unsigned long, std::__1::memory_order) obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1719:17 2 0x37078ffd in std::__1::__atomic_base::operator++() obj-x86_64-linux-gnu/../contrib/libcxx/include/atomic:1756:57 3 0x37078ffd in SimpleIncrement::get() obj-x86_64-linux-gnu/../src/Common/SimpleIncrement.h:20:16 4 0x37078ffd in DB::MergeTreeDataWriter::writeTempPart(DB::BlockWithPartition&, std::__1::shared_ptr const&, std::__1::shared_ptr) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeDataWriter.cpp:276:46 5 0x373c446c in DB::MergeTreeSink::consume(DB::Chunk) obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeSink.cpp:27:65 Uninitialized value was created by a heap deallocation 6 0x32d481e8 in DB::DatabaseCatalog::TableMarkedAsDropped::~TableMarkedAsDropped() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.h:248:12 7 0x32d3c134 in DB::DatabaseCatalog::dropTableDataTask() obj-x86_64-linux-gnu/../src/Interpreters/DatabaseCatalog.cpp:908:1 [1]: https://s3.amazonaws.com/clickhouse-test-reports/32534/fa6090f588dbf4cbb5f28bd2210847b070bb8218/stress_test__memory__actions_.html The query was CREATE MATERIALIZED VIEW ... POPULATE AS SELECT ... from 00040_aggregating_materialized_view test. --- src/Interpreters/InterpreterInsertQuery.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 8c0d3620dd6..b7edf12e23f 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -380,13 +380,6 @@ BlockIO InterpreterInsertQuery::execute() BlockIO res; - res.pipeline.addStorageHolder(table); - if (const auto * mv = dynamic_cast(table.get())) - { - if (auto inner_table = mv->tryGetTargetTable()) - res.pipeline.addStorageHolder(inner_table); - } - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? if (is_distributed_insert_select) { @@ -445,6 +438,13 @@ BlockIO InterpreterInsertQuery::execute() } } + res.pipeline.addStorageHolder(table); + if (const auto * mv = dynamic_cast(table.get())) + { + if (auto inner_table = mv->tryGetTargetTable()) + res.pipeline.addStorageHolder(inner_table); + } + return res; } From ca9c5dc4ebfd12a6095260b06128c39d545da7ca Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Dec 2021 10:48:43 +0300 Subject: [PATCH 243/262] Fix LOGICAL_ERROR for MATERIALIZED VIEW over table functions (i.e. numbers()) Replace LOGICAL_ERROR with QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW in this case. 
--- src/Storages/SelectQueryDescription.cpp | 8 ++++---- tests/queries/0_stateless/02146_mv_non_phys.reference | 0 tests/queries/0_stateless/02146_mv_non_phys.sql | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02146_mv_non_phys.reference create mode 100644 tests/queries/0_stateless/02146_mv_non_phys.sql diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp index 018a9f0ea98..2cc8f769cf1 100644 --- a/src/Storages/SelectQueryDescription.cpp +++ b/src/Storages/SelectQueryDescription.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -12,7 +13,6 @@ namespace DB namespace ErrorCodes { extern const int QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW; -extern const int LOGICAL_ERROR; } SelectQueryDescription::SelectQueryDescription(const SelectQueryDescription & other) @@ -60,9 +60,9 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt { auto * ast_select = subquery->as(); if (!ast_select) - throw Exception("Logical error while creating StorageMaterializedView. " - "Could not retrieve table name from select query.", - DB::ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, + "StorageMaterializedView cannot be created from table functions ({})", + serializeAST(*subquery)); if (ast_select->list_of_selects->children.size() != 1) throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); diff --git a/tests/queries/0_stateless/02146_mv_non_phys.reference b/tests/queries/0_stateless/02146_mv_non_phys.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02146_mv_non_phys.sql b/tests/queries/0_stateless/02146_mv_non_phys.sql new file mode 100644 index 00000000000..4b15900fe76 --- /dev/null +++ b/tests/queries/0_stateless/02146_mv_non_phys.sql @@ -0,0 +1,2 @@ +drop table if exists mv_02146; +create materialized view mv_02146 engine=MergeTree() order by number as select * from numbers(10); -- { serverError QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW } From 4e4837758a1b691017763426315413c6154a6cd8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Dec 2021 11:20:36 +0300 Subject: [PATCH 244/262] Remove readline support - it was not nested for a long time - replxx is an upstream way for completion --- base/base/CMakeLists.txt | 24 --- base/base/ReadlineLineReader.cpp | 187 ------------------ base/base/ReadlineLineReader.h | 19 -- contrib/replxx-cmake/CMakeLists.txt | 2 +- .../linux_x86_64/private/config.h | 4 +- docker/packager/binary/Dockerfile | 1 - docker/test/integration/base/Dockerfile | 1 - docker/test/integration/runner/Dockerfile | 1 - docker/test/testflows/runner/Dockerfile | 1 - src/Client/ClientBase.cpp | 3 - utils/ci/install-libraries.sh | 1 - utils/ci/install-os-packages.sh | 9 - 12 files changed, 3 insertions(+), 250 deletions(-) delete mode 100644 base/base/ReadlineLineReader.cpp delete mode 100644 base/base/ReadlineLineReader.h diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 000233738f7..c0b0801bd2e 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -24,8 +24,6 @@ set (SRCS if (ENABLE_REPLXX) list (APPEND SRCS ReplxxLineReader.cpp) -elseif (ENABLE_READLINE) - list (APPEND SRCS ReadlineLineReader.cpp) endif () if (USE_DEBUG_HELPERS) @@ -52,28 +50,6 @@ if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES) target_link_libraries(common PUBLIC 
-Wl,-U,_inside_main) endif() -# Allow explicit fallback to readline -if (NOT ENABLE_REPLXX AND ENABLE_READLINE) - message (STATUS "Attempt to fallback to readline explicitly") - set (READLINE_PATHS "/usr/local/opt/readline/lib") - # First try find custom lib for macos users (default lib without history support) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_LIB) - find_library (READLINE_LIB NAMES readline PATHS ${READLINE_PATHS}) - endif () - - set(READLINE_INCLUDE_PATHS "/usr/local/opt/readline/include") - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS} NO_DEFAULT_PATH) - if (NOT READLINE_INCLUDE_DIR) - find_path (READLINE_INCLUDE_DIR NAMES readline/readline.h PATHS ${READLINE_INCLUDE_PATHS}) - endif () - if (READLINE_INCLUDE_DIR AND READLINE_LIB) - target_link_libraries(common PUBLIC ${READLINE_LIB}) - target_compile_definitions(common PUBLIC USE_READLINE=1) - message (STATUS "Using readline: ${READLINE_INCLUDE_DIR} : ${READLINE_LIB}") - endif () -endif () - target_link_libraries (common PUBLIC ${CITYHASH_LIBRARIES} diff --git a/base/base/ReadlineLineReader.cpp b/base/base/ReadlineLineReader.cpp deleted file mode 100644 index de444a0b1d9..00000000000 --- a/base/base/ReadlineLineReader.cpp +++ /dev/null @@ -1,187 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include - -#include - -namespace -{ - -/// Trim ending whitespace inplace -void trim(String & s) -{ - s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); -} - -} - -static const LineReader::Suggest * suggest; - -/// Points to current word to suggest. -static LineReader::Suggest::Words::const_iterator pos; -/// Points after the last possible match. -static LineReader::Suggest::Words::const_iterator end; - -/// Set iterators to the matched range of words if any. -static void findRange(const char * prefix, size_t prefix_length) -{ - std::string prefix_str(prefix); - if (auto completions = suggest->getCompletions(prefix_str, prefix_length)) - std::tie(pos, end) = *completions; -} - -/// Iterates through matched range. -static char * nextMatch() -{ - if (pos >= end) - return nullptr; - - /// readline will free memory by itself. - char * word = strdup(pos->c_str()); - ++pos; - return word; -} - -static char * generate(const char * text, int state) -{ - if (!suggest->ready) - return nullptr; - if (state == 0) - findRange(text, strlen(text)); - - /// Do not append whitespace after word. For unknown reason, rl_completion_append_character = '\0' does not work. - rl_completion_suppress_append = 1; - - return nextMatch(); -}; - -ReadlineLineReader::ReadlineLineReader( - const Suggest & suggest_, const String & history_file_path_, bool multiline_, Patterns extenders_, Patterns delimiters_) - : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)) -{ - suggest = &suggest_; - - if (!history_file_path.empty()) - { - int res = read_history(history_file_path.c_str()); - if (res) - std::cerr << "Cannot read history from file " + history_file_path + ": "+ errnoToString(errno) << std::endl; - } - - /// Added '.' to the default list. Because it is used to separate database and table. - rl_basic_word_break_characters = word_break_characters; - - /// Not append whitespace after single suggestion. Because whitespace after function name is meaningless. 
- rl_completion_append_character = '\0'; - - rl_completion_entry_function = generate; - - /// Install Ctrl+C signal handler that will be used in interactive mode. - - if (rl_initialize()) - throw std::runtime_error("Cannot initialize readline"); - - auto clear_prompt_or_exit = [](int) - { - /// This is signal safe. - ssize_t res = write(STDOUT_FILENO, "\n", 1); - - /// Allow to quit client while query is in progress by pressing Ctrl+C twice. - /// (First press to Ctrl+C will try to cancel query by InterruptListener). - if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE)) - { - rl_replace_line("", 0); - if (rl_forced_update_display()) - _exit(0); - } - else - { - /// A little dirty, but we struggle to find better way to correctly - /// force readline to exit after returning from the signal handler. - _exit(0); - } - }; - - if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR) - throw std::runtime_error(std::string("Cannot set signal handler for readline: ") + errnoToString(errno)); - - rl_variable_bind("completion-ignore-case", "on"); - // TODO: it doesn't work - // history_write_timestamps = 1; -} - -ReadlineLineReader::~ReadlineLineReader() -{ -} - -LineReader::InputStatus ReadlineLineReader::readOneLine(const String & prompt) -{ - input.clear(); - - const char* cinput = readline(prompt.c_str()); - if (cinput == nullptr) - return (errno != EAGAIN) ? ABORT : RESET_LINE; - input = cinput; - - trim(input); - return INPUT_LINE; -} - -void ReadlineLineReader::addToHistory(const String & line) -{ - add_history(line.c_str()); - - // Flush changes to the disk - // NOTE readline builds a buffer of all the lines to write, and write them in one syscall. - // Thus there is no need to lock the history file here. - write_history(history_file_path.c_str()); -} - -#if RL_VERSION_MAJOR >= 7 - -#define BRACK_PASTE_PREF "\033[200~" -#define BRACK_PASTE_SUFF "\033[201~" - -#define BRACK_PASTE_LAST '~' -#define BRACK_PASTE_SLEN 6 - -/// This handler bypasses some unused macro/event checkings and remove trailing newlines before insertion. -static int clickhouse_rl_bracketed_paste_begin(int /* count */, int /* key */) -{ - std::string buf; - buf.reserve(128); - - RL_SETSTATE(RL_STATE_MOREINPUT); - SCOPE_EXIT(RL_UNSETSTATE(RL_STATE_MOREINPUT)); - int c; - while ((c = rl_read_key()) >= 0) - { - if (c == '\r') - c = '\n'; - buf.push_back(c); - if (buf.size() >= BRACK_PASTE_SLEN && c == BRACK_PASTE_LAST && buf.substr(buf.size() - BRACK_PASTE_SLEN) == BRACK_PASTE_SUFF) - { - buf.resize(buf.size() - BRACK_PASTE_SLEN); - break; - } - } - trim(buf); - return static_cast(rl_insert_text(buf.c_str())) == buf.size() ? 0 : 1; -} - -#endif - -void ReadlineLineReader::enableBracketedPaste() -{ -#if RL_VERSION_MAJOR >= 7 - rl_variable_bind("enable-bracketed-paste", "on"); - - /// Use our bracketed paste handler to get better user experience. See comments above. 
- rl_bind_keyseq(BRACK_PASTE_PREF, clickhouse_rl_bracketed_paste_begin); -#endif -}; diff --git a/base/base/ReadlineLineReader.h b/base/base/ReadlineLineReader.h deleted file mode 100644 index 95bd23b4634..00000000000 --- a/base/base/ReadlineLineReader.h +++ /dev/null @@ -1,19 +0,0 @@ -#pragma once - -#include "LineReader.h" - -#include -#include - -class ReadlineLineReader : public LineReader -{ -public: - ReadlineLineReader(const Suggest & suggest, const String & history_file_path, bool multiline, Patterns extenders_, Patterns delimiters_); - ~ReadlineLineReader() override; - - void enableBracketedPaste() override; - -private: - InputStatus readOneLine(const String & prompt) override; - void addToHistory(const String & line) override; -}; diff --git a/contrib/replxx-cmake/CMakeLists.txt b/contrib/replxx-cmake/CMakeLists.txt index 07f24bae25d..222a38095cb 100644 --- a/contrib/replxx-cmake/CMakeLists.txt +++ b/contrib/replxx-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ if (NOT ENABLE_REPLXX) add_library(replxx INTERFACE) target_compile_definitions(replxx INTERFACE USE_REPLXX=0) - message (STATUS "Not using replxx (Beware! Runtime fallback to readline is possible!)") + message (STATUS "Not using replxx") return() endif() diff --git a/contrib/unixodbc-cmake/linux_x86_64/private/config.h b/contrib/unixodbc-cmake/linux_x86_64/private/config.h index d80a4da4665..59cee9e8565 100644 --- a/contrib/unixodbc-cmake/linux_x86_64/private/config.h +++ b/contrib/unixodbc-cmake/linux_x86_64/private/config.h @@ -202,10 +202,10 @@ #define HAVE_READDIR 1 /* Add readline support */ -#define HAVE_READLINE 1 +/* #undef HAVE_READLINE */ /* Define to 1 if you have the header file. */ -#define HAVE_READLINE_HISTORY_H 1 +/* #undef HAVE_READLINE_HISTORY_H */ /* Use the scandir lib */ /* #undef HAVE_SCANDIR */ diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 6a6d0e7212c..28e84d359b3 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -52,7 +52,6 @@ RUN apt-get update \ llvm-${LLVM_VERSION} \ llvm-${LLVM_VERSION}-dev \ libicu-dev \ - libreadline-dev \ moreutils \ ninja-build \ pigz \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index add4dad0132..89c2b19236e 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -7,7 +7,6 @@ RUN apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get -y install \ tzdata \ python3 \ - libreadline-dev \ libicu-dev \ bsdutils \ gdb \ diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 55c0b53a3a2..e86f17dae70 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -21,7 +21,6 @@ RUN apt-get update \ cgroupfs-mount \ python3-pip \ tzdata \ - libreadline-dev \ libicu-dev \ bsdutils \ curl \ diff --git a/docker/test/testflows/runner/Dockerfile b/docker/test/testflows/runner/Dockerfile index 8ea3cd46973..d15f237587b 100644 --- a/docker/test/testflows/runner/Dockerfile +++ b/docker/test/testflows/runner/Dockerfile @@ -21,7 +21,6 @@ RUN apt-get update \ cgroupfs-mount \ python3-pip \ tzdata \ - libreadline-dev \ libicu-dev \ bsdutils \ curl \ diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 4052531c493..58bc239f003 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1414,9 +1414,6 @@ void ClientBase::runInteractive() highlight_callback = highlight; ReplxxLineReader 
lr(*suggest, history_file, config().has("multiline"), query_extenders, query_delimiters, highlight_callback); - -#elif defined(USE_READLINE) && USE_READLINE - ReadlineLineReader lr(*suggest, history_file, config().has("multiline"), query_extenders, query_delimiters); #else LineReader lr(history_file, config().has("multiline"), query_extenders, query_delimiters); #endif diff --git a/utils/ci/install-libraries.sh b/utils/ci/install-libraries.sh index 7615375fbc1..3c26e3b09b1 100755 --- a/utils/ci/install-libraries.sh +++ b/utils/ci/install-libraries.sh @@ -4,4 +4,3 @@ set -e -x source default-config ./install-os-packages.sh libicu-dev -./install-os-packages.sh libreadline-dev diff --git a/utils/ci/install-os-packages.sh b/utils/ci/install-os-packages.sh index 38fa6dbba15..b4b0c74f30c 100755 --- a/utils/ci/install-os-packages.sh +++ b/utils/ci/install-os-packages.sh @@ -46,9 +46,6 @@ case $PACKAGE_MANAGER in libicu-dev) $SUDO apt-get install -y libicu-dev ;; - libreadline-dev) - $SUDO apt-get install -y libreadline-dev - ;; llvm-libs*) $SUDO apt-get install -y ${WHAT/llvm-libs/liblld}-dev ${WHAT/llvm-libs/libclang}-dev ;; @@ -91,9 +88,6 @@ case $PACKAGE_MANAGER in libicu-dev) $SUDO yum install -y libicu-devel ;; - libreadline-dev) - $SUDO yum install -y readline-devel - ;; *) echo "Unknown package"; exit 1; ;; @@ -130,9 +124,6 @@ case $PACKAGE_MANAGER in libicu-dev) $SUDO pkg install -y icu ;; - libreadline-dev) - $SUDO pkg install -y readline - ;; *) echo "Unknown package"; exit 1; ;; From d8bf26f705e4f11c0fafb05f92fd6433ccb2e38f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Dec 2021 11:33:57 +0300 Subject: [PATCH 245/262] Remove even minimal support for readline --- base/base/LineReader.cpp | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/base/base/LineReader.cpp b/base/base/LineReader.cpp index 5beebb58b3b..9491f957762 100644 --- a/base/base/LineReader.cpp +++ b/base/base/LineReader.cpp @@ -10,16 +10,6 @@ #include -#ifdef OS_LINUX -/// We can detect if code is linked with one or another readline variants or open the library dynamically. -# include -extern "C" -{ - char * readline(const char *) __attribute__((__weak__)); - char * (*readline_ptr)(const char *) = readline; -} -#endif - #ifdef HAS_RESERVED_IDENTIFIER #pragma clang diagnostic ignored "-Wreserved-identifier" #endif @@ -152,33 +142,6 @@ LineReader::InputStatus LineReader::readOneLine(const String & prompt) { input.clear(); -#ifdef OS_LINUX - if (!readline_ptr) - { - for (const auto * name : {"libreadline.so", "libreadline.so.0", "libeditline.so", "libeditline.so.0"}) - { - void * dl_handle = dlopen(name, RTLD_LAZY); - if (dl_handle) - { - readline_ptr = reinterpret_cast(dlsym(dl_handle, "readline")); - if (readline_ptr) - { - break; - } - } - } - } - - /// Minimal support for readline - if (readline_ptr) - { - char * line_read = (*readline_ptr)(prompt.c_str()); - if (!line_read) - return ABORT; - input = line_read; - } - else -#endif { std::cout << prompt; std::getline(std::cin, input); From eea269a8299639118155641d19e6ed2ba9a87dd3 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 11 Dec 2021 13:19:14 +0300 Subject: [PATCH 246/262] Improve quota's end-of-interval calculations. 
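The change moves the end-of-interval bookkeeping into EnabledQuota::Interval itself: the
constructor now pre-computes the first interval end (optionally shifted by a random fraction
of the duration), and getEndOfInterval() rolls that end forward by whole durations once
`current_time` has passed it, resetting the used counters whenever a roll-forward happens.

A minimal standalone sketch of the roll-forward arithmetic, for illustration only (it is not
part of the patch; the helper name `rollForward`, the simplified non-atomic types and the
sample timestamps are assumptions of the sketch):

    #include <cassert>
    #include <chrono>

    using Clock = std::chrono::system_clock;

    /// Roll `end` forward by whole multiples of `duration` until it exceeds `current_time`.
    Clock::time_point rollForward(Clock::time_point end, Clock::duration duration, Clock::time_point current_time)
    {
        if (current_time < end)
            return end;  /// Still inside the current interval, nothing to do.

        /// Smallest n >= 1 such that end + n * duration > current_time.
        auto n = (current_time - end + duration) / duration;
        return end + n * duration;
    }

    int main()
    {
        using namespace std::chrono;
        const auto start = Clock::time_point{};

        /// A one-hour interval that ended at start + 1h; "now" is 4.5 hours past that end.
        auto next_end = rollForward(start + hours(1), hours(1), start + hours(5) + minutes(30));
        assert(next_end == start + hours(6));
    }

Randomization only affects the starting point: randomDuration() shifts the initial end by a
uniform value in [0, duration), and the roll-forward step above stays the same.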
--- src/Access/EnabledQuota.cpp | 146 ++++++++++++++++++------------------ src/Access/EnabledQuota.h | 6 +- src/Access/QuotaCache.cpp | 29 ++----- src/Access/QuotaCache.h | 2 +- 4 files changed, 86 insertions(+), 97 deletions(-) diff --git a/src/Access/EnabledQuota.cpp b/src/Access/EnabledQuota.cpp index 5551b6dca40..78dd3c7022a 100644 --- a/src/Access/EnabledQuota.cpp +++ b/src/Access/EnabledQuota.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -15,6 +16,7 @@ namespace ErrorCodes extern const int QUOTA_EXPIRED; } + struct EnabledQuota::Impl { [[noreturn]] static void throwQuotaExceed( @@ -35,52 +37,6 @@ struct EnabledQuota::Impl } - /// Returns the end of the current interval. If the passed `current_time` is greater than that end, - /// the function automatically recalculates the interval's end by adding the interval's duration - /// one or more times until the interval's end is greater than `current_time`. - /// If that recalculation occurs the function also resets amounts of resources used and sets the variable - /// `counters_were_reset`. - static std::chrono::system_clock::time_point getEndOfInterval( - const Interval & interval, std::chrono::system_clock::time_point current_time, bool & counters_were_reset) - { - auto & end_of_interval = interval.end_of_interval; - auto end_loaded = end_of_interval.load(); - auto end = std::chrono::system_clock::time_point{end_loaded}; - if (current_time < end) - { - counters_were_reset = false; - return end; - } - - bool need_reset_counters = false; - - do - { - /// Calculate the end of the next interval: - /// | X | - /// end current_time next_end = end + duration * n - /// where n is an integer number, n >= 1. - const auto duration = interval.duration; - UInt64 n = static_cast((current_time - end + duration) / duration); - end = end + duration * n; - if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) - { - need_reset_counters = true; - break; - } - end = std::chrono::system_clock::time_point{end_loaded}; - } - while (current_time >= end); - - if (need_reset_counters) - { - boost::range::fill(interval.used, 0); - counters_were_reset = true; - } - return end; - } - - static void used( const String & user_name, const Intervals & intervals, @@ -89,33 +45,22 @@ struct EnabledQuota::Impl std::chrono::system_clock::time_point current_time, bool check_exceeded) { + auto quota_type_i = static_cast(quota_type); for (const auto & interval : intervals.intervals) { - if (!interval.end_of_interval.load().count()) - { - /// We need to calculate end of the interval if it hasn't been calculated before. 
- bool dummy; - getEndOfInterval(interval, current_time, dummy); - } - - auto quota_type_i = static_cast(quota_type); QuotaValue used = (interval.used[quota_type_i] += value); QuotaValue max = interval.max[quota_type_i]; - if (!max) continue; if (used > max) { bool counters_were_reset = false; - auto end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset); + auto end_of_interval = interval.getEndOfInterval(current_time, counters_were_reset); if (counters_were_reset) - { used = (interval.used[quota_type_i] += value); - if ((used > max) && check_exceeded) - throwQuotaExceed(user_name, intervals.quota_name, quota_type, used, max, interval.duration, end_of_interval); - } - else if (check_exceeded) + + if (check_exceeded && (used > max)) throwQuotaExceed(user_name, intervals.quota_name, quota_type, used, max, interval.duration, end_of_interval); } } @@ -130,23 +75,15 @@ struct EnabledQuota::Impl auto quota_type_i = static_cast(quota_type); for (const auto & interval : intervals.intervals) { - if (!interval.end_of_interval.load().count()) - { - /// We need to calculate end of the interval if it hasn't been calculated before. - bool dummy; - getEndOfInterval(interval, current_time, dummy); - } - QuotaValue used = interval.used[quota_type_i]; QuotaValue max = interval.max[quota_type_i]; - if (!max) continue; if (used > max) { bool counters_were_reset = false; - std::chrono::system_clock::time_point end_of_interval = getEndOfInterval(interval, current_time, counters_were_reset); + auto end_of_interval = interval.getEndOfInterval(current_time, counters_were_reset); if (!counters_were_reset) throwQuotaExceed(user_name, intervals.quota_name, quota_type, used, max, interval.duration, end_of_interval); } @@ -161,17 +98,32 @@ struct EnabledQuota::Impl for (auto quota_type : collections::range(QuotaType::MAX)) checkExceeded(user_name, intervals, quota_type, current_time); } + + static std::chrono::system_clock::duration randomDuration(std::chrono::seconds max) + { + auto count = std::chrono::duration_cast(max).count(); + std::uniform_int_distribution distribution{0, count - 1}; + return std::chrono::system_clock::duration(distribution(thread_local_rng)); + } }; -EnabledQuota::Interval::Interval() +EnabledQuota::Interval::Interval(std::chrono::seconds duration_, bool randomize_interval_, std::chrono::system_clock::time_point current_time_) + : duration(duration_) , randomize_interval(randomize_interval_) { + std::chrono::system_clock::time_point initial_end{}; + if (randomize_interval_) + initial_end += Impl::randomDuration(duration_); + end_of_interval = initial_end.time_since_epoch(); + for (auto quota_type : collections::range(QuotaType::MAX)) { auto quota_type_i = static_cast(quota_type); used[quota_type_i].store(0); max[quota_type_i] = 0; } + + getEndOfInterval(current_time_); /// Force updating the end of the interval for the first time. } @@ -193,6 +145,55 @@ EnabledQuota::Interval & EnabledQuota::Interval::operator =(const Interval & src } +/// Returns the end of the current interval. If the passed `current_time` is greater than that end, +/// the function automatically recalculates the interval's end by adding the interval's duration +/// one or more times until the interval's end is greater than `current_time`. +/// If that recalculation occurs the function also resets amounts of resources used and sets the variable +/// `counters_were_reset`. 
+std::chrono::system_clock::time_point EnabledQuota::Interval::getEndOfInterval(std::chrono::system_clock::time_point current_time) const +{ + bool counters_were_reset; + return getEndOfInterval(current_time, counters_were_reset); +} + +std::chrono::system_clock::time_point EnabledQuota::Interval::getEndOfInterval(std::chrono::system_clock::time_point current_time, bool & counters_were_reset) const +{ + auto end_loaded = end_of_interval.load(); + auto end = std::chrono::system_clock::time_point{end_loaded}; + if (current_time < end) + { + counters_were_reset = false; + return end; + } + + bool need_reset_counters = false; + + do + { + /// Calculate the end of the next interval: + /// | X | + /// end current_time next_end = end + duration * n + /// where n is an integer number, n >= 1. + UInt64 n = static_cast((current_time - end + duration) / duration); + end = end + duration * n; + if (end_of_interval.compare_exchange_strong(end_loaded, end.time_since_epoch())) + { + need_reset_counters = true; + break; + } + end = std::chrono::system_clock::time_point{end_loaded}; + } + while (current_time >= end); + + if (need_reset_counters) + { + boost::range::fill(used, 0); + counters_were_reset = true; + } + return end; +} + + std::optional EnabledQuota::Intervals::getUsage(std::chrono::system_clock::time_point current_time) const { if (!quota_id) @@ -208,8 +209,7 @@ std::optional EnabledQuota::Intervals::getUsage(std::chrono::system_ auto & out = usage.intervals.back(); out.duration = in.duration; out.randomize_interval = in.randomize_interval; - bool counters_were_reset = false; - out.end_of_interval = Impl::getEndOfInterval(in, current_time, counters_were_reset); + out.end_of_interval = in.getEndOfInterval(current_time); for (auto quota_type : collections::range(QuotaType::MAX)) { auto quota_type_i = static_cast(quota_type); diff --git a/src/Access/EnabledQuota.h b/src/Access/EnabledQuota.h index 097afe861d2..88362c9193f 100644 --- a/src/Access/EnabledQuota.h +++ b/src/Access/EnabledQuota.h @@ -73,9 +73,13 @@ private: bool randomize_interval = false; mutable std::atomic end_of_interval; - Interval(); + Interval(std::chrono::seconds duration_, bool randomize_interval_, std::chrono::system_clock::time_point current_time_); + Interval(const Interval & src) { *this = src; } Interval & operator =(const Interval & src); + + std::chrono::system_clock::time_point getEndOfInterval(std::chrono::system_clock::time_point current_time) const; + std::chrono::system_clock::time_point getEndOfInterval(std::chrono::system_clock::time_point current_time, bool & counters_were_reset) const; }; struct Intervals diff --git a/src/Access/QuotaCache.cpp b/src/Access/QuotaCache.cpp index 566c2409205..43ab4268b0c 100644 --- a/src/Access/QuotaCache.cpp +++ b/src/Access/QuotaCache.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -22,17 +21,6 @@ namespace ErrorCodes } -namespace -{ - std::chrono::system_clock::duration randomDuration(std::chrono::seconds max) - { - auto count = std::chrono::duration_cast(max).count(); - std::uniform_int_distribution distribution{0, count - 1}; - return std::chrono::system_clock::duration(distribution(thread_local_rng)); - } -} - - void QuotaCache::QuotaInfo::setQuota(const QuotaPtr & quota_, const UUID & quota_id_) { quota = quota_; @@ -94,18 +82,21 @@ boost::shared_ptr QuotaCache::QuotaInfo::getOrBui auto it = key_to_intervals.find(key); if (it != key_to_intervals.end()) return it->second; - return rebuildIntervals(key); + return rebuildIntervals(key, 
std::chrono::system_clock::now()); } void QuotaCache::QuotaInfo::rebuildAllIntervals() { + if (key_to_intervals.empty()) + return; + auto current_time = std::chrono::system_clock::now(); for (const String & key : key_to_intervals | boost::adaptors::map_keys) - rebuildIntervals(key); + rebuildIntervals(key, current_time); } -boost::shared_ptr QuotaCache::QuotaInfo::rebuildIntervals(const String & key) +boost::shared_ptr QuotaCache::QuotaInfo::rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time) { auto new_intervals = boost::make_shared(); new_intervals->quota_name = quota->getName(); @@ -115,14 +106,8 @@ boost::shared_ptr QuotaCache::QuotaInfo::rebuildI intervals.reserve(quota->all_limits.size()); for (const auto & limits : quota->all_limits) { - intervals.emplace_back(); + intervals.emplace_back(limits.duration, limits.randomize_interval, current_time); auto & interval = intervals.back(); - interval.duration = limits.duration; - std::chrono::system_clock::time_point end_of_interval{}; - interval.randomize_interval = limits.randomize_interval; - if (limits.randomize_interval) - end_of_interval += randomDuration(limits.duration); - interval.end_of_interval = end_of_interval.time_since_epoch(); for (auto quota_type : collections::range(QuotaType::MAX)) { auto quota_type_i = static_cast(quota_type); diff --git a/src/Access/QuotaCache.h b/src/Access/QuotaCache.h index 77682230370..7298acad415 100644 --- a/src/Access/QuotaCache.h +++ b/src/Access/QuotaCache.h @@ -43,7 +43,7 @@ private: String calculateKey(const EnabledQuota & enabled_quota) const; boost::shared_ptr getOrBuildIntervals(const String & key); - boost::shared_ptr rebuildIntervals(const String & key); + boost::shared_ptr rebuildIntervals(const String & key, std::chrono::system_clock::time_point current_time); void rebuildAllIntervals(); QuotaPtr quota; From a3dfb899275be7ea9bc684d9b44d010bec439907 Mon Sep 17 00:00:00 2001 From: bharatnc Date: Sat, 11 Dec 2021 08:12:44 -0800 Subject: [PATCH 247/262] remove unused headers in test --- src/Coordination/tests/gtest_coordination.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index d5498a1bc13..b324ba119fa 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -1,6 +1,5 @@ #include -#include #include "config_core.h" #if USE_NURAFT @@ -15,7 +14,6 @@ #include #include #include -#include #include #include #include From 7ed4c75e637c6db27d803fc9822da21795f8d14d Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Sat, 11 Dec 2021 13:14:41 -0400 Subject: [PATCH 248/262] Update tips.md fixed grammar --- docs/en/operations/tips.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index 6866c4db491..477d3b52965 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -34,7 +34,7 @@ Use `perf top` to watch the time spent in the kernel for memory management. Permanent huge pages also do not need to be allocated. !!! warning "Attention" - If your system has less than 16 GB of RAM you may experience various memory exceptions because default settings does not match this amount of RAM. Recommended amount of RAM is 32 GB or more. You can use ClickHouse in system with small amount of RAM, even with 2 GB of RAM, but it requires an additional tuning and able to process small ingestion rate. 
+ If your system has less than 16 GB of RAM, you may experience various memory exceptions because default settings do not match this amount of memory. The recommended amount of RAM is 32 GB or more. You can use ClickHouse in a system with a small amount of RAM, even with 2 GB of RAM, but it requires additional tuning and can ingest at a low rate. ## Storage Subsystem {#storage-subsystem} From 57c027be0eff6c7c58788c4f442afd1cddb95264 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Dec 2021 21:25:23 +0300 Subject: [PATCH 249/262] Remove arcadia build support --- programs/client/Client.cpp | 4 +--- programs/keeper/Keeper.cpp | 6 ++---- programs/main.cpp | 4 +--- programs/server/Server.cpp | 8 +++----- src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h | 3 --- src/Disks/IO/ReadBufferFromRemoteFSGather.h | 3 --- src/Server/PostgreSQLHandler.h | 5 +---- src/Server/PostgreSQLHandlerFactory.h | 5 +---- 8 files changed, 9 insertions(+), 29 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index b6214d66628..e01677aaac6 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -20,9 +20,7 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include -#endif +#include #include #include #include diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 4dadef911d7..afd6a36ea15 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -22,10 +22,8 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -# include "Common/config_version.h" -#endif +#include "config_core.h" +#include "Common/config_version.h" #if USE_SSL # include diff --git a/programs/main.cpp b/programs/main.cpp index cd416f57982..2cdda075ca7 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -13,9 +13,7 @@ #include #include /// pair -#if !defined(ARCADIA_BUILD) -# include "config_tools.h" -#endif +#include "config_tools.h" #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index bd99b047e6b..14075f9fbf2 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -82,10 +82,8 @@ #include #include -#if !defined(ARCADIA_BUILD) -# include "config_core.h" -# include "Common/config_version.h" -#endif +#include "config_core.h" +#include "Common/config_version.h" #if defined(OS_LINUX) # include @@ -96,7 +94,7 @@ #endif #if USE_SSL -# if USE_INTERNAL_SSL_LIBRARY && !defined(ARCADIA_BUILD) +# if USE_INTERNAL_SSL_LIBRARY # include # endif # include diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index c9b6532e76c..1b0cc17cb41 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -1,9 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif - #include #include #include diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 5bc7d4e4819..f15de4a2d7f 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -1,9 +1,6 @@ #pragma once -#if !defined(ARCADIA_BUILD) #include -#endif - #include #include #include diff --git a/src/Server/PostgreSQLHandler.h b/src/Server/PostgreSQLHandler.h index ded9616296a..1d33f41f255 100644 --- a/src/Server/PostgreSQLHandler.h +++ b/src/Server/PostgreSQLHandler.h @@ -1,15 +1,12 @@ #pragma once #include +#include #include #include #include #include "IServer.h" -#if 
!defined(ARCADIA_BUILD) -# include -#endif - #if USE_SSL # include #endif diff --git a/src/Server/PostgreSQLHandlerFactory.h b/src/Server/PostgreSQLHandlerFactory.h index 9103cbaad90..dc3d4047d2a 100644 --- a/src/Server/PostgreSQLHandlerFactory.h +++ b/src/Server/PostgreSQLHandlerFactory.h @@ -5,10 +5,7 @@ #include #include #include - -#if !defined(ARCADIA_BUILD) -# include -#endif +#include namespace DB { From 8268d8a8c3677c4872ab41b55d91584cd6e68f3f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 11 Dec 2021 21:27:36 +0300 Subject: [PATCH 250/262] Cleanup .gitattributes --- .gitattributes | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitattributes b/.gitattributes index efb059f169a..bcc7d57b904 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,2 @@ contrib/* linguist-vendored *.h linguist-language=C++ -# to avoid frequent conflicts -tests/queries/0_stateless/arcadia_skip_list.txt text merge=union From 8a89b8e39599d2045009095c8343b4e2a782e004 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 12 Dec 2021 00:13:47 +0300 Subject: [PATCH 251/262] perf: do not fail in case of slow queries (to avoid hiding possible issues) Do not stop processing pathologically slow queries, since this may hide errors in other queries, i.e. when the test failed on one of servers (upstream or from PR) and someone interpret this is as OK with the following comment: "the failure was on the upstream server it is OK, PR should fix it" Anyway there is almost zero such queries right now, and before merging something this should be reviewed. --- docker/test/performance-comparison/perf.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 301c5cc7d73..e4366852232 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -354,11 +354,9 @@ for query_index in queries_to_run: print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') if elapsed > args.max_query_seconds: - # Stop processing pathologically slow queries, to avoid timing out - # the entire test task. This shouldn't really happen, so we don't - # need much handling for this case and can just exit. + # Do not stop processing pathologically slow queries, + # since this may hide errors in other queries. print(f'The query no. {query_index} is taking too long to run ({elapsed} s)', file=sys.stderr) - exit(2) # Be careful with the counter, after this line it's the next iteration # already. 
From 89db2c57f46f0984e0d4849c2380ae3ea627f1c3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 12 Dec 2021 00:19:06 +0300 Subject: [PATCH 252/262] Fix --- .../ReadBufferFromRabbitMQConsumer.cpp | 5 -- .../RabbitMQ/ReadBufferFromRabbitMQConsumer.h | 12 +---- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 47 ++++++++++--------- src/Storages/RabbitMQ/StorageRabbitMQ.h | 1 + 4 files changed, 26 insertions(+), 39 deletions(-) diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp index ac60d748e36..c8f199d098e 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.cpp @@ -20,7 +20,6 @@ namespace ErrorCodes } ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( - ChannelPtr consumer_channel_, RabbitMQHandler & event_handler_, std::vector & queues_, size_t channel_id_base_, @@ -30,7 +29,6 @@ ReadBufferFromRabbitMQConsumer::ReadBufferFromRabbitMQConsumer( uint32_t queue_size_, const std::atomic & stopped_) : ReadBuffer(nullptr, 0) - , consumer_channel(std::move(consumer_channel_)) , event_handler(event_handler_) , queues(queues_) , channel_base(channel_base_) @@ -129,9 +127,6 @@ void ReadBufferFromRabbitMQConsumer::setupChannel() if (!consumer_channel) return; - /// We mark initialized only once. - initialized = true; - wait_subscription.store(true); consumer_channel->onReady([&]() diff --git a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h index 55d129856b8..8a527011a3c 100644 --- a/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h +++ b/src/Storages/RabbitMQ/ReadBufferFromRabbitMQConsumer.h @@ -20,7 +20,6 @@ class ReadBufferFromRabbitMQConsumer : public ReadBuffer public: ReadBufferFromRabbitMQConsumer( - ChannelPtr consumer_channel_, RabbitMQHandler & event_handler_, std::vector & queues_, size_t channel_id_base_, @@ -37,7 +36,7 @@ public: UInt64 delivery_tag; String channel_id; - AckTracker() : delivery_tag(0), channel_id("") {} + AckTracker() = default; AckTracker(UInt64 tag, String id) : delivery_tag(tag), channel_id(id) {} }; @@ -75,12 +74,6 @@ public: auto getMessageID() const { return current.message_id; } auto getTimestamp() const { return current.timestamp; } - void initialize() - { - if (!initialized) - setupChannel(); - } - private: bool nextImpl() override; @@ -105,9 +98,6 @@ private: AckTracker last_inserted_record_info; UInt64 prev_tag = 0, channel_id_counter = 0; - - /// Has initial setup after constructor been made? - bool initialized = false; }; } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 66772e7015b..ac299657ae6 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -577,7 +577,7 @@ bool StorageRabbitMQ::updateChannel(ChannelPtr & channel) try { channel = connection->createChannel(); - return channel->usable(); + return true; } catch (...) 
{ @@ -587,6 +587,21 @@ bool StorageRabbitMQ::updateChannel(ChannelPtr & channel) } +void StorageRabbitMQ::prepareChannelForBuffer(ConsumerBufferPtr buffer) +{ + if (!buffer) + return; + + if (buffer->queuesCount() != queues.size()) + buffer->updateQueues(queues); + + buffer->updateAckTracker(); + + if (updateChannel(buffer->getChannel())) + buffer->setupChannel(); +} + + void StorageRabbitMQ::unbindExchange() { /* This is needed because with RabbitMQ (without special adjustments) can't, for example, properly make mv if there was insert query @@ -715,9 +730,9 @@ void StorageRabbitMQ::startup() } catch (...) { - tryLogCurrentException(log); if (!is_attach) throw; + tryLogCurrentException(log); } } else @@ -731,15 +746,14 @@ void StorageRabbitMQ::startup() try { auto buffer = createReadBuffer(); - if (rabbit_is_ready) - buffer->initialize(); pushReadBuffer(std::move(buffer)); ++num_created_consumers; } - catch (const AMQP::Exception & e) + catch (...) { - LOG_ERROR(log, "Got AMQ exception {}", e.what()); - throw; + if (!is_attach) + throw; + tryLogCurrentException(log); } } @@ -871,9 +885,8 @@ ConsumerBufferPtr StorageRabbitMQ::popReadBuffer(std::chrono::milliseconds timeo ConsumerBufferPtr StorageRabbitMQ::createReadBuffer() { - ChannelPtr consumer_channel = connection->createChannel(); return std::make_shared( - std::move(consumer_channel), connection->getHandler(), queues, ++consumer_id, + connection->getHandler(), queues, ++consumer_id, unique_strbase, log, row_delimiter, queue_size, shutdown_called); } @@ -921,7 +934,7 @@ void StorageRabbitMQ::initializeBuffers() if (!initialized) { for (const auto & buffer : buffers) - buffer->initialize(); + prepareChannelForBuffer(buffer); initialized = true; } } @@ -1086,19 +1099,7 @@ bool StorageRabbitMQ::streamToViews() if (source->needChannelUpdate()) { auto buffer = source->getBuffer(); - if (buffer) - { - if (buffer->queuesCount() != queues.size()) - buffer->updateQueues(queues); - - buffer->updateAckTracker(); - - if (updateChannel(buffer->getChannel())) - { - LOG_TRACE(log, "Connection is active, but channel update is needed"); - buffer->setupChannel(); - } - } + prepareChannelForBuffer(buffer); } /* false is returned by the sendAck function in only two cases: diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index a27a5bd59f1..9633326366d 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -66,6 +66,7 @@ public: bool updateChannel(ChannelPtr & channel); void updateQueues(std::vector & queues_) { queues_ = queues; } + void prepareChannelForBuffer(ConsumerBufferPtr buffer); void incrementReader(); void decrementReader(); From a8484ec06d7ed1ff52b9eb63cfd970f8dc238eaf Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 12 Dec 2021 00:47:21 +0300 Subject: [PATCH 253/262] Add test --- .../test.py | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_postgresql_replica_database_engine_1/test.py b/tests/integration/test_postgresql_replica_database_engine_1/test.py index 0dd36d64516..cba9e93c056 100644 --- a/tests/integration/test_postgresql_replica_database_engine_1/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_1/test.py @@ -985,18 +985,29 @@ def test_abrupt_server_restart_while_heavy_replication(started_cluster): cursor.execute('drop table if exists postgresql_replica_{};'.format(i)) -def test_quoting(started_cluster): - table_name = 'user' - conn = 
get_postgres_conn(ip=started_cluster.postgres_ip, - port=started_cluster.postgres_port, - database=True) +def test_quoting_1(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) cursor = conn.cursor() + table_name = 'user' create_postgres_table(cursor, table_name); - instance.query("INSERT INTO postgres_database.{} SELECT number, number from numbers(50)".format(table_name)) + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port) check_tables_are_synchronized(table_name); - drop_postgres_table(cursor, table_name) drop_materialized_db() + drop_postgres_table(cursor, table_name) + + +def test_quoting_2(started_cluster): + conn = get_postgres_conn(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, database=True) + cursor = conn.cursor() + table_name = 'user' + create_postgres_table(cursor, table_name); + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(50)") + create_materialized_db(ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, + settings=[f"materialized_postgresql_tables_list = '{table_name}'"]) + check_tables_are_synchronized(table_name); + drop_materialized_db() + drop_postgres_table(cursor, table_name) def test_user_managed_slots(started_cluster): From 9179b338fa97450593d7f3ffa990b77a29434591 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Dec 2021 02:51:58 +0300 Subject: [PATCH 254/262] Update CHANGELOG.md --- CHANGELOG.md | 183 ++++++++++++++++++++++++--------------------------- 1 file changed, 86 insertions(+), 97 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2788bee40b2..12f74c71fde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,63 +2,79 @@ #### Backward Incompatible Change -* A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then server will fail to start after upgrade. Stop replicas with old version to make new version start. After that it will not be possible to downgrade to version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)). -* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will be not allowed even if enabled by setting, in case there is an attached materialized view. For Kafka and RabbitMQ direct selectm if allowed, will not commit massages by default. To enable commits with direct select, user must use storage level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). cc @filimonov. [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add custom null representation support for TSV/CSV input formats. Fix deserialing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. 
Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). +* *A fix for a feature that previously had unwanted behaviour.* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will not be allowed, even if enabled by the setting, if there is an attached materialized view. For Kafka and RabbitMQ, direct select, if allowed, will not commit messages by default. To enable commits with direct select, the user must use the storage-level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). +* *A slight change in behaviour of a new function.* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). +* *Setting rename.* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). +* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions 20.6 and older. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then the server will fail to start after the upgrade. Stop replicas with the old version to make the new version start. After that it will not be possible to downgrade to a version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)). #### New Feature -* Added new SQL elements `WINDOW VIEW` and `WINDOW FUNCTION` to enable stream processing for ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)). -* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)). -* Allow to print/parse names and types of colums in `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)). -* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)). -* Exposes all GlobalThreadPool configurations to the configuration files. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)). -* Support `bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)).
+* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). Now `clickhouse-keeper` is feature complete. +* Support for `Bool` data type. [#31072](https://github.com/ClickHouse/ClickHouse/pull/31072) ([kevin wan](https://github.com/MaxWk)). * Support for `PARTITION BY` in File, URL, HDFS storages and with `INSERT INTO` table function. Closes [#30273](https://github.com/ClickHouse/ClickHouse/issues/30273). [#30690](https://github.com/ClickHouse/ClickHouse/pull/30690) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... later). Added ability to replace heavy columns with light columns if it's possible. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)). +* Basic access authentication for http/url functions. [#31648](https://github.com/ClickHouse/ClickHouse/pull/31648) ([michael1589](https://github.com/michael1589)). +* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)). +* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)). +* *TLDR: Major improvements of completeness and consistency of text formats.* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWIthNames`, `JSONCompactStringsEachRowWIthNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WIthNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WIthNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WIthNamesAndTypes)`. Support columns mapping and types checking for `RowBinaryWithNamesAndTypes` format. Add setting `input_format_with_types_use_header` which specify if we should check that types written in `WIthNamesAndTypes` format matches with table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. 
Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values was inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed just until first '\n' or '\t'). Add ability to use `Raw` escaping rule in Template input format. Add diagnostic info for JSONCompactEachRow(WithNames/WIthNamesAndTypes) input format. Fix bug with parallel parsing of `-WithNames` formats in case when setting `min_chunk_bytes_for_parallel_parsing` is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). Allow to print/parse names and types of colums in `CustomSeparated` input/output format. Add formats `CustomSeparatedWithNames/WithNamesAndTypes` similar to `TSVWithNames/WithNamesAndTypes`. [#31434](https://github.com/ClickHouse/ClickHouse/pull/31434) ([Kruglov Pavel](https://github.com/Avogar)). +* Aliyun OSS Storage support. [#31286](https://github.com/ClickHouse/ClickHouse/pull/31286) ([cfcz48](https://github.com/cfcz48)). +* Exposes all settings of the global thread pool in the configuration file. [#31285](https://github.com/ClickHouse/ClickHouse/pull/31285) ([Tomáš Hromada](https://github.com/gyfis)). * Introduced window functions `exponentialTimeDecayedSum`, `exponentialTimeDecayedMax`, `exponentialTimeDecayedCount` and `exponentialTimeDecayedAvg` which are more effective than `exponentialMovingAverage` for bigger windows. Also more use-cases were covered. [#29799](https://github.com/ClickHouse/ClickHouse/pull/29799) ([Vladimir Chebotarev](https://github.com/excitoon)). * Add option to compress logs before writing them to a file using LZ4. Closes [#23860](https://github.com/ClickHouse/ClickHouse/issues/23860). [#29219](https://github.com/ClickHouse/ClickHouse/pull/29219) ([Nikolay Degterinsky](https://github.com/evillique)). -* Implemented more of the ZooKeeper Four Letter Words commands in clickhouse-keeper: https://zookeeper.apache.org/doc/r3.4.8/zookeeperAdmin.html#sc_zkCommands. [#28981](https://github.com/ClickHouse/ClickHouse/pull/28981) ([JackyWoo](https://github.com/JackyWoo)). -* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)). -* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)). * Support `JOIN ON 1 = 1` that have CROSS JOIN semantic. This closes [#25578](https://github.com/ClickHouse/ClickHouse/issues/25578). [#25894](https://github.com/ClickHouse/ClickHouse/pull/25894) ([Vladimir C](https://github.com/vdimir)). * Add Map combinator for `Map` type. - Rename old `sum-, min-, max- Map` for mapped arrays to `sum-, min-, max- MappedArrays`. [#24539](https://github.com/ClickHouse/ClickHouse/pull/24539) ([Ildus Kurbangaliev](https://github.com/ildus)). -* Added `CONSTRAINT ... ASSUME ...` (without checking during `INSERT`). Added query transformation to CNF (https://github.com/ClickHouse/ClickHouse/issues/11749) for more convenient optimization. Added simple query rewriting using constraints (only simple matching now, will be improved to support <,=,>... 
later). Added ability to replace heavy columns with light. Added ability to use the index in queries. [#18787](https://github.com/ClickHouse/ClickHouse/pull/18787) ([Nikita Vasilev](https://github.com/nikvas0)). +* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Experimental Feature + +* `WINDOW VIEW` to enable stream processing in ClickHouse. [#8331](https://github.com/ClickHouse/ClickHouse/pull/8331) ([vxider](https://github.com/Vxider)). +* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)). +* Implement the commands BACKUP and RESTORE for the Log family. This feature is under development. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)). #### Performance Improvement -* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)). +* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)). +* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)). * Speed up count over nullable columns. [#31806](https://github.com/ClickHouse/ClickHouse/pull/31806) ([Raúl Marín](https://github.com/Algunenano)). * Speed up `avg` and `sumCount` aggregate functions. [#31694](https://github.com/ClickHouse/ClickHouse/pull/31694) ([Raúl Marín](https://github.com/Algunenano)). * Improve performance of JSON and XML output formats. [#31673](https://github.com/ClickHouse/ClickHouse/pull/31673) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support parallel formatting for all text formats, except `JSONEachRowWithProgress` and `PrettyCompactMonoBlock`. [#31489](https://github.com/ClickHouse/ClickHouse/pull/31489) ([Kruglov Pavel](https://github.com/Avogar)). * Improve performance of syncing data to block device. This closes [#31181](https://github.com/ClickHouse/ClickHouse/issues/31181). [#31229](https://github.com/ClickHouse/ClickHouse/pull/31229) ([zhanglistar](https://github.com/zhanglistar)). * Fixing query performance issue in `LiveView` tables. Fixes [#30831](https://github.com/ClickHouse/ClickHouse/issues/30831). [#31006](https://github.com/ClickHouse/ClickHouse/pull/31006) ([vzakaznikov](https://github.com/vzakaznikov)). +* Speed up query parsing. [#31949](https://github.com/ClickHouse/ClickHouse/pull/31949) ([Raúl Marín](https://github.com/Algunenano)). 
* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional `rule_type` field). [#25122](https://github.com/ClickHouse/ClickHouse/pull/25122) ([Michail Safronov](https://github.com/msaf1980)). +* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)). +* Optimize function `tupleElement` to reading of subcolumn with enabled setting `optimize_functions_to_subcolumns`. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)). +* Optimize function `mapContains` to reading of subcolumn `key` with enabled settings `optimize_functions_to_subcolumns`. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)). +* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)). #### Improvement -* Support default expression for HDFS storage and optimize fetching when source is column oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)). +* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)). +* Allow versioning of aggregate function states. Now we can introduce backward compatible changes in serialization format of aggregate function states. Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support PostgreSQL style `ALTER MODIFY COLUMN` syntax. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)). +* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)). +* The `murmurHash3_128` and `sipHash128` functions now accept an arbitrary number of arguments. This closes [#28774](https://github.com/ClickHouse/ClickHouse/issues/28774). [#28965](https://github.com/ClickHouse/ClickHouse/pull/28965) ([小路](https://github.com/nicelulu)). +* Support default expression for `HDFS` storage and optimize fetching when source is column oriented. [#32256](https://github.com/ClickHouse/ClickHouse/pull/32256) ([李扬](https://github.com/taiyang-li)). * Improve the operation name of an opentelemetry span. [#32234](https://github.com/ClickHouse/ClickHouse/pull/32234) ([Frank Chen](https://github.com/FrankChen021)). * Use `Content-Type: application/x-ndjson` (http://ndjson.org/) for output format `JSONEachRow`. [#32223](https://github.com/ClickHouse/ClickHouse/pull/32223) ([Dmitriy Dorofeev](https://github.com/deem0n)). * Improve skipping unknown fields with quoted escaping rule in Template/CustomSeparated formats. 
Previously you could skip only quoted strings, now you can skip values with any type. [#32204](https://github.com/ClickHouse/ClickHouse/pull/32204) ([Kruglov Pavel](https://github.com/Avogar)). -* Added `update_field` support for `RangeHashedDictionary`, `ComplexKeyRangeHashedDictionary`. [#32185](https://github.com/ClickHouse/ClickHouse/pull/32185) ([Maksim Kita](https://github.com/kitaisreal)). * Now `clickhouse-keeper` refuses to start or apply configuration changes when they contain duplicated IDs or endpoints. Fixes [#31339](https://github.com/ClickHouse/ClickHouse/issues/31339). [#32121](https://github.com/ClickHouse/ClickHouse/pull/32121) ([alesapin](https://github.com/alesapin)). * Set Content-Type in HTTP packets issued from URL engine. [#32113](https://github.com/ClickHouse/ClickHouse/pull/32113) ([Frank Chen](https://github.com/FrankChen021)). * Return Content-Type as 'application/json' for `JSONEachRow` format if `output_format_json_array_of_rows` is enabled. [#32112](https://github.com/ClickHouse/ClickHouse/pull/32112) ([Frank Chen](https://github.com/FrankChen021)). -* Allow to write `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow a user configured `hdfs_replication` parameter for `DiskHdfs` and `StorageHdfs`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)). +* Allow to parse `+` before `Float32`/`Float64` values. [#32079](https://github.com/ClickHouse/ClickHouse/pull/32079) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow a user configured `hdfs_replication` parameter for `DiskHDFS` and `StorageHDFS`. Closes [#32039](https://github.com/ClickHouse/ClickHouse/issues/32039). [#32049](https://github.com/ClickHouse/ClickHouse/pull/32049) ([leosunli](https://github.com/leosunli)). * Added ClickHouse `exception` and `exception_code` fields to opentelemetry span log. [#32040](https://github.com/ClickHouse/ClickHouse/pull/32040) ([Frank Chen](https://github.com/FrankChen021)). -* Fix a bug that opentelemetry span log duration is zero at the query level if there is a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)). -* Remove excessive `DESC TABLE` requests for `remote()` (in case of `remote('127.1', system.one)` (i.e. identifier as the db.table instead of string) there was excessive `DESC TABLE` request). [#32019](https://github.com/ClickHouse/ClickHouse/pull/32019) ([Azat Khuzhin](https://github.com/azat)). -* Support PostgreSQL style ALTER MODIFY COLUMN. [#32003](https://github.com/ClickHouse/ClickHouse/pull/32003) ([SuperDJY](https://github.com/cmsxbc)). +* Improve opentelemetry span log duration - it was is zero at the query level if there is a query exception. [#32038](https://github.com/ClickHouse/ClickHouse/pull/32038) ([Frank Chen](https://github.com/FrankChen021)). * Fix the issue that `LowCardinality` of `Int256` cannot be created. [#31832](https://github.com/ClickHouse/ClickHouse/pull/31832) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Recreate system.*_log tables in case of different engine/partition_by. [#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)). +* Recreate `system.*_log` tables in case of different engine/partition_by. 
[#31824](https://github.com/ClickHouse/ClickHouse/pull/31824) ([Azat Khuzhin](https://github.com/azat)). * `MaterializedMySQL`: Fix issue with table named 'table'. [#31781](https://github.com/ClickHouse/ClickHouse/pull/31781) ([Håvard Kvålen](https://github.com/havardk)). -* ClickHouse dictionary source support named collections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow to use named collections configuration for kafka and rabbitmq engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)). +* ClickHouse dictionary source: support named collections. Closes [#31705](https://github.com/ClickHouse/ClickHouse/issues/31705). [#31749](https://github.com/ClickHouse/ClickHouse/pull/31749) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Allow to use named collections configuration for Kafka and RabbitMQ engines (the same way as for other integration table engines). [#31691](https://github.com/ClickHouse/ClickHouse/pull/31691) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Always re-render prompt while navigating history in clickhouse-client. This will improve usability of manipulating very long queries that don't fit on screen. [#31675](https://github.com/ClickHouse/ClickHouse/pull/31675) ([alexey-milovidov](https://github.com/alexey-milovidov)) (author: Amos Bird). +* Add key bindings for navigating through history (instead of lines/history). [#31641](https://github.com/ClickHouse/ClickHouse/pull/31641) ([Azat Khuzhin](https://github.com/azat)). * Improve the `max_execution_time` checks. Fixed some cases when timeout checks do not happen and query could run too long. [#31636](https://github.com/ClickHouse/ClickHouse/pull/31636) ([Raúl Marín](https://github.com/Algunenano)). * Better exception message when `users.xml` cannot be loaded due to bad password hash. This closes [#24126](https://github.com/ClickHouse/ClickHouse/issues/24126). [#31557](https://github.com/ClickHouse/ClickHouse/pull/31557) ([Vitaly Baranov](https://github.com/vitlibar)). * Use shard and replica name from `Replicated` database arguments when expanding macros in `ReplicatedMergeTree` arguments if these macros are not defined in config. Closes [#31471](https://github.com/ClickHouse/ClickHouse/issues/31471). [#31488](https://github.com/ClickHouse/ClickHouse/pull/31488) ([tavplubix](https://github.com/tavplubix)). @@ -68,123 +84,96 @@ * Throw an exception if there is some garbage after field in `JSONCompactStrings(EachRow)` format. [#31455](https://github.com/ClickHouse/ClickHouse/pull/31455) ([Kruglov Pavel](https://github.com/Avogar)). * Default value of `http_send_timeout` and `http_receive_timeout` settings changed from 1800 (30 minutes) to 180 (3 minutes). [#31450](https://github.com/ClickHouse/ClickHouse/pull/31450) ([tavplubix](https://github.com/tavplubix)). 
* `MaterializedMySQL` now handles `CREATE TABLE ... LIKE ...` DDL queries. [#31410](https://github.com/ClickHouse/ClickHouse/pull/31410) ([Stig Bakken](https://github.com/stigsb)). -* Return fake create query when executing `show create table` on system's tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)). -* Previously progress was shown only for `numbers` table function, not for `numbers_mt`. Now for `numbers_mt` it is also shown. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Return artificial create query when executing `show create table` on system's tables. [#31391](https://github.com/ClickHouse/ClickHouse/pull/31391) ([SuperDJY](https://github.com/cmsxbc)). +* Previously progress was shown only for `numbers` table function. Now for `numbers_mt` it is also shown. [#31318](https://github.com/ClickHouse/ClickHouse/pull/31318) ([Kseniia Sumarokova](https://github.com/kssenii)). * Initial user's roles are used now to find row policies, see [#31080](https://github.com/ClickHouse/ClickHouse/issues/31080). [#31262](https://github.com/ClickHouse/ClickHouse/pull/31262) ([Vitaly Baranov](https://github.com/vitlibar)). -* Optimize function `tupleElement` to reading of subcolumn with enabled setting `optimize_functions_to_subcolumns`. [#31261](https://github.com/ClickHouse/ClickHouse/pull/31261) ([Anton Popov](https://github.com/CurtizJ)). -* If some obsolete setting is changed show warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)). -* Optimize function `mapContains` to reading of subcolumn `key` with enabled settings `optimize_functions_to_subcolumns`. [#31218](https://github.com/ClickHouse/ClickHouse/pull/31218) ([Anton Popov](https://github.com/CurtizJ)). +* If some obsolete setting is changed - show warning in `system.warnings`. [#31252](https://github.com/ClickHouse/ClickHouse/pull/31252) ([tavplubix](https://github.com/tavplubix)). * Improved backoff for background cleanup tasks in `MergeTree`. Settings `merge_tree_clear_old_temporary_directories_interval_seconds` and `merge_tree_clear_old_parts_interval_seconds` moved from users settings to merge tree settings. [#31180](https://github.com/ClickHouse/ClickHouse/pull/31180) ([tavplubix](https://github.com/tavplubix)). -* Syntax changed so now backup engine should be set explicitly: `BACKUP ... TO Disk('backups', 'path\')`. Also changed the format of backup's metadata, now it's in XML. Backup of a whole database now works. [#31178](https://github.com/ClickHouse/ClickHouse/pull/31178) ([Vitaly Baranov](https://github.com/vitlibar)). -* Now every replica will send to client only incremental information about profile events counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). -* Use DiskPtr instead of OS's file system API in class `IDiskRemote` in order to get more extendability. Closes [#31117](https://github.com/ClickHouse/ClickHouse/issues/31117). [#31136](https://github.com/ClickHouse/ClickHouse/pull/31136) ([Yangkuan Liu](https://github.com/LiuYangkuan)). +* Now every replica will send to client only incremental information about profile events counters. [#31155](https://github.com/ClickHouse/ClickHouse/pull/31155) ([Dmitry Novik](https://github.com/novikd)). This makes `--hardware_utilization` option in `clickhouse-client` usable. 
* Enable multiline editing in clickhouse-client by default. This addresses [#31121](https://github.com/ClickHouse/ClickHouse/issues/31121) . [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123) ([Amos Bird](https://github.com/amosbird)). * Function name normalization for `ALTER` queries. This helps avoid metadata mismatch between creating table with indices/projections and adding indices/projections via alter commands. This is a follow-up PR of https://github.com/ClickHouse/ClickHouse/pull/20174. Mark as improvements as there are no bug reports and the scenario is somewhat rare. [#31095](https://github.com/ClickHouse/ClickHouse/pull/31095) ([Amos Bird](https://github.com/amosbird)). * Support `IF EXISTS` modifier for `RENAME DATABASE`/`TABLE`/`DICTIONARY` query. If this directive is used, one will not get an error if the DATABASE/TABLE/DICTIONARY to be renamed doesn't exist. [#31081](https://github.com/ClickHouse/ClickHouse/pull/31081) ([victorgao](https://github.com/kafka1991)). * Cancel vertical merges when partition is dropped. This is a follow-up of https://github.com/ClickHouse/ClickHouse/pull/25684 and https://github.com/ClickHouse/ClickHouse/pull/30996. [#31057](https://github.com/ClickHouse/ClickHouse/pull/31057) ([Amos Bird](https://github.com/amosbird)). * The local session inside a ClickHouse dictionary source won't send its events to the session log anymore. This fixes a possible deadlock (tsan alert) on shutdown. Also this PR fixes flaky `test_dictionaries_dependency_xml/`. [#31013](https://github.com/ClickHouse/ClickHouse/pull/31013) ([Vitaly Baranov](https://github.com/vitlibar)). -* Only grab AlterLock when we do alter command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)). -* Do not allow to drop a table or dictionary if some tables or dictionaries depend on it. [#30977](https://github.com/ClickHouse/ClickHouse/pull/30977) ([tavplubix](https://github.com/tavplubix)). -* Add settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem` and `merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem`. [#30970](https://github.com/ClickHouse/ClickHouse/pull/30970) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Reduce memory usage when reading with `s3` / `url` / `hdfs` formats `Parquet`, `ORC`, `Arrow` (controlled by setting `input_format_allow_seeks`, enabled by default). Also add setting `remote_read_min_bytes_for_seek` to control seeks. Closes [#10461](https://github.com/ClickHouse/ClickHouse/issues/10461). Closes [#16857](https://github.com/ClickHouse/ClickHouse/issues/16857). [#30936](https://github.com/ClickHouse/ClickHouse/pull/30936) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support `INTERVAL` type in `STEP` clause for `WITH FILL` modifier. [#30927](https://github.com/ClickHouse/ClickHouse/pull/30927) ([Anton Popov](https://github.com/CurtizJ)). +* Less locking in ALTER command. [#31010](https://github.com/ClickHouse/ClickHouse/pull/31010) ([Amos Bird](https://github.com/amosbird)). * Fix `--verbose` option in clickhouse-local interactive mode and allow logging into file. [#30881](https://github.com/ClickHouse/ClickHouse/pull/30881) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Added `\l`, `\d`, `\c` aliases like in MySQL. [#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)). +* Added `\l`, `\d`, `\c` commands in `clickhouse-client` like in MySQL and PostgreSQL. 
[#30876](https://github.com/ClickHouse/ClickHouse/pull/30876) ([Pavel Medvedev](https://github.com/pmed)). * For clickhouse-local or clickhouse-client: if there is `--interactive` option with `--query` or `--queries-file`, then first execute them like in non-interactive and then start interactive mode. [#30851](https://github.com/ClickHouse/ClickHouse/pull/30851) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix possible "The local set of parts of X doesn't look like the set of parts in ZooKeeper" error (if DROP fails during removing znodes from zookeeper). [#30826](https://github.com/ClickHouse/ClickHouse/pull/30826) ([Azat Khuzhin](https://github.com/azat)). -* Implement the commands BACKUP and RESTORE for the Log family. [#30688](https://github.com/ClickHouse/ClickHouse/pull/30688) ([Vitaly Baranov](https://github.com/vitlibar)). * Avro format works against Kafka. Setting `output_format_avro_rows_in_file` added. [#30351](https://github.com/ClickHouse/ClickHouse/pull/30351) ([Ilya Golshtein](https://github.com/ilejn)). -* Refactor formats `TSV`, `TSVRaw`, `CSV` and `JSONCompactEachRow`, `JSONCompactStringsEachRow`, remove code duplication, add base interface for formats with `-WithNames` and `-WithNamesAndTypes` suffixes. Add formats `CSVWithNamesAndTypes`, `TSVRawWithNames`, `TSVRawWithNamesAndTypes`, `JSONCompactEachRowWIthNames`, `JSONCompactStringsEachRowWIthNames`, `RowBinaryWithNames`. Support parallel parsing for formats `TSVWithNamesAndTypes`, `TSVRaw(WithNames/WIthNamesAndTypes)`, `CSVWithNamesAndTypes`, `JSONCompactEachRow(WithNames/WIthNamesAndTypes)`, `JSONCompactStringsEachRow(WithNames/WIthNamesAndTypes)`. Support columns mapping and types checking for `RowBinaryWithNamesAndTypes` format. Add setting `input_format_with_types_use_header` which specify if we should check that types written in `WIthNamesAndTypes` format matches with table structure. Add setting `input_format_csv_empty_as_default` and use it in CSV format instead of `input_format_defaults_for_omitted_fields` (because this setting should not control `csv_empty_as_default`). Fix usage of setting `input_format_defaults_for_omitted_fields` (it was used only as `csv_empty_as_default`, but it should control calculation of default expressions for omitted fields). Fix Nullable input/output in `TSVRaw` format, make this format fully compatible with inserting into TSV. Fix inserting NULLs in `LowCardinality(Nullable)` when `input_format_null_as_default` is enabled (previously default values was inserted instead of actual NULLs). Fix strings deserialization in `JSONStringsEachRow`/`JSONCompactStringsEachRow` formats (strings were parsed just until first '\n' or '\t'). Add ability to use `Raw` escaping rule in Template input format. Add diagnostic info for JSONCompactEachRow(WithNames/WIthNamesAndTypes) input format. Fix bug with parallel parsing of `-WithNames` formats in case when setting `min_chunk_bytes_for_parallel_parsing` is less than bytes in a single row. [#30178](https://github.com/ClickHouse/ClickHouse/pull/30178) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for parallel reading from multiple files and support globs in `FROM INFILE` clause. [#30135](https://github.com/ClickHouse/ClickHouse/pull/30135) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Make reading from HTTP retriable. Closes [#29696](https://github.com/ClickHouse/ClickHouse/issues/29696). [#29894](https://github.com/ClickHouse/ClickHouse/pull/29894) ([Kseniia Sumarokova](https://github.com/kssenii)). 
* Allow to specify one or any number of PostgreSQL schemas for one `MaterializedPostgreSQL` database. Closes [#28901](https://github.com/ClickHouse/ClickHouse/issues/28901). Closes [#29324](https://github.com/ClickHouse/ClickHouse/issues/29324). [#28933](https://github.com/ClickHouse/ClickHouse/pull/28933) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add support for `Identifier` table and database query parameters. Closes [#27226](https://github.com/ClickHouse/ClickHouse/issues/27226). [#28668](https://github.com/ClickHouse/ClickHouse/pull/28668) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add optimizations for constant conditions in JOIN ON, ref [#26928](https://github.com/ClickHouse/ClickHouse/issues/26928). [#27021](https://github.com/ClickHouse/ClickHouse/pull/27021) ([Vladimir C](https://github.com/vdimir)). -* Closes [#12552](https://github.com/ClickHouse/ClickHouse/issues/12552). Allow versioning of aggregate function states. [#24820](https://github.com/ClickHouse/ClickHouse/pull/24820) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Skipping mutations of different partitions in `StorageMergeTree`. [#21326](https://github.com/ClickHouse/ClickHouse/pull/21326) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)). - +* Replaced default ports for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)). +* Implement function transform with Decimal arguments. [#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)). +* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). #### Bug Fixes -* Fix bug when remove unneeded columns in subquery. If there is an aggregation function in query without group by, do not remove if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). +* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). +* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). 
[#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Fix skipping columns while writing protobuf. This PR fixes [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160), see the comment [#31160](https://github.com/ClickHouse/ClickHouse/issues/31160)#issuecomment-980595318. [#31988](https://github.com/ClickHouse/ClickHouse/pull/31988) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix a bug when removing unneeded columns in a subquery: if there is an aggregation function in a query without GROUP BY, do not remove it even if it is unneeded. [#32289](https://github.com/ClickHouse/ClickHouse/pull/32289) ([dongyifeng](https://github.com/dyf6372)). * Quota limit was not reached, but the limit was exceeded. This PR fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31337](https://github.com/ClickHouse/ClickHouse/pull/31337) ([sunny](https://github.com/sunny19930321)). * Fix SHOW GRANTS when partial revokes are used. This PR fixes [#31138](https://github.com/ClickHouse/ClickHouse/issues/31138). [#31249](https://github.com/ClickHouse/ClickHouse/pull/31249) ([Vitaly Baranov](https://github.com/vitlibar)). * Memory amount was incorrectly estimated when ClickHouse is run in containers with cgroup limits. [#31157](https://github.com/ClickHouse/ClickHouse/pull/31157) ([Pavel Medvedev](https://github.com/pmed)). -* Fixed the behavior when mutations that have nothing to do are stuck (with enabled setting `empty_result_for_aggregation_by_empty_set`). [#32358](https://github.com/ClickHouse/ClickHouse/pull/32358) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * Fix `ALTER ... MATERIALIZE COLUMN ...` queries in case when data type of default expression is not equal to the data type of column. [#32348](https://github.com/ClickHouse/ClickHouse/pull/32348) ([Anton Popov](https://github.com/CurtizJ)). * Fixed crash with SIGFPE in aggregate function `avgWeighted` with `Decimal` argument. Fixes [#32053](https://github.com/ClickHouse/ClickHouse/issues/32053). [#32303](https://github.com/ClickHouse/ClickHouse/pull/32303) ([tavplubix](https://github.com/tavplubix)). * Server might fail to start with `Cannot attach 1 tables due to cyclic dependencies` error if `Dictionary` table looks at XML-dictionary with the same name, it's fixed. Fixes [#31315](https://github.com/ClickHouse/ClickHouse/issues/31315). [#32288](https://github.com/ClickHouse/ClickHouse/pull/32288) ([tavplubix](https://github.com/tavplubix)). -* Fix window view parser. [#32232](https://github.com/ClickHouse/ClickHouse/pull/32232) ([vxider](https://github.com/Vxider)). * Fix parsing error while NaN deserializing for `Nullable(Float)` for `Quoted` escaping rule. [#32190](https://github.com/ClickHouse/ClickHouse/pull/32190) ([Kruglov Pavel](https://github.com/Avogar)). -* XML dictionaries identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). +* XML dictionaries: identifiers, used in table create query, can be qualified to `default_database` during upgrade to newer version. Closes [#31963](https://github.com/ClickHouse/ClickHouse/issues/31963). [#32187](https://github.com/ClickHouse/ClickHouse/pull/32187) ([Maksim Kita](https://github.com/kitaisreal)). 
* Number of active replicas might be determined incorrectly when inserting with quorum if setting `replicated_can_become_leader` is disabled on some replicas. It's fixed. [#32157](https://github.com/ClickHouse/ClickHouse/pull/32157) ([tavplubix](https://github.com/tavplubix)). -* Dictionaries fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). +* Dictionaries: fix cases when `{condition}` does not work for custom database queries. [#32117](https://github.com/ClickHouse/ClickHouse/pull/32117) ([Maksim Kita](https://github.com/kitaisreal)). * Fix `CAST` from `Nullable` with `cast_keep_nullable` (`PARAMETER_OUT_OF_BOUND` error before for i.e. `toUInt32OrDefault(toNullable(toUInt32(1)))`). [#32080](https://github.com/ClickHouse/ClickHouse/pull/32080) ([Azat Khuzhin](https://github.com/azat)). -* Fix CREATE TABLE of Join Storage with multiply settings contains persistency. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)). +* Fix CREATE TABLE of Join Storage in some obscure cases. Close [#31680](https://github.com/ClickHouse/ClickHouse/issues/31680). [#32066](https://github.com/ClickHouse/ClickHouse/pull/32066) ([SuperDJY](https://github.com/cmsxbc)). * Fixed `Directory ... already exists and is not empty` error when detaching part. [#32063](https://github.com/ClickHouse/ClickHouse/pull/32063) ([tavplubix](https://github.com/tavplubix)). -* `MaterializedMySQL`: Fix rare corruption of `DECIMAL` data. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)). -* Fix `FileLog` engine unnesessary create meta data directory when create table failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)). +* `MaterializedMySQL` (experimental feature): Fix misinterpretation of `DECIMAL` data from MySQL. [#31990](https://github.com/ClickHouse/ClickHouse/pull/31990) ([Håvard Kvålen](https://github.com/havardk)). +* `FileLog` (experimental feature): the engine unnecessarily created a metadata directory when table creation failed. Fix [#31962](https://github.com/ClickHouse/ClickHouse/issues/31962). [#31967](https://github.com/ClickHouse/ClickHouse/pull/31967) ([flynn](https://github.com/ucasfl)). * Some `GET_PART` entry might hang in replication queue if part is lost on all replicas and there are no other parts in the same partition. It's fixed in cases when partition key contains only columns of integer types or `Date[Time]`. Fixes [#31485](https://github.com/ClickHouse/ClickHouse/issues/31485). [#31887](https://github.com/ClickHouse/ClickHouse/pull/31887) ([tavplubix](https://github.com/tavplubix)). * Fix functions `empty` and `notEmpty` with arguments of `UUID` type. Fixes [#31819](https://github.com/ClickHouse/ClickHouse/issues/31819). [#31883](https://github.com/ClickHouse/ClickHouse/pull/31883) ([Anton Popov](https://github.com/CurtizJ)). * Change configuration path from `keeper_server.session_timeout_ms` to `keeper_server.coordination_settings.session_timeout_ms` when constructing a `KeeperTCPHandler`. Same with `operation_timeout`. [#31859](https://github.com/ClickHouse/ClickHouse/pull/31859) ([JackyWoo](https://github.com/JackyWoo)). -* Fix a bug about function transform with decimal args. 
[#31839](https://github.com/ClickHouse/ClickHouse/pull/31839) ([李帅](https://github.com/loneylee)). -* Fix reading from `MergeTree` tables with enabled `use_uncompressed_cache`. [#31826](https://github.com/ClickHouse/ClickHouse/pull/31826) ([Anton Popov](https://github.com/CurtizJ)). -* Fix invalid cast of nullable type when nullable primary key is used. This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). -* Fix recursive user defined functions crash. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix possible assertion `../src/IO/ReadBuffer.h:58: bool DB::ReadBuffer::next(): Assertion '!hasPendingData()' failed.` in TSKV format. [#31804](https://github.com/ClickHouse/ClickHouse/pull/31804) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix invalid cast of Nullable type when nullable primary key is used. (Nullable primary key is a discouraged feature - please do not use). This fixes [#31075](https://github.com/ClickHouse/ClickHouse/issues/31075). [#31823](https://github.com/ClickHouse/ClickHouse/pull/31823) ([Amos Bird](https://github.com/amosbird)). +* Fix crash in recursive UDF in SQL. Closes [#30856](https://github.com/ClickHouse/ClickHouse/issues/30856). [#31820](https://github.com/ClickHouse/ClickHouse/pull/31820) ([Maksim Kita](https://github.com/kitaisreal)). * Fix crash when function `dictGet` with type is used for dictionary attribute when type is `Nullable`. Fixes [#30980](https://github.com/ClickHouse/ClickHouse/issues/30980). [#31800](https://github.com/ClickHouse/ClickHouse/pull/31800) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix crash with empty result on odbc query. Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix usage of `Buffer` table engine with type `Map`. Fixes [#30546](https://github.com/ClickHouse/ClickHouse/issues/30546). [#31742](https://github.com/ClickHouse/ClickHouse/pull/31742) ([Anton Popov](https://github.com/CurtizJ)). -* Fix group by / order by / limit by aliases with positional arguments enabled. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173). [#31741](https://github.com/ClickHouse/ClickHouse/pull/31741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash with empty result of ODBC query (with some ODBC drivers). Closes [#31465](https://github.com/ClickHouse/ClickHouse/issues/31465). [#31766](https://github.com/ClickHouse/ClickHouse/pull/31766) ([Kseniia Sumarokova](https://github.com/kssenii)). * Fix disabling query profiler (In case of `query_profiler_real_time_period_ns>0`/`query_profiler_cpu_time_period_ns>0` query profiler can stayed enabled even after query finished). [#31740](https://github.com/ClickHouse/ClickHouse/pull/31740) ([Azat Khuzhin](https://github.com/azat)). * Fixed rare segfault on concurrent `ATTACH PARTITION` queries. [#31738](https://github.com/ClickHouse/ClickHouse/pull/31738) ([tavplubix](https://github.com/tavplubix)). * Fix race in JSONEachRowWithProgress output format when data and lines with progress are mixed in output. [#31736](https://github.com/ClickHouse/ClickHouse/pull/31736) ([Kruglov Pavel](https://github.com/Avogar)). 
* Fixed `there are no such cluster here` error on execution of `ON CLUSTER` query if specified cluster name is name of `Replicated` database. [#31723](https://github.com/ClickHouse/ClickHouse/pull/31723) ([tavplubix](https://github.com/tavplubix)). * Fix exception on some of the applications of `decrypt` function on Nullable columns. This closes [#31662](https://github.com/ClickHouse/ClickHouse/issues/31662). This closes [#31426](https://github.com/ClickHouse/ClickHouse/issues/31426). [#31707](https://github.com/ClickHouse/ClickHouse/pull/31707) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* * Fixed function ngrams when string contains utf8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)). +* Fixed function ngrams when string contains UTF-8 characters. [#31706](https://github.com/ClickHouse/ClickHouse/pull/31706) ([yandd](https://github.com/yandd)). * Settings `input_format_allow_errors_num` and `input_format_allow_errors_ratio` did not work for parsing of domain types, such as `IPv4`, it's fixed. Fixes [#31686](https://github.com/ClickHouse/ClickHouse/issues/31686). [#31697](https://github.com/ClickHouse/ClickHouse/pull/31697) ([tavplubix](https://github.com/tavplubix)). -* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Fixed null pointer exception in `MATERIALIZE COLUMN`. [#31679](https://github.com/ClickHouse/ClickHouse/pull/31679) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * `RENAME TABLE` query worked incorrectly on attempt to rename an DDL dictionary in `Ordinary` database, it's fixed. [#31638](https://github.com/ClickHouse/ClickHouse/pull/31638) ([tavplubix](https://github.com/tavplubix)). -* Fix sparkbars are not aligned, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)). -* All non-x86 builds were broken, because we don't have tests for them. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Implement `sparkbar` aggregate function as it was intended, see: [#26175](https://github.com/ClickHouse/ClickHouse/issues/26175)#issuecomment-960353867, [comment](https://github.com/ClickHouse/ClickHouse/issues/26175#issuecomment-961155065). [#31624](https://github.com/ClickHouse/ClickHouse/pull/31624) ([小路](https://github.com/nicelulu)). * Fix invalid generated JSON when only column names contain invalid UTF-8 sequences. [#31534](https://github.com/ClickHouse/ClickHouse/pull/31534) ([Kevin Michel](https://github.com/kmichel-aiven)). * Disable `partial_merge_join_left_table_buffer_bytes` before bug in this optimization is fixed. See [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009)). Remove redundant option `partial_merge_join_optimizations`. [#31528](https://github.com/ClickHouse/ClickHouse/pull/31528) ([Vladimir C](https://github.com/vdimir)). -* Fix progress for short INSERT SELECT queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix bug with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix progress for short `INSERT SELECT` queries. [#31510](https://github.com/ClickHouse/ClickHouse/pull/31510) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong behavior with group by and positional arguments. Closes [#31280](https://github.com/ClickHouse/ClickHouse/issues/31280)#issuecomment-968696186. [#31420](https://github.com/ClickHouse/ClickHouse/pull/31420) ([Kseniia Sumarokova](https://github.com/kssenii)). * Resolve `nullptr` in STS credentials provider for S3. [#31409](https://github.com/ClickHouse/ClickHouse/pull/31409) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Remove not like function into RPNElement. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). +* Remove `notLike` function from index analysis, because it was wrong. [#31169](https://github.com/ClickHouse/ClickHouse/pull/31169) ([sundyli](https://github.com/sundy-li)). * Fix bug in Keeper which can lead to inability to start when some coordination logs was lost and we have more fresh snapshot than our latest log. [#31150](https://github.com/ClickHouse/ClickHouse/pull/31150) ([alesapin](https://github.com/alesapin)). * Rewrite right distributed table in local join. solves [#25809](https://github.com/ClickHouse/ClickHouse/issues/25809). [#31105](https://github.com/ClickHouse/ClickHouse/pull/31105) ([abel-cheng](https://github.com/abel-cheng)). -* Fix StorageMerge with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix abort in debug server and `DB::Exception: std::out_of_range: basic_string` error in release server in case of bad hdfs url by adding additional check of hdfs url structure. [#31042](https://github.com/ClickHouse/ClickHouse/pull/31042) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible assert in `hdfs` table function/engine, add test. [#31036](https://github.com/ClickHouse/ClickHouse/pull/31036) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix JSONValue/Query with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Using `formatRow` function with not row formats led to segfault. Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `Merge` table with aliases and where (it did not work before at all). Closes [#28802](https://github.com/ClickHouse/ClickHouse/issues/28802). [#31044](https://github.com/ClickHouse/ClickHouse/pull/31044) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix JSON_VALUE/JSON_QUERY with quoted identifiers. This allows to have spaces in json path. Closes [#30971](https://github.com/ClickHouse/ClickHouse/issues/30971). [#31003](https://github.com/ClickHouse/ClickHouse/pull/31003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Using `formatRow` function with not row-oriented formats led to segfault. 
Don't allow to use this function with such formats (because it doesn't make sense). [#31001](https://github.com/ClickHouse/ClickHouse/pull/31001) ([Kruglov Pavel](https://github.com/Avogar)). * Fix bug which broke select queries if they happened after dropping materialized view. Found in [#30691](https://github.com/ClickHouse/ClickHouse/issues/30691). [#30997](https://github.com/ClickHouse/ClickHouse/pull/30997) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Skip max_partition_size_to_drop check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). -* Fix some corner cases with intersect/except. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Skip `max_partition_size_to_drop` check in case of ATTACH PARTITION ... FROM and MOVE PARTITION ... [#30995](https://github.com/ClickHouse/ClickHouse/pull/30995) ([Amr Alaa](https://github.com/amralaa-MSFT)). +* Fix some corner cases with `INTERSECT` and `EXCEPT` operators. Closes [#30803](https://github.com/ClickHouse/ClickHouse/issues/30803). [#30965](https://github.com/ClickHouse/ClickHouse/pull/30965) ([Kseniia Sumarokova](https://github.com/kssenii)). #### Build/Testing/Packaging Improvement -* Fix broken symlink for sysroot/linux-riscv64/usr/lib. [#32071](https://github.com/ClickHouse/ClickHouse/pull/32071) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Build rpm and tgz packages in master and release branches workfolw. [#32048](https://github.com/ClickHouse/ClickHouse/pull/32048) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Adjust artifactory pusher to a new bucket paths. Use only version or pull request number in bucket, no `0`. Create a function to read github event data. [#31952](https://github.com/ClickHouse/ClickHouse/pull/31952) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Remove filesystem path to the build directory from binaries to enable reproducible builds. This needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Revert changes from [#28016](https://github.com/ClickHouse/ClickHouse/issues/28016): archive.ubuntu.com should be faster in general than RU mirror. [#31822](https://github.com/ClickHouse/ClickHouse/pull/31822) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* More correct setting up capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)). -* Replaced default ports for clickhouse-keeper internal communication from 44444 to 9234. Fixes [#30879](https://github.com/ClickHouse/ClickHouse/issues/30879). [#31799](https://github.com/ClickHouse/ClickHouse/pull/31799) ([alesapin](https://github.com/alesapin)). -* The script for uploading packages to the artifactory is added. [#31748](https://github.com/ClickHouse/ClickHouse/pull/31748) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
-* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Remove hardcoded repository name from CI scripts. [#31536](https://github.com/ClickHouse/ClickHouse/pull/31536) ([Constantine Peresypkin](https://github.com/pkit)). -* Initial support for risc-v. See development/build-cross-riscv for quirks and build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)). -* Drop support for using Ordinary databases with `MaterializedMySQL`. [#31292](https://github.com/ClickHouse/ClickHouse/pull/31292) ([Stig Bakken](https://github.com/stigsb)). -* Fix build snappy error in [#30790](https://github.com/ClickHouse/ClickHouse/issues/30790) Update of contrib/snappy is in https://github.com/google/snappy/pull/145/files. [#30796](https://github.com/ClickHouse/ClickHouse/pull/30796) ([李扬](https://github.com/taiyang-li)). +* Fix incorrect filtering result on non-x86 builds. This closes [#31417](https://github.com/ClickHouse/ClickHouse/issues/31417). This closes [#31524](https://github.com/ClickHouse/ClickHouse/issues/31524). [#31574](https://github.com/ClickHouse/ClickHouse/pull/31574) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Make ClickHouse build fully reproducible (byte identical on different machines). This closes [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31899](https://github.com/ClickHouse/ClickHouse/pull/31899) ([alexey-milovidov](https://github.com/alexey-milovidov)). Remove filesystem path to the build directory from binaries to enable reproducible builds. This needed for [#22113](https://github.com/ClickHouse/ClickHouse/issues/22113). [#31838](https://github.com/ClickHouse/ClickHouse/pull/31838) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Use our own CMakeLists for `zlib-ng`, `cassandra`, `mariadb-connector-c` and `xz`, `re2`, `sentry`, `gsasl`, `arrow`, `protobuf`. This is needed for [#20151](https://github.com/ClickHouse/ClickHouse/issues/20151). Part of [#9226](https://github.com/ClickHouse/ClickHouse/issues/9226). A small step towards removal of annoying trash from the build system. [#30599](https://github.com/ClickHouse/ClickHouse/pull/30599) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Hermetic builds: use fixed version of libc and make sure that no source or binary files from the host OS are using during build. This closes [#27133](https://github.com/ClickHouse/ClickHouse/issues/27133). This closes [#21435](https://github.com/ClickHouse/ClickHouse/issues/21435). This closes [#30462](https://github.com/ClickHouse/ClickHouse/issues/30462). [#30011](https://github.com/ClickHouse/ClickHouse/pull/30011) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* support compile in arm machine with parameter "-DENABLE_TESTS=OFF". [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)). +* Adding function `getFuzzerData()` to easily fuzz particular functions. This closes [#23227](https://github.com/ClickHouse/ClickHouse/issues/23227). [#27526](https://github.com/ClickHouse/ClickHouse/pull/27526) ([Alexey Boykov](https://github.com/mathalex)). +* More correct setting up capabilities inside Docker. [#31802](https://github.com/ClickHouse/ClickHouse/pull/31802) ([Constantine Peresypkin](https://github.com/pkit)). 
+* Enable clang `-fstrict-vtable-pointers`, `-fwhole-program-vtables` compile options. [#20151](https://github.com/ClickHouse/ClickHouse/pull/20151) ([Maksim Kita](https://github.com/kitaisreal)). +* Avoid downloading toolchain tarballs for cross-compiling for FreeBSD. [#31672](https://github.com/ClickHouse/ClickHouse/pull/31672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Initial support for risc-v. See development/build-cross-riscv for quirks and build command that was tested. [#31309](https://github.com/ClickHouse/ClickHouse/pull/31309) ([Vladimir Smirnov](https://github.com/Civil)). +* Support compilation on an ARM machine with the parameter "-DENABLE_TESTS=OFF". [#31007](https://github.com/ClickHouse/ClickHouse/pull/31007) ([zhanghuajie](https://github.com/zhanghuajieHIT)). ### ClickHouse release v21.11, 2021-11-09 From a4a7b59cf34831d44aa7d4ae5e1888e3592b8418 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Dec 2021 02:52:20 +0300 Subject: [PATCH 255/262] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12f74c71fde..3cf09fca89a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -### ClickHouse release v21.12, 2021-12-09 +### ClickHouse release v21.12, 2021-12-13 #### Backward Incompatible Change From bf6e091fe0893e75cd30efec2be483f1b1f282f8 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 12 Dec 2021 02:54:06 +0300 Subject: [PATCH 256/262] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3cf09fca89a..3b6046d38de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ * *A fix for a feature that previously had unwanted behaviour.* Do not allow direct select for Kafka/RabbitMQ/FileLog. Can be enabled by setting `stream_like_engine_allow_direct_select`. Direct select will not be allowed even if enabled by the setting, in case there is an attached materialized view. For Kafka and RabbitMQ, direct select, if allowed, will not commit messages by default. To enable commits with direct select, user must use storage level setting `kafka{rabbitmq}_commit_on_select=1` (default `0`). [#31053](https://github.com/ClickHouse/ClickHouse/pull/31053) ([Kseniia Sumarokova](https://github.com/kssenii)). * *A slight change in behaviour of a new function.* Return unquoted string in JSON_VALUE. Closes [#27965](https://github.com/ClickHouse/ClickHouse/issues/27965). [#31008](https://github.com/ClickHouse/ClickHouse/pull/31008) ([Kseniia Sumarokova](https://github.com/kssenii)). * *Setting rename.* Add custom null representation support for TSV/CSV input formats. Fix deserializing Nullable(String) in TSV/CSV/JSONCompactStringsEachRow/JSONStringsEachRow input formats. Rename `output_format_csv_null_representation` and `output_format_tsv_null_representation` to `format_csv_null_representation` and `format_tsv_null_representation` accordingly. [#30497](https://github.com/ClickHouse/ClickHouse/pull/30497) ([Kruglov Pavel](https://github.com/Avogar)). -* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions 20.6 and older. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then server will fail to start after upgrade. Stop replicas with old version to make new version start. 
After that it will not be possible to downgrade to version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)). +* *Further deprecation of already unused code.* This is relevant only for users of ClickHouse versions older than 20.6. A "leader election" mechanism is removed from `ReplicatedMergeTree`, because multiple leaders are supported since 20.6. If you are upgrading from an older version and some replica with an old version is a leader, then server will fail to start after upgrade. Stop replicas with old version to make new version start. After that it will not be possible to downgrade to version older than 20.6. [#32140](https://github.com/ClickHouse/ClickHouse/pull/32140) ([tavplubix](https://github.com/tavplubix)). #### New Feature From 3b0f026827762654eb7e80feff9a3ad4280e844a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Dec 2021 03:28:35 +0300 Subject: [PATCH 257/262] Slight performance improvement of "reinterpret" function --- src/Functions/reinterpretAs.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Functions/reinterpretAs.cpp b/src/Functions/reinterpretAs.cpp index f237b158fe5..ad357c74402 100644 --- a/src/Functions/reinterpretAs.cpp +++ b/src/Functions/reinterpretAs.cpp @@ -24,6 +24,7 @@ #include + namespace DB { namespace ErrorCodes @@ -174,16 +175,14 @@ public: const auto & offsets_from = col_from->getOffsets(); size_t size = offsets_from.size(); auto & vec_res = col_res->getData(); - vec_res.resize(size); + vec_res.resize_fill(size); size_t offset = 0; for (size_t i = 0; i < size; ++i) { - ToFieldType value{}; - memcpy(&value, + memcpy(&vec_res[i], &data_from[offset], std::min(static_cast(sizeof(ToFieldType)), offsets_from[i] - offset - 1)); - vec_res[i] = value; offset = offsets_from[i]; } @@ -201,15 +200,18 @@ public: size_t step = col_from_fixed->getN(); size_t size = data_from.size() / step; auto & vec_res = col_res->getData(); - vec_res.resize(size); size_t offset = 0; size_t copy_size = std::min(step, sizeof(ToFieldType)); + + if (sizeof(ToFieldType) <= step) + vec_res.resize(size); + else + vec_res.resize_fill(size); + for (size_t i = 0; i < size; ++i) { - ToFieldType value{}; - memcpy(&value, &data_from[offset], copy_size); - vec_res[i] = value; + memcpy(&vec_res[i], &data_from[offset], copy_size); offset += step; } @@ -288,7 +290,7 @@ private: { StringRef data = src.getDataAt(i); - std::memcpy(&data_to[offset], data.data, std::min(n, data.size)); + memcpy(&data_to[offset], data.data, std::min(n, data.size)); offset += n; } } @@ -347,10 +349,13 @@ private: using To = typename ToContainer::value_type; size_t size = from.size(); - to.resize_fill(size); - static constexpr size_t copy_size = std::min(sizeof(From), sizeof(To)); + if (sizeof(To) <= sizeof(From)) + to.resize(size); + else + to.resize_fill(size); + for (size_t i = 0; i < size; ++i) memcpy(static_cast(&to[i]), static_cast(&from[i]), copy_size); } From e58e69923c2a92b466de06b3b4b0766ee0b517e1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Dec 2021 03:55:23 +0300 Subject: [PATCH 258/262] Fix bad test --- .../queries/0_stateless/01615_random_one_shard_insertion.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql index e205a358db1..5a087e1e20f 100644 --- 
a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql +++ b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql @@ -15,11 +15,11 @@ set insert_distributed_one_random_shard = 1; set max_block_size = 1; set max_insert_block_size = 1; set min_insert_block_size_rows = 1; -insert into distr select number from numbers(20); +insert into distr select number from numbers(100); select count() != 0 from shard_0.tbl; select count() != 0 from shard_1.tbl; -select * from distr order by number; +select * from distr order by number LIMIT 20; drop table if exists shard_0.tbl; drop table if exists shard_1.tbl; From d677a5ab67bfae002fa981f4dda39cd61eb54f92 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Dec 2021 04:31:56 +0300 Subject: [PATCH 259/262] Fix perf test --- tests/performance/reinterpret_as.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/reinterpret_as.xml b/tests/performance/reinterpret_as.xml index 79ce167a363..dbf6df160ed 100644 --- a/tests/performance/reinterpret_as.xml +++ b/tests/performance/reinterpret_as.xml @@ -191,7 +191,7 @@ toInt256(number) as d, toString(number) as f, toFixedString(f, 20) as g - FROM numbers_mt(200000000) + FROM numbers_mt(100000000) SETTINGS max_threads = 8 FORMAT Null
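The two patches above (257 and 259) optimize and re-benchmark the `reinterpret` function. The core idea of the C++ change is that the destination `PODArray` only needs the zero-filling `resize_fill()` when the copied source element is narrower than the destination element; when `sizeof(To) <= sizeof(From)`, every destination byte is overwritten by `memcpy`, so the cheaper, non-initializing `resize()` is enough. A minimal standalone sketch of the same pattern (an illustration only, not the ClickHouse source; `std::vector` stands in for `PODArray` and always value-initializes, so the saved zero-fill is indicated only in the comments):

#include <algorithm>
#include <cstddef>
#include <cstring>
#include <vector>

// Reinterpret each element of `from` as a value of type To by copying
// min(sizeof(From), sizeof(To)) bytes per element.
template <typename From, typename To>
std::vector<To> reinterpretElements(const std::vector<From> & from)
{
    constexpr size_t copy_size = std::min(sizeof(From), sizeof(To));

    // Zero-initialization is only required when sizeof(To) > sizeof(From):
    // memcpy then leaves the trailing bytes of each destination element untouched.
    // With PODArray this is exactly the resize() vs resize_fill() distinction.
    std::vector<To> to(from.size());

    for (size_t i = 0; i < from.size(); ++i)
        std::memcpy(&to[i], &from[i], copy_size);

    return to;
}

For example, reinterpreting a vector of 32-bit integers as 16-bit integers keeps the low half of each value on a little-endian machine, and no zeroed buffer is needed because the two copied bytes fully overwrite each destination element.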
From 453135eb634316bc03e062839eb7e3d48da672ab Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 12 Dec 2021 05:24:47 +0300 Subject: [PATCH 260/262] Fix indecent error message --- src/Interpreters/DDLWorker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 27bb4906f1a..ee5dc4deebb 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -1189,7 +1189,7 @@ void DDLWorker::runMainThread() } catch (...) { - tryLogCurrentException(log, "Unexpected error, will try to restart main thread:"); + tryLogCurrentException(log, "Unexpected error, will try to restart main thread"); reset_state(); sleepForSeconds(5); } From 44f366fd12bd266a08cfe7b057c6f74aac58f5d4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 12 Dec 2021 00:08:26 +0300 Subject: [PATCH 261/262] Cleanup perf test runner Use subshell to: - avoid change/restore of TIMEFORMAT - grepping out trace output, by using set +x in a subshell - use array for options to avoid extra backslashes --- docker/test/performance-comparison/compare.sh | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 02d881347af..401656c9d09 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -261,16 +261,24 @@ function run_tests # Use awk because bash doesn't support floating point arithmetic. profile_seconds=$(awk "BEGIN { print ($profile_seconds_left > 0 ? 10 : 0) }") - TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") - # The grep is to filter out set -x output and keep only time output. - # The '2>&1 >/dev/null' redirects stderr to stdout, and discards stdout. - { \ - time "$script_dir/perf.py" --host localhost localhost --port $LEFT_SERVER_PORT $RIGHT_SERVER_PORT \ - --runs "$CHPC_RUNS" --max-queries "$CHPC_MAX_QUERIES" \ - --profile-seconds "$profile_seconds" \ - -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; \ - } 2>&1 >/dev/null | tee >(grep -v ^+ >> "wall-clock-times.tsv") \ - || echo "Test $test_name failed with error code $?" >> "$test_name-err.log" + ( + set +x + argv=( + --host localhost localhost + --port "$LEFT_SERVER_PORT" "$RIGHT_SERVER_PORT" + --runs "$CHPC_RUNS" + --max-queries "$CHPC_MAX_QUERIES" + --profile-seconds "$profile_seconds" + + "$test" + ) + TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n") + # one more subshell to suppress trace output for "set +x" + ( + time "$script_dir/perf.py" "${argv[@]}" > "$test_name-raw.tsv" 2> "$test_name-err.log" + ) 2>>wall-clock-times.tsv >/dev/null \ + || echo "Test $test_name failed with error code $?" 
>> "$test_name-err.log" + ) 2>/dev/null profile_seconds_left=$(awk -F' ' \ 'BEGIN { s = '$profile_seconds_left'; } /^profile-total/ { s -= $2 } END { print s }' \ @@ -278,8 +286,6 @@ function run_tests current_test=$((current_test + 1)) done - unset TIMEFORMAT - wait } From c1d3323e256f1eab95cadad06a7fb22871d9b9e4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 13 Dec 2021 10:48:57 +0300 Subject: [PATCH 262/262] Rerun workflows more times --- tests/ci/workflow_approve_rerun_lambda/app.py | 29 +++++++++++++++---- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 436e9b06ede..8c54414b63b 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -23,9 +23,10 @@ SUSPICIOUS_PATTERNS = [ ] MAX_RETRY = 5 +MAX_WORKFLOW_RERUN = 5 WorkflowDescription = namedtuple('WorkflowDescription', - ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', + ['name', 'action', 'run_id', 'event', 'workflow_id', 'conclusion', 'status', 'api_url', 'fork_owner_login', 'fork_branch', 'rerun_url', 'jobs_url', 'attempt', 'url']) TRUSTED_WORKFLOW_IDS = { @@ -192,6 +193,7 @@ def get_workflow_description_from_event(event): jobs_url = event['workflow_run']['jobs_url'] rerun_url = event['workflow_run']['rerun_url'] url = event['workflow_run']['html_url'] + api_url = event['workflow_run']['url'] return WorkflowDescription( name=name, action=action, @@ -205,7 +207,8 @@ def get_workflow_description_from_event(event): status=status, jobs_url=jobs_url, rerun_url=rerun_url, - url=url + url=url, + api_url=api_url ) def get_pr_author_and_orgs(pull_request): @@ -273,15 +276,29 @@ def get_token_from_aws(): installation_id = get_installation_id(encoded_jwt) return get_access_token(encoded_jwt, installation_id) +def get_workflow_jobs(workflow_description): + jobs_url = workflow_description.api_url + f"/attempts/{workflow_description.attempt}/jobs" + jobs = [] + i = 1 + while True: + got_jobs = _exec_get_with_retry(jobs_url + f"?page={i}") + if len(got_jobs['jobs']) == 0: + break + + jobs += got_jobs['jobs'] + i += 1 + + return jobs + def check_need_to_rerun(workflow_description): - if workflow_description.attempt >= 2: + if workflow_description.attempt >= MAX_WORKFLOW_RERUN: print("Not going to rerun workflow because it's already tried more than two times") return False print("Going to check jobs") - jobs = _exec_get_with_retry(workflow_description.jobs_url + "?per_page=100") - print("Got jobs", len(jobs['jobs'])) - for job in jobs['jobs']: + jobs = get_workflow_jobs(workflow_description) + print("Got jobs", len(jobs)) + for job in jobs: if job['conclusion'] not in ('success', 'skipped'): print("Job", job['name'], "failed, checking steps") for step in job['steps']: